npm - @workbench-ai/workbench-core - Versions diffs - 0.0.63 → 0.0.65 - Mend

@workbench-ai/workbench-core 0.0.63 → 0.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/dist/execution-events.d.ts +2 -2
package/dist/execution-events.d.ts.map +1 -1
package/dist/execution-events.js +6 -6
package/dist/execution-evidence.d.ts +11 -11
package/dist/execution-jobs.d.ts +5 -5
package/dist/execution-runtime-types.d.ts +3 -3
package/dist/execution-scheduler.d.ts +12 -12
package/dist/execution-scheduler.d.ts.map +1 -1
package/dist/execution-scheduler.js +5 -5
package/dist/execution-traces.d.ts +3 -3
package/dist/index.d.ts +24 -25
package/dist/index.d.ts.map +1 -1
package/dist/index.js +56 -54
package/dist/inspection.d.ts +111 -0
package/dist/inspection.d.ts.map +1 -0
package/dist/inspection.js +217 -0
package/dist/sandbox-backends/index.d.ts +12 -20
package/dist/sandbox-backends/index.d.ts.map +1 -1
package/dist/sandbox-backends/index.js +15 -26
package/dist/sandbox-backends/names.d.ts +2 -3
package/dist/sandbox-backends/names.d.ts.map +1 -1
package/dist/sandbox-backends/names.js +5 -5
package/dist/sandbox-inputs.d.ts +6 -6
package/package.json +3 -3

package/dist/index.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFile
 import { attachSandboxMetadataToJob, createWorkbenchSandboxFileStore, isSurfaceSnapshotFile, readWorkbenchExecutionSpec, } from "./sandbox-inputs.js";
 import { asRuntimeRecord, importNodeModule, isJsonPayload, jsonRecord, nodeBuiltin, quoteShellArg, resolveWorkbenchWorkerId, } from "./runtime-utils.js";
 import { createWorkbenchExecutionCapability, createWorkbenchSandboxAllocation, collectExecutionCapabilityScopeIssues, collectSandboxAllocationScopeIssues, collectSandboxHandleScopeIssues, assertSandboxBackendSupportsNetworkPolicy, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
-import { createSandboxBackendPlaneForProvider, } from "./sandbox-backends/index.js";
+import { createSandboxBackendPlaneForBackend, } from "./sandbox-backends/index.js";
 import { applyWorkbenchCandidatePatch } from "./candidate-patch.js";
 import { assignUsageRole, completeUsageSummary, mergeUsageSummaries, normalizeUsageSummary, usageStats, } from "./execution-usage.js";
 import { traceFilePaths, workbenchTraceExecutionDirectory, } from "./trace-files.js";
@@ -35,12 +35,42 @@ export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity,
 export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, } from "./execution-outputs.js";
 export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
 export { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.js";
+export { buildCandidateLineage, buildWorkbenchEvaluationComparison, buildWorkbenchEvaluationMetricDescriptors, formatEvaluationConfigurationLabel, isCompleteEvaluationSummary, readEvaluationScore, } from "@workbench-ai/workbench-contract";
+export { WorkbenchInspectionError, createWorkbenchInspection, } from "./inspection.js";
 export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, } from "./execution-traces.js";
-export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForProvider, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxProviderName, sandboxProviderAdmissionForResources, sandboxProviderDefaultMaxConcurrentJobs, sandboxProviderLeaseScope, sandboxHostHealthExpectationForProvider, } from "./sandbox-backends/index.js";
+export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForBackend, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxBackendName, sandboxBackendAdmissionForResources, sandboxHostHealthExpectationForBackend, } from "./sandbox-backends/index.js";
 export function sanitizeWorkbenchRuntimeJobForExchange(job) {
     const { leaseUntil: _leaseUntil, wakeupLeaseUntil: _wakeupLeaseUntil, hostId: _hostId, workerId: _workerId, claimTokenHash: _claimTokenHash, trace: _trace, traceSessions: _traceSessions, ...portable } = job;
     return { ...portable };
 }
+export function compactWorkbenchRuntimeJobForExchange(job) {
+    const portable = sanitizeWorkbenchRuntimeJobForExchange(job);
+    return {
+        ...portable,
+        input: compactRuntimeJobJson(portable.input),
+        ...(portable.output !== undefined
+            ? { output: compactRuntimeJobJson(portable.output) }
+            : {}),
+    };
+}
+function compactRuntimeJobJson(value) {
+    if (!value || typeof value !== "object" || Array.isArray(value)) {
+        return value;
+    }
+    const next = { ...value };
+    delete next.baseFiles;
+    delete next.engineResolveFiles;
+    delete next.fileSet;
+    delete next.files;
+    delete next.traceFiles;
+    const candidatePatch = next.candidatePatch;
+    if (candidatePatch && typeof candidatePatch === "object" && !Array.isArray(candidatePatch)) {
+        const compactPatch = { ...candidatePatch };
+        delete compactPatch.files;
+        next.candidatePatch = compactPatch;
+    }
+    return next;
+}
 export function sanitizeWorkbenchRuntimeCandidateForExchange(candidate) {
     const { ownerUserId: _ownerUserId, ownerUsername: _ownerUsername, metrics: _metrics, candidateRunId: _candidateRunId, candidateRunName: _candidateRunName, ...portable } = candidate;
     return { ...portable };
@@ -1354,22 +1384,6 @@ export function filterCandidateSourceFiles(files) {
         .filter((file) => isCandidateSourceFilePath(file.path))
         .map((file) => ({ ...file }));
 }
-export function buildCandidateLineage(args) {
-    const orderedSummaries = args.summaries.slice().sort((left, right) => {
-        const createdAt = left.createdAt.localeCompare(right.createdAt);
-        return createdAt !== 0 ? createdAt : left.id.localeCompare(right.id);
-    });
-    const summaryIds = new Set(orderedSummaries.map((summary) => summary.id));
-    return {
-        activeId: args.activeId,
-        nodes: orderedSummaries.map((summary) => ({
-            id: summary.id,
-            active: args.activeId === summary.id,
-            summary,
-        })),
-        edges: orderedSummaries.flatMap((summary) => buildLineageEdges(summary, summaryIds)),
-    };
-}
 export function normalizeSurfaceFiles(files) {
     const byPath = new Map();
     for (const file of files) {
@@ -1610,18 +1624,6 @@ function summarizeCaseInputs(files) {
         };
     });
 }
-function buildLineageEdges(summary, summaryIds) {
-    const edges = [];
-    if (summary.baseId && summary.baseId !== summary.id && summaryIds.has(summary.baseId)) {
-        edges.push({
-            id: `anchor:${summary.baseId}:${summary.id}`,
-            kind: "anchor",
-            sourceId: summary.baseId,
-            targetId: summary.id,
-        });
-    }
-    return edges;
-}
 export function createWorkbenchRunWorkload(args) {
     const purpose = workbenchExecutionPurpose(args.job);
     if (!purpose) {
@@ -1725,8 +1727,8 @@ async function executeWorkbenchExecutionJobWithResolvedAuth(runtimeArgs, options
         }, executionForRuntime, startedAt, createWorkbenchExecutionCapability(executionForRuntime, { now: startedAt })));
     }
     const fileStore = createWorkbenchSandboxFileStore(runtimeArgs);
-    const planeFactory = options.createSandboxPlaneForProvider ?? createSandboxBackendPlaneForProvider;
-    const plane = planeFactory(options.sandboxProvider, runtimeArgs, startedAt, fileStore);
+    const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
+    const plane = planeFactory(options.sandboxBackend, runtimeArgs, startedAt, fileStore);
     const validated = await executeValidatedSandboxExecution(plane, executionForRuntime, {
         now: startedAt,
         runnerId: resolveWorkbenchWorkerId([
@@ -2282,7 +2284,7 @@ function completedJobFromSandboxResult(fallbackJob, startedAt, result) {
     return attachSandboxMetadataToJob(failWorkbenchRunJob(fallbackJob, result.startedAt || startedAt, result.error ?? `Sandbox execution ${result.status}.`, result.finishedAt), asRuntimeRecord(result.metadata).sandbox);
 }
 async function executeCandidateRevisionExecutionInCurrentRuntime(args, execution, startedAt, capability, eventPublisher) {
-    const { workload, result } = await runHostedProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher);
+    const { workload, result } = await runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher);
     if (result.error || (result.exitCode ?? 0) !== 0) {
         return failWorkbenchRunJob(args.job, startedAt, result.error ?? `Adapter ${execution.adapter.use} exited with status ${result.exitCode}.`, result.finishedAt, result);
     }
@@ -2331,7 +2333,7 @@ async function executeAttemptExecutionInCurrentRuntime(args, execution, startedA
         engineCases: args.engineCases,
         traceFiles: args.traceFiles,
     });
-    const workloadResult = await runHostedCommandExecutionSteps(args, workload, attemptStepsForExecution(execution, args.spec, args.adapterManifests), startedAt, {
+    const workloadResult = await runRemoteCommandExecutionSteps(args, workload, attemptStepsForExecution(execution, args.spec, args.adapterManifests), startedAt, {
         capability,
         eventPublisher,
     });
@@ -2411,7 +2413,7 @@ export async function executeRuntimeControlOperationSequenceInCurrentRuntime(arg
     const adapterAuth = await materializeSandboxAdapterAuth(runtimeArgs, childExecution);
     let result;
     try {
-        result = await runHostedCommandExecutionSteps({
+        result = await runRemoteCommandExecutionSteps({
             ...runtimeArgs,
             ...(adapterAuth.root ? { adapterAuthRoot: adapterAuth.root } : {}),
         }, workload, args.runtimeControlOperation.operations.map((operation, index) => runtimeControlStepForOperation(operation, index, args.adapterManifests)), startedAt, {
@@ -2446,8 +2448,8 @@ async function executeRuntimeControlOperationSequenceInSandbox(args, options, st
     const childArgs = createRuntimeControlSandboxInput(args, request);
     const execution = readWorkbenchExecutionSpec(childArgs.job);
     const fileStore = createWorkbenchSandboxFileStore(childArgs);
-    const planeFactory = options.createSandboxPlaneForProvider ?? createSandboxBackendPlaneForProvider;
-    const plane = planeFactory(options.sandboxProvider, childArgs, startedAt, fileStore);
+    const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
+    const plane = planeFactory(options.sandboxBackend, childArgs, startedAt, fileStore);
     assertSandboxBackendSupportsNetworkPolicy(plane.backend, execution);
     const sandboxOptions = {
         now: startedAt,
@@ -2667,7 +2669,7 @@ function assertRuntimeControlScope(label, issues) {
         throw new Error(`${label} failed validation:\n${issues.join("\n")}`);
     }
 }
-async function runHostedProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher) {
+async function runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher) {
     const workload = createWorkbenchRunWorkload({
         job: args.job,
         spec: args.spec,
@@ -2676,13 +2678,13 @@ async function runHostedProtocolExecutionResult(args, execution, startedAt, capa
         engineCases: args.engineCases,
         traceFiles: args.traceFiles,
     });
-    const result = await runHostedCommandExecutionSteps(args, workload, [protocolStepForExecution(execution, args.adapterManifests)], startedAt, {
+    const result = await runRemoteCommandExecutionSteps(args, workload, [protocolStepForExecution(execution, args.adapterManifests)], startedAt, {
         capability,
         eventPublisher,
     });
     return { workload, result };
 }
-async function runHostedCommandExecutionSteps(args, workload, steps, startedAt, options = {}) {
+async function runRemoteCommandExecutionSteps(args, workload, steps, startedAt, options = {}) {
     const [{ execFile }, fs, os, path, { promisify }] = await Promise.all([
         importNodeModule(nodeBuiltin("child_process")),
         importNodeModule(nodeBuiltin("fs/promises")),
@@ -2752,7 +2754,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
                     await stageWorkbenchEnginePrivateFiles(workspace.root, workload);
                     enginePrivateStaged = true;
                 }
-                await resetHostedWorkloadStepOutput(workspace.root);
+                await resetRemoteWorkloadStepOutput(workspace.root);
                 const stepAdapterId = step.adapter?.use ?? execution.adapter.use;
                 const adapterRequestPath = await writeWorkbenchAdapterRequest(workspace.root, workload, execution, step, adapterAuthRequestForStep(args, stepAdapterId), args.adapterManifests);
                 const stepRole = stepEventRole(step);
@@ -2768,10 +2770,10 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
                     const adapterRoot = step.executor === "host"
                         ? hostAdapterRoots.get(stepAdapterId)
                         : undefined;
-                    const command = createHostedWorkloadShellCommand(workspace.root, step.command, step.label, step.okExitCodes);
+                    const command = createRemoteWorkloadShellCommand(workspace.root, step.command, step.label, step.okExitCodes);
                     await execFileAsync("sh", ["-c", command], {
                         cwd: adapterRoot ?? workspace.root,
-                        env: createHostedWorkloadAdapterEnv(workspace.root, adapterRequestPath, adapterAuthEnvForStep(args, stepAdapterId), adapterRoot ? { adapterRoot } : undefined, args.adapterRuntimeEnv),
+                        env: createRemoteWorkloadAdapterEnv(workspace.root, adapterRequestPath, adapterAuthEnvForStep(args, stepAdapterId), adapterRoot ? { adapterRoot } : undefined, args.adapterRuntimeEnv),
                         maxBuffer: 10 * 1024 * 1024,
                         timeout: stepTimeoutMs,
                     });
@@ -2811,7 +2813,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
                 .catch(() => undefined);
         }
         if (exitCode !== 0) {
-            return await readHostedRunFailureResult(workspace.root, workload, {
+            return await readRemoteRunFailureResult(workspace.root, workload, {
                 exitCode,
                 error: runtimeError ?? `Runtime command exited with status ${exitCode}.`,
                 startedAt,
@@ -2843,10 +2845,10 @@ async function runCandidatePrepareCommand(args) {
         role,
     });
     try {
-        const shellCommand = createHostedWorkloadShellCommand(args.root, command, "candidate_prepare");
+        const shellCommand = createRemoteWorkloadShellCommand(args.root, command, "candidate_prepare");
         await args.execFileAsync("sh", ["-c", shellCommand], {
             cwd: args.root,
-            env: createHostedWorkloadPrepareEnv(args.root),
+            env: createRemoteWorkloadPrepareEnv(args.root),
             maxBuffer: 10 * 1024 * 1024,
             timeout: args.timeoutMs,
         });
@@ -3126,7 +3128,7 @@ function adapterFilePathWithinRoot(filePath, sourceRoot) {
     }
     return normalized.slice(sourceRoot.length + 1);
 }
-async function readHostedRunFailureResult(root, workload, options) {
+async function readRemoteRunFailureResult(root, workload, options) {
     const traceFiles = await readRuntimeTraceFiles(root, workload);
     const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root), { ignorePath: isWorkbenchInternalOutputPath }));
     const startedAt = options.startedAt ?? new Date().toISOString();
@@ -3213,7 +3215,7 @@ async function readRuntimeTraceFiles(root, workload) {
 function filterRuntimeOutputFiles(files) {
     return files.filter((file) => !isWorkbenchInternalOutputPath(file.path));
 }
-function createHostedWorkloadShellCommand(root, command, prefix = "", okExitCodes = [0]) {
+function createRemoteWorkloadShellCommand(root, command, prefix = "", okExitCodes = [0]) {
     const outputPrefix = prefix ? `${prefix}_` : "";
     const okExpression = [...new Set(okExitCodes)]
         .sort((left, right) => left - right)
@@ -3233,7 +3235,7 @@ function createHostedWorkloadShellCommand(root, command, prefix = "", okExitCode
         'exit "$status"',
     ].join("; ");
 }
-async function resetHostedWorkloadStepOutput(root) {
+async function resetRemoteWorkloadStepOutput(root) {
     const fs = await importNodeModule(nodeBuiltin("fs/promises"));
     await fs
         .rm(workbenchAdapterOperationResultPath(outputDir(root)), { force: true })
@@ -3331,8 +3333,8 @@ function requireImproveEdits(spec) {
     }
     return edits;
 }
-function createHostedWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {}, options = {}, runtimeEnv = {}) {
-    const env = createHostedWorkloadBaseEnv();
+function createRemoteWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {}, options = {}, runtimeEnv = {}) {
+    const env = createRemoteWorkloadBaseEnv();
     env.WORKBENCH_ADAPTER_REQUEST = adapterRequestPath;
     env.WORKBENCH_OUTPUT = outputDir(root);
     env.WORKBENCH_RESULT = workbenchAdapterOperationResultPath(outputDir(root));
@@ -3348,12 +3350,12 @@ function createHostedWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {
     Object.assign(env, runtimeEnv);
     return env;
 }
-function createHostedWorkloadPrepareEnv(root) {
-    const env = createHostedWorkloadBaseEnv();
+function createRemoteWorkloadPrepareEnv(root) {
+    const env = createRemoteWorkloadBaseEnv();
     env.WORKBENCH_OUTPUT = outputDir(root);
     return env;
 }
-function createHostedWorkloadBaseEnv() {
+function createRemoteWorkloadBaseEnv() {
     const env = {};
     for (const [key, value] of Object.entries(process.env)) {
         if (typeof value === "string") {

package/dist/inspection.d.ts ADDED Viewed

@@ -0,0 +1,111 @@
+import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
+import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, SurfaceSnapshotFile, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
+export interface WorkbenchInspectionErrorOptions {
+    status?: number;
+}
+export declare class WorkbenchInspectionError extends Error {
+    readonly status: number;
+    readonly statusCode: number;
+    constructor(message: string, options?: WorkbenchInspectionErrorOptions);
+}
+export interface WorkbenchInspectionFileListInput {
+    fingerprint?: string | null;
+}
+export interface WorkbenchInspectionPreviewInput {
+    path: string;
+    view: "diff" | "raw" | "rendered";
+}
+export interface WorkbenchInspectionFilePreviewInput extends WorkbenchInspectionFileListInput, WorkbenchInspectionPreviewInput {
+}
+export interface WorkbenchInspectionCandidateInput {
+    id: string;
+}
+export interface WorkbenchInspectionCandidatePreviewInput extends WorkbenchInspectionCandidateInput, WorkbenchInspectionPreviewInput {
+}
+export interface WorkbenchInspectionEvaluationInput {
+    id: string;
+}
+export interface WorkbenchInspectionCaseReviewInput {
+    candidateId: string;
+    caseId: string;
+    runId: string;
+    sampleIndex?: number;
+}
+export interface WorkbenchInspectionRunInput {
+    id: string;
+    includeJobs?: boolean;
+}
+export interface WorkbenchInspectionExecutionInput {
+    runId: string;
+    jobId: string;
+}
+export interface WorkbenchInspectionExecutionPreviewInput extends WorkbenchInspectionExecutionInput {
+    path: string;
+    view: "diff" | "raw" | "rendered";
+}
+export interface WorkbenchInspectionRunDetail {
+    run: RunSummary;
+    jobs?: RemoteWorkbenchJob[];
+}
+export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
+export interface WorkbenchFailureDetail {
+    kind: WorkbenchFailureKind;
+    id: string;
+    status?: string;
+    runId?: string;
+    candidateId?: string;
+    evaluationId?: string;
+    jobId?: string;
+    caseId?: string;
+    sampleIndex?: number;
+    attemptIndex?: number;
+    error?: string;
+}
+export interface WorkbenchFailureDiagnosis {
+    targetId: string | null;
+    failures: WorkbenchFailureDetail[];
+    failedRunCount: number;
+    failedEvaluationCount: number;
+    failedJobCount: number;
+}
+export interface WorkbenchInspectionBackend {
+    projectId: string;
+    snapshot(): Promise<RuntimeSnapshot>;
+    spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
+    sourceFiles(input: WorkbenchInspectionFileListInput): Promise<SurfaceSnapshotFile[]>;
+    candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
+    candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<{
+        files: SurfaceSnapshotFile[];
+        changedPaths: readonly string[];
+    }>;
+    evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
+    run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
+    jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
+    executionFiles(input: WorkbenchInspectionExecutionInput): Promise<SurfaceSnapshotFile[]>;
+    caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
+    executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
+    traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
+    traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
+}
+export interface WorkbenchInspection {
+    snapshot(): Promise<RuntimeSnapshot>;
+    spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
+    sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
+    sourcePreview(input: WorkbenchInspectionFilePreviewInput): Promise<CandidateFilePreview>;
+    candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
+    candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
+    candidatePreview(input: WorkbenchInspectionCandidatePreviewInput): Promise<CandidateFilePreview>;
+    evaluations(): Promise<WorkbenchEvaluationComparison>;
+    evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
+    caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
+    run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
+    executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
+    executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
+    executionPreview(input: WorkbenchInspectionExecutionPreviewInput): Promise<CandidateFilePreview>;
+    lineage(): Promise<CandidateLineageGraph>;
+    diagnose(input?: {
+        targetId?: string | null;
+    }): Promise<WorkbenchFailureDiagnosis>;
+}
+export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
+//# sourceMappingURL=inspection.d.ts.map

package/dist/inspection.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,mBAAmB,EACnB,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAc1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC,EAAE,+BAA+B;CAAG;AAEjI,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC,EAAE,+BAA+B;CAAG;AAEvI,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC;IACjG,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CA
AC;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACrF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC;QAChE,KAAK,EAAE,mBAAmB,EAAE,CAAC;QAC7B,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;KACjC,CAAC,CAAC;IACH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACzF,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,aAAa,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACzF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACjG,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CA
AC;IACjG,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAiGrB"}

package/dist/inspection.js ADDED Viewed

@@ -0,0 +1,217 @@
+import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
+import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
+import { candidateRecordWithoutDerivedFields, createCandidateFilePreview, createCaseReview, summarizeCandidateFiles, } from "./index.js";
+/**
+ * Error raised by the Workbench inspection facade.
+ *
+ * Exposes a numeric status under two property names, `status` and
+ * `statusCode`, which always hold the same value.
+ */
+export class WorkbenchInspectionError extends Error {
+    status;
+    statusCode;
+    /**
+     * @param message Description forwarded to the `Error` constructor.
+     * @param options Optional settings; `options.status` replaces the default of 400.
+     */
+    constructor(message, options = {}) {
+        super(message);
+        // `??` only falls back when status is null/undefined, so an explicit 0 is kept.
+        const resolvedStatus = options.status ?? 400;
+        this.name = "WorkbenchInspectionError";
+        this.status = resolvedStatus;
+        this.statusCode = resolvedStatus;
+    }
+}
+export function createWorkbenchInspection(backend) {
+    return {
+        snapshot: () => backend.snapshot(),
+        spec: (input = {}) => backend.spec(input),
+        sourceFiles: async (input = {}) => {
+            const files = await backend.sourceFiles(input);
+            return summarizeCandidateFiles(files, files.map((file) => file.path));
+        },
+        sourcePreview: async (input) => createCandidateFilePreview({
+            files: await backend.sourceFiles(input),
+            path: input.path,
+            view: input.view,
+        }),
+        candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
+        candidateFiles: async (input) => {
+            const result = await backend.candidateFiles(input);
+            return summarizeCandidateFiles(result.files, result.changedPaths);
+        },
+        candidatePreview: async (input) => createCandidateFilePreview({
+            files: (await backend.candidateFiles(input)).files,
+            path: input.path,
+            view: input.view,
+        }),
+        evaluations: async () => {
+            const snapshot = await backend.snapshot();
+            return buildWorkbenchEvaluationComparison(snapshot.evaluations);
+        },
+        evaluation: (input) => backend.evaluation(input),
+        caseReview: async (input) => {
+            if (backend.caseReview) {
+                return await backend.caseReview(input);
+            }
+            const candidate = await backend.candidate({ id: input.candidateId });
+            const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
+            return createCaseReview({
+                candidate,
+                caseId: input.caseId,
+                executions: buildCandidateCaseExecutionRefs({
+                    jobs,
+                    candidateId: input.candidateId,
+                    caseId: input.caseId,
+                    sampleIndex: input.sampleIndex,
+                }),
+            });
+        },
+        run: (input) => backend.run(input),
+        executionTrace: async (input) => {
+            if (backend.executionTrace) {
+                return await backend.executionTrace(input);
+            }
+            if (!backend.jobInRun || !backend.traceForJob) {
+                throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
+            }
+            const jobs = [await backend.jobInRun(input)];
+            return {
+                projectId: backend.projectId,
+                runId: input.runId,
+                executions: buildWorkbenchExecutionEvidence({
+                    jobs,
+                    traceIdPrefix: `${backend.projectId}-execution`,
+                    traceForJob: backend.traceForJob,
+                    traceSessionsForJob: backend.traceSessionsForJob,
+                }),
+            };
+        },
+        executionFiles: async (input) => {
+            const files = await backend.executionFiles(input);
+            return summarizeCandidateFiles(files, files.map((file) => file.path));
+        },
+        executionPreview: async (input) => createCandidateFilePreview({
+            files: await backend.executionFiles(input),
+            path: input.path,
+            view: input.view,
+        }),
+        lineage: async () => {
+            const snapshot = await backend.snapshot();
+            return buildCandidateLineage({
+                summaries: snapshot.summaries,
+                activeId: snapshot.activeId,
+            });
+        },
+        diagnose: async (input = {}) => {
+            const snapshot = await backend.snapshot();
+            return await diagnoseWorkbenchFailures({
+                snapshot,
+                backend,
+                targetId: input.targetId?.trim() || null,
+            });
+        },
+    };
+}
+async function diagnoseWorkbenchFailures(args) {
+    const targetRun = args.targetId
+        ? args.snapshot.runs.find((run) => run.id === args.targetId)
+        : null;
+    const targetEvaluation = args.targetId
+        ? args.snapshot.evaluations.find((evaluation) => evaluation.id === args.targetId)
+        : null;
+    const failures = [];
+    if (args.targetId && targetRun) {
+        const detail = await args.backend.run({ id: targetRun.id, includeJobs: true });
+        failures.push(...runFailures(detail.run));
+        failures.push(...jobFailures(detail.jobs ?? []));
+    }
+    else if (args.targetId && targetEvaluation) {
+        const evaluation = await args.backend.evaluation({ id: targetEvaluation.id });
+        failures.push(...evaluationFailures(evaluation));
+    }
+    else {
+        for (const run of args.snapshot.runs) {
+            failures.push(...runFailures(run));
+        }
+        for (const evaluation of args.snapshot.evaluations) {
+            failures.push(...evaluationSummaryFailures(evaluation));
+        }
+    }
+    return {
+        targetId: args.targetId,
+        failures,
+        failedRunCount: failures.filter((failure) => failure.kind === "run").length,
+        failedEvaluationCount: failures.filter((failure) => failure.kind === "evaluation").length,
+        failedJobCount: failures.filter((failure) => failure.kind === "job").length,
+    };
+}
+function runFailures(run) {
+    if (run.status !== "finished" || (run.outcome !== "error" && run.outcome !== "cancelled")) {
+        return [];
+    }
+    return [{
+            kind: "run",
+            id: run.id,
+            runId: run.id,
+            candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
+            status: run.outcome,
+            ...(run.error ? { error: run.error } : {}),
+        }];
+}
+function evaluationSummaryFailures(evaluation) {
+    if (evaluation.status === "completed" &&
+        evaluation.errorSampleCount === 0 &&
+        !evaluation.error) {
+        return [];
+    }
+    return [{
+            kind: "evaluation",
+            id: evaluation.id,
+            evaluationId: evaluation.id,
+            runId: evaluation.runId,
+            candidateId: evaluation.candidateId,
+            status: evaluation.status,
+            ...(evaluation.error ? { error: evaluation.error } : {}),
+        }];
+}
+function evaluationFailures(evaluation) {
+    const failures = evaluationSummaryFailures(evaluation);
+    for (const sample of evaluation.evaluation.samples) {
+        if (!sample.error && !(sample.cases ?? []).some((entry) => entry.status && entry.status !== "completed")) {
+            continue;
+        }
+        failures.push({
+            kind: "sample",
+            id: `${evaluation.id}:sample:${sample.index}`,
+            evaluationId: evaluation.id,
+            runId: evaluation.runId,
+            candidateId: evaluation.candidateId,
+            sampleIndex: sample.index,
+            status: sample.status,
+            ...(sample.error ? { error: sample.error } : {}),
+        });
+        for (const result of sample.cases ?? []) {
+            if (!result.status || result.status === "completed") {
+                continue;
+            }
+            failures.push({
+                kind: "case",
+                id: `${evaluation.id}:case:${result.id}:sample:${sample.index}`,
+                evaluationId: evaluation.id,
+                runId: evaluation.runId,
+                candidateId: evaluation.candidateId,
+                caseId: result.id,
+                sampleIndex: sample.index,
+                status: result.status,
+            });
+        }
+    }
+    return failures;
+}
+function jobFailures(jobs) {
+    return jobs
+        .filter((job) => isFailedJobStatus(job.status))
+        .map((job) => ({
+        kind: "job",
+        id: job.id,
+        jobId: job.id,
+        runId: job.runId,
+        candidateId: job.candidateId,
+        status: job.status,
+        attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
+        ...(job.error ? { error: job.error } : {}),
+    }));
+}
+function isFailedJobStatus(status) {
+    return status === "failed" || status === "cancelled";
+}

package/dist/sandbox-backends/index.d.ts CHANGED Viewed

@@ -1,37 +1,29 @@
 import type { WorkbenchExecutionRuntimeInput } from "../execution-runtime-types.ts";
 import type { SandboxBackendCapabilities, SandboxExecutionFileStore, SandboxPlane } from "../sandbox-plane.ts";
-import { type WorkbenchSandboxProviderName } from "./names.ts";
-export { DOCKER_SANDBOX_BACKEND, resolveWorkbenchSandboxProviderName, type WorkbenchSandboxProviderName, } from "./names.ts";
+import { type WorkbenchSandboxBackendName } from "./names.ts";
+export { DOCKER_SANDBOX_BACKEND, resolveWorkbenchSandboxBackendName, type WorkbenchSandboxBackendName, } from "./names.ts";
 export { createDockerSandboxBackendDescriptor, createDockerSandboxPlane, } from "./docker.ts";
 export interface SandboxHostHealthExpectation {
-    provider: WorkbenchSandboxProviderName;
-    backend: string;
+    backend: WorkbenchSandboxBackendName;
     capabilities: SandboxBackendCapabilities;
 }
-export interface SandboxProviderRequestedResources {
+export interface SandboxBackendRequestedResources {
     cpu: number;
     memoryGb: number;
     diskGb?: number;
     timeoutMinutes?: number;
 }
-export interface SandboxProviderHostCost {
+export interface SandboxBackendHostCost {
     cpu: number;
     memoryGb: number;
     diskGb: number;
 }
-export interface SandboxProviderLeaseRequest {
-    scope: string;
-    units: number;
+export interface SandboxBackendAdmission {
+    backend: WorkbenchSandboxBackendName;
+    hostCost: SandboxBackendHostCost;
 }
-export interface SandboxProviderAdmission {
-    provider: WorkbenchSandboxProviderName;
-    hostCost: SandboxProviderHostCost;
-    providerLeases: SandboxProviderLeaseRequest[];
-}
-export declare function createSandboxBackendPlaneForProvider(provider: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
-export declare function sandboxHostHealthExpectationForProvider(provider: WorkbenchSandboxProviderName): SandboxHostHealthExpectation;
-export declare function assertSandboxHostHealthForProvider(value: unknown, provider: WorkbenchSandboxProviderName): void;
-export declare function sandboxProviderDefaultMaxConcurrentJobs(_provider: WorkbenchSandboxProviderName): number | null;
-export declare function sandboxProviderAdmissionForResources(provider: WorkbenchSandboxProviderName, resources: SandboxProviderRequestedResources): SandboxProviderAdmission;
-export declare function sandboxProviderLeaseScope(provider: WorkbenchSandboxProviderName): string;
+export declare function createSandboxBackendPlaneForBackend(backend: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
+export declare function sandboxHostHealthExpectationForBackend(backend: WorkbenchSandboxBackendName): SandboxHostHealthExpectation;
+export declare function assertSandboxHostHealthForBackend(value: unknown, backend: WorkbenchSandboxBackendName): void;
+export declare function sandboxBackendAdmissionForResources(backend: WorkbenchSandboxBackendName, resources: SandboxBackendRequestedResources): SandboxBackendAdmission;
 //# sourceMappingURL=index.d.ts.map