@workbench-ai/workbench-core 0.0.63 → 0.0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-events.d.ts +2 -2
- package/dist/execution-events.d.ts.map +1 -1
- package/dist/execution-events.js +6 -6
- package/dist/execution-evidence.d.ts +11 -11
- package/dist/execution-jobs.d.ts +5 -5
- package/dist/execution-runtime-types.d.ts +3 -3
- package/dist/execution-scheduler.d.ts +12 -12
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +5 -5
- package/dist/execution-traces.d.ts +3 -3
- package/dist/index.d.ts +24 -25
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +56 -54
- package/dist/inspection.d.ts +111 -0
- package/dist/inspection.d.ts.map +1 -0
- package/dist/inspection.js +217 -0
- package/dist/sandbox-backends/index.d.ts +12 -20
- package/dist/sandbox-backends/index.d.ts.map +1 -1
- package/dist/sandbox-backends/index.js +15 -26
- package/dist/sandbox-backends/names.d.ts +2 -3
- package/dist/sandbox-backends/names.d.ts.map +1 -1
- package/dist/sandbox-backends/names.js +5 -5
- package/dist/sandbox-inputs.d.ts +6 -6
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFile
|
|
|
8
8
|
import { attachSandboxMetadataToJob, createWorkbenchSandboxFileStore, isSurfaceSnapshotFile, readWorkbenchExecutionSpec, } from "./sandbox-inputs.js";
|
|
9
9
|
import { asRuntimeRecord, importNodeModule, isJsonPayload, jsonRecord, nodeBuiltin, quoteShellArg, resolveWorkbenchWorkerId, } from "./runtime-utils.js";
|
|
10
10
|
import { createWorkbenchExecutionCapability, createWorkbenchSandboxAllocation, collectExecutionCapabilityScopeIssues, collectSandboxAllocationScopeIssues, collectSandboxHandleScopeIssues, assertSandboxBackendSupportsNetworkPolicy, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
|
|
11
|
-
import {
|
|
11
|
+
import { createSandboxBackendPlaneForBackend, } from "./sandbox-backends/index.js";
|
|
12
12
|
import { applyWorkbenchCandidatePatch } from "./candidate-patch.js";
|
|
13
13
|
import { assignUsageRole, completeUsageSummary, mergeUsageSummaries, normalizeUsageSummary, usageStats, } from "./execution-usage.js";
|
|
14
14
|
import { traceFilePaths, workbenchTraceExecutionDirectory, } from "./trace-files.js";
|
|
@@ -35,12 +35,42 @@ export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity,
|
|
|
35
35
|
export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, } from "./execution-outputs.js";
|
|
36
36
|
export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
|
|
37
37
|
export { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.js";
|
|
38
|
+
export { buildCandidateLineage, buildWorkbenchEvaluationComparison, buildWorkbenchEvaluationMetricDescriptors, formatEvaluationConfigurationLabel, isCompleteEvaluationSummary, readEvaluationScore, } from "@workbench-ai/workbench-contract";
|
|
39
|
+
export { WorkbenchInspectionError, createWorkbenchInspection, } from "./inspection.js";
|
|
38
40
|
export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, } from "./execution-traces.js";
|
|
39
|
-
export { DOCKER_SANDBOX_BACKEND,
|
|
41
|
+
export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForBackend, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxBackendName, sandboxBackendAdmissionForResources, sandboxHostHealthExpectationForBackend, } from "./sandbox-backends/index.js";
|
|
40
42
|
export function sanitizeWorkbenchRuntimeJobForExchange(job) {
|
|
41
43
|
const { leaseUntil: _leaseUntil, wakeupLeaseUntil: _wakeupLeaseUntil, hostId: _hostId, workerId: _workerId, claimTokenHash: _claimTokenHash, trace: _trace, traceSessions: _traceSessions, ...portable } = job;
|
|
42
44
|
return { ...portable };
|
|
43
45
|
}
|
|
46
|
+
export function compactWorkbenchRuntimeJobForExchange(job) {
|
|
47
|
+
const portable = sanitizeWorkbenchRuntimeJobForExchange(job);
|
|
48
|
+
return {
|
|
49
|
+
...portable,
|
|
50
|
+
input: compactRuntimeJobJson(portable.input),
|
|
51
|
+
...(portable.output !== undefined
|
|
52
|
+
? { output: compactRuntimeJobJson(portable.output) }
|
|
53
|
+
: {}),
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
function compactRuntimeJobJson(value) {
|
|
57
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
58
|
+
return value;
|
|
59
|
+
}
|
|
60
|
+
const next = { ...value };
|
|
61
|
+
delete next.baseFiles;
|
|
62
|
+
delete next.engineResolveFiles;
|
|
63
|
+
delete next.fileSet;
|
|
64
|
+
delete next.files;
|
|
65
|
+
delete next.traceFiles;
|
|
66
|
+
const candidatePatch = next.candidatePatch;
|
|
67
|
+
if (candidatePatch && typeof candidatePatch === "object" && !Array.isArray(candidatePatch)) {
|
|
68
|
+
const compactPatch = { ...candidatePatch };
|
|
69
|
+
delete compactPatch.files;
|
|
70
|
+
next.candidatePatch = compactPatch;
|
|
71
|
+
}
|
|
72
|
+
return next;
|
|
73
|
+
}
|
|
44
74
|
export function sanitizeWorkbenchRuntimeCandidateForExchange(candidate) {
|
|
45
75
|
const { ownerUserId: _ownerUserId, ownerUsername: _ownerUsername, metrics: _metrics, candidateRunId: _candidateRunId, candidateRunName: _candidateRunName, ...portable } = candidate;
|
|
46
76
|
return { ...portable };
|
|
@@ -1354,22 +1384,6 @@ export function filterCandidateSourceFiles(files) {
|
|
|
1354
1384
|
.filter((file) => isCandidateSourceFilePath(file.path))
|
|
1355
1385
|
.map((file) => ({ ...file }));
|
|
1356
1386
|
}
|
|
1357
|
-
export function buildCandidateLineage(args) {
|
|
1358
|
-
const orderedSummaries = args.summaries.slice().sort((left, right) => {
|
|
1359
|
-
const createdAt = left.createdAt.localeCompare(right.createdAt);
|
|
1360
|
-
return createdAt !== 0 ? createdAt : left.id.localeCompare(right.id);
|
|
1361
|
-
});
|
|
1362
|
-
const summaryIds = new Set(orderedSummaries.map((summary) => summary.id));
|
|
1363
|
-
return {
|
|
1364
|
-
activeId: args.activeId,
|
|
1365
|
-
nodes: orderedSummaries.map((summary) => ({
|
|
1366
|
-
id: summary.id,
|
|
1367
|
-
active: args.activeId === summary.id,
|
|
1368
|
-
summary,
|
|
1369
|
-
})),
|
|
1370
|
-
edges: orderedSummaries.flatMap((summary) => buildLineageEdges(summary, summaryIds)),
|
|
1371
|
-
};
|
|
1372
|
-
}
|
|
1373
1387
|
export function normalizeSurfaceFiles(files) {
|
|
1374
1388
|
const byPath = new Map();
|
|
1375
1389
|
for (const file of files) {
|
|
@@ -1610,18 +1624,6 @@ function summarizeCaseInputs(files) {
|
|
|
1610
1624
|
};
|
|
1611
1625
|
});
|
|
1612
1626
|
}
|
|
1613
|
-
function buildLineageEdges(summary, summaryIds) {
|
|
1614
|
-
const edges = [];
|
|
1615
|
-
if (summary.baseId && summary.baseId !== summary.id && summaryIds.has(summary.baseId)) {
|
|
1616
|
-
edges.push({
|
|
1617
|
-
id: `anchor:${summary.baseId}:${summary.id}`,
|
|
1618
|
-
kind: "anchor",
|
|
1619
|
-
sourceId: summary.baseId,
|
|
1620
|
-
targetId: summary.id,
|
|
1621
|
-
});
|
|
1622
|
-
}
|
|
1623
|
-
return edges;
|
|
1624
|
-
}
|
|
1625
1627
|
export function createWorkbenchRunWorkload(args) {
|
|
1626
1628
|
const purpose = workbenchExecutionPurpose(args.job);
|
|
1627
1629
|
if (!purpose) {
|
|
@@ -1725,8 +1727,8 @@ async function executeWorkbenchExecutionJobWithResolvedAuth(runtimeArgs, options
|
|
|
1725
1727
|
}, executionForRuntime, startedAt, createWorkbenchExecutionCapability(executionForRuntime, { now: startedAt })));
|
|
1726
1728
|
}
|
|
1727
1729
|
const fileStore = createWorkbenchSandboxFileStore(runtimeArgs);
|
|
1728
|
-
const planeFactory = options.
|
|
1729
|
-
const plane = planeFactory(options.
|
|
1730
|
+
const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
|
|
1731
|
+
const plane = planeFactory(options.sandboxBackend, runtimeArgs, startedAt, fileStore);
|
|
1730
1732
|
const validated = await executeValidatedSandboxExecution(plane, executionForRuntime, {
|
|
1731
1733
|
now: startedAt,
|
|
1732
1734
|
runnerId: resolveWorkbenchWorkerId([
|
|
@@ -2282,7 +2284,7 @@ function completedJobFromSandboxResult(fallbackJob, startedAt, result) {
|
|
|
2282
2284
|
return attachSandboxMetadataToJob(failWorkbenchRunJob(fallbackJob, result.startedAt || startedAt, result.error ?? `Sandbox execution ${result.status}.`, result.finishedAt), asRuntimeRecord(result.metadata).sandbox);
|
|
2283
2285
|
}
|
|
2284
2286
|
async function executeCandidateRevisionExecutionInCurrentRuntime(args, execution, startedAt, capability, eventPublisher) {
|
|
2285
|
-
const { workload, result } = await
|
|
2287
|
+
const { workload, result } = await runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher);
|
|
2286
2288
|
if (result.error || (result.exitCode ?? 0) !== 0) {
|
|
2287
2289
|
return failWorkbenchRunJob(args.job, startedAt, result.error ?? `Adapter ${execution.adapter.use} exited with status ${result.exitCode}.`, result.finishedAt, result);
|
|
2288
2290
|
}
|
|
@@ -2331,7 +2333,7 @@ async function executeAttemptExecutionInCurrentRuntime(args, execution, startedA
|
|
|
2331
2333
|
engineCases: args.engineCases,
|
|
2332
2334
|
traceFiles: args.traceFiles,
|
|
2333
2335
|
});
|
|
2334
|
-
const workloadResult = await
|
|
2336
|
+
const workloadResult = await runRemoteCommandExecutionSteps(args, workload, attemptStepsForExecution(execution, args.spec, args.adapterManifests), startedAt, {
|
|
2335
2337
|
capability,
|
|
2336
2338
|
eventPublisher,
|
|
2337
2339
|
});
|
|
@@ -2411,7 +2413,7 @@ export async function executeRuntimeControlOperationSequenceInCurrentRuntime(arg
|
|
|
2411
2413
|
const adapterAuth = await materializeSandboxAdapterAuth(runtimeArgs, childExecution);
|
|
2412
2414
|
let result;
|
|
2413
2415
|
try {
|
|
2414
|
-
result = await
|
|
2416
|
+
result = await runRemoteCommandExecutionSteps({
|
|
2415
2417
|
...runtimeArgs,
|
|
2416
2418
|
...(adapterAuth.root ? { adapterAuthRoot: adapterAuth.root } : {}),
|
|
2417
2419
|
}, workload, args.runtimeControlOperation.operations.map((operation, index) => runtimeControlStepForOperation(operation, index, args.adapterManifests)), startedAt, {
|
|
@@ -2446,8 +2448,8 @@ async function executeRuntimeControlOperationSequenceInSandbox(args, options, st
|
|
|
2446
2448
|
const childArgs = createRuntimeControlSandboxInput(args, request);
|
|
2447
2449
|
const execution = readWorkbenchExecutionSpec(childArgs.job);
|
|
2448
2450
|
const fileStore = createWorkbenchSandboxFileStore(childArgs);
|
|
2449
|
-
const planeFactory = options.
|
|
2450
|
-
const plane = planeFactory(options.
|
|
2451
|
+
const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
|
|
2452
|
+
const plane = planeFactory(options.sandboxBackend, childArgs, startedAt, fileStore);
|
|
2451
2453
|
assertSandboxBackendSupportsNetworkPolicy(plane.backend, execution);
|
|
2452
2454
|
const sandboxOptions = {
|
|
2453
2455
|
now: startedAt,
|
|
@@ -2667,7 +2669,7 @@ function assertRuntimeControlScope(label, issues) {
|
|
|
2667
2669
|
throw new Error(`${label} failed validation:\n${issues.join("\n")}`);
|
|
2668
2670
|
}
|
|
2669
2671
|
}
|
|
2670
|
-
async function
|
|
2672
|
+
async function runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher) {
|
|
2671
2673
|
const workload = createWorkbenchRunWorkload({
|
|
2672
2674
|
job: args.job,
|
|
2673
2675
|
spec: args.spec,
|
|
@@ -2676,13 +2678,13 @@ async function runHostedProtocolExecutionResult(args, execution, startedAt, capa
|
|
|
2676
2678
|
engineCases: args.engineCases,
|
|
2677
2679
|
traceFiles: args.traceFiles,
|
|
2678
2680
|
});
|
|
2679
|
-
const result = await
|
|
2681
|
+
const result = await runRemoteCommandExecutionSteps(args, workload, [protocolStepForExecution(execution, args.adapterManifests)], startedAt, {
|
|
2680
2682
|
capability,
|
|
2681
2683
|
eventPublisher,
|
|
2682
2684
|
});
|
|
2683
2685
|
return { workload, result };
|
|
2684
2686
|
}
|
|
2685
|
-
async function
|
|
2687
|
+
async function runRemoteCommandExecutionSteps(args, workload, steps, startedAt, options = {}) {
|
|
2686
2688
|
const [{ execFile }, fs, os, path, { promisify }] = await Promise.all([
|
|
2687
2689
|
importNodeModule(nodeBuiltin("child_process")),
|
|
2688
2690
|
importNodeModule(nodeBuiltin("fs/promises")),
|
|
@@ -2752,7 +2754,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2752
2754
|
await stageWorkbenchEnginePrivateFiles(workspace.root, workload);
|
|
2753
2755
|
enginePrivateStaged = true;
|
|
2754
2756
|
}
|
|
2755
|
-
await
|
|
2757
|
+
await resetRemoteWorkloadStepOutput(workspace.root);
|
|
2756
2758
|
const stepAdapterId = step.adapter?.use ?? execution.adapter.use;
|
|
2757
2759
|
const adapterRequestPath = await writeWorkbenchAdapterRequest(workspace.root, workload, execution, step, adapterAuthRequestForStep(args, stepAdapterId), args.adapterManifests);
|
|
2758
2760
|
const stepRole = stepEventRole(step);
|
|
@@ -2768,10 +2770,10 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2768
2770
|
const adapterRoot = step.executor === "host"
|
|
2769
2771
|
? hostAdapterRoots.get(stepAdapterId)
|
|
2770
2772
|
: undefined;
|
|
2771
|
-
const command =
|
|
2773
|
+
const command = createRemoteWorkloadShellCommand(workspace.root, step.command, step.label, step.okExitCodes);
|
|
2772
2774
|
await execFileAsync("sh", ["-c", command], {
|
|
2773
2775
|
cwd: adapterRoot ?? workspace.root,
|
|
2774
|
-
env:
|
|
2776
|
+
env: createRemoteWorkloadAdapterEnv(workspace.root, adapterRequestPath, adapterAuthEnvForStep(args, stepAdapterId), adapterRoot ? { adapterRoot } : undefined, args.adapterRuntimeEnv),
|
|
2775
2777
|
maxBuffer: 10 * 1024 * 1024,
|
|
2776
2778
|
timeout: stepTimeoutMs,
|
|
2777
2779
|
});
|
|
@@ -2811,7 +2813,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2811
2813
|
.catch(() => undefined);
|
|
2812
2814
|
}
|
|
2813
2815
|
if (exitCode !== 0) {
|
|
2814
|
-
return await
|
|
2816
|
+
return await readRemoteRunFailureResult(workspace.root, workload, {
|
|
2815
2817
|
exitCode,
|
|
2816
2818
|
error: runtimeError ?? `Runtime command exited with status ${exitCode}.`,
|
|
2817
2819
|
startedAt,
|
|
@@ -2843,10 +2845,10 @@ async function runCandidatePrepareCommand(args) {
|
|
|
2843
2845
|
role,
|
|
2844
2846
|
});
|
|
2845
2847
|
try {
|
|
2846
|
-
const shellCommand =
|
|
2848
|
+
const shellCommand = createRemoteWorkloadShellCommand(args.root, command, "candidate_prepare");
|
|
2847
2849
|
await args.execFileAsync("sh", ["-c", shellCommand], {
|
|
2848
2850
|
cwd: args.root,
|
|
2849
|
-
env:
|
|
2851
|
+
env: createRemoteWorkloadPrepareEnv(args.root),
|
|
2850
2852
|
maxBuffer: 10 * 1024 * 1024,
|
|
2851
2853
|
timeout: args.timeoutMs,
|
|
2852
2854
|
});
|
|
@@ -3126,7 +3128,7 @@ function adapterFilePathWithinRoot(filePath, sourceRoot) {
|
|
|
3126
3128
|
}
|
|
3127
3129
|
return normalized.slice(sourceRoot.length + 1);
|
|
3128
3130
|
}
|
|
3129
|
-
async function
|
|
3131
|
+
async function readRemoteRunFailureResult(root, workload, options) {
|
|
3130
3132
|
const traceFiles = await readRuntimeTraceFiles(root, workload);
|
|
3131
3133
|
const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root), { ignorePath: isWorkbenchInternalOutputPath }));
|
|
3132
3134
|
const startedAt = options.startedAt ?? new Date().toISOString();
|
|
@@ -3213,7 +3215,7 @@ async function readRuntimeTraceFiles(root, workload) {
|
|
|
3213
3215
|
function filterRuntimeOutputFiles(files) {
|
|
3214
3216
|
return files.filter((file) => !isWorkbenchInternalOutputPath(file.path));
|
|
3215
3217
|
}
|
|
3216
|
-
function
|
|
3218
|
+
function createRemoteWorkloadShellCommand(root, command, prefix = "", okExitCodes = [0]) {
|
|
3217
3219
|
const outputPrefix = prefix ? `${prefix}_` : "";
|
|
3218
3220
|
const okExpression = [...new Set(okExitCodes)]
|
|
3219
3221
|
.sort((left, right) => left - right)
|
|
@@ -3233,7 +3235,7 @@ function createHostedWorkloadShellCommand(root, command, prefix = "", okExitCode
|
|
|
3233
3235
|
'exit "$status"',
|
|
3234
3236
|
].join("; ");
|
|
3235
3237
|
}
|
|
3236
|
-
async function
|
|
3238
|
+
async function resetRemoteWorkloadStepOutput(root) {
|
|
3237
3239
|
const fs = await importNodeModule(nodeBuiltin("fs/promises"));
|
|
3238
3240
|
await fs
|
|
3239
3241
|
.rm(workbenchAdapterOperationResultPath(outputDir(root)), { force: true })
|
|
@@ -3331,8 +3333,8 @@ function requireImproveEdits(spec) {
|
|
|
3331
3333
|
}
|
|
3332
3334
|
return edits;
|
|
3333
3335
|
}
|
|
3334
|
-
function
|
|
3335
|
-
const env =
|
|
3336
|
+
function createRemoteWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {}, options = {}, runtimeEnv = {}) {
|
|
3337
|
+
const env = createRemoteWorkloadBaseEnv();
|
|
3336
3338
|
env.WORKBENCH_ADAPTER_REQUEST = adapterRequestPath;
|
|
3337
3339
|
env.WORKBENCH_OUTPUT = outputDir(root);
|
|
3338
3340
|
env.WORKBENCH_RESULT = workbenchAdapterOperationResultPath(outputDir(root));
|
|
@@ -3348,12 +3350,12 @@ function createHostedWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {
|
|
|
3348
3350
|
Object.assign(env, runtimeEnv);
|
|
3349
3351
|
return env;
|
|
3350
3352
|
}
|
|
3351
|
-
function
|
|
3352
|
-
const env =
|
|
3353
|
+
function createRemoteWorkloadPrepareEnv(root) {
|
|
3354
|
+
const env = createRemoteWorkloadBaseEnv();
|
|
3353
3355
|
env.WORKBENCH_OUTPUT = outputDir(root);
|
|
3354
3356
|
return env;
|
|
3355
3357
|
}
|
|
3356
|
-
function
|
|
3358
|
+
function createRemoteWorkloadBaseEnv() {
|
|
3357
3359
|
const env = {};
|
|
3358
3360
|
for (const [key, value] of Object.entries(process.env)) {
|
|
3359
3361
|
if (typeof value === "string") {
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, SurfaceSnapshotFile, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
|
|
3
|
+
export interface WorkbenchInspectionErrorOptions {
|
|
4
|
+
status?: number;
|
|
5
|
+
}
|
|
6
|
+
export declare class WorkbenchInspectionError extends Error {
|
|
7
|
+
readonly status: number;
|
|
8
|
+
readonly statusCode: number;
|
|
9
|
+
constructor(message: string, options?: WorkbenchInspectionErrorOptions);
|
|
10
|
+
}
|
|
11
|
+
export interface WorkbenchInspectionFileListInput {
|
|
12
|
+
fingerprint?: string | null;
|
|
13
|
+
}
|
|
14
|
+
export interface WorkbenchInspectionPreviewInput {
|
|
15
|
+
path: string;
|
|
16
|
+
view: "diff" | "raw" | "rendered";
|
|
17
|
+
}
|
|
18
|
+
export interface WorkbenchInspectionFilePreviewInput extends WorkbenchInspectionFileListInput, WorkbenchInspectionPreviewInput {
|
|
19
|
+
}
|
|
20
|
+
export interface WorkbenchInspectionCandidateInput {
|
|
21
|
+
id: string;
|
|
22
|
+
}
|
|
23
|
+
export interface WorkbenchInspectionCandidatePreviewInput extends WorkbenchInspectionCandidateInput, WorkbenchInspectionPreviewInput {
|
|
24
|
+
}
|
|
25
|
+
export interface WorkbenchInspectionEvaluationInput {
|
|
26
|
+
id: string;
|
|
27
|
+
}
|
|
28
|
+
export interface WorkbenchInspectionCaseReviewInput {
|
|
29
|
+
candidateId: string;
|
|
30
|
+
caseId: string;
|
|
31
|
+
runId: string;
|
|
32
|
+
sampleIndex?: number;
|
|
33
|
+
}
|
|
34
|
+
export interface WorkbenchInspectionRunInput {
|
|
35
|
+
id: string;
|
|
36
|
+
includeJobs?: boolean;
|
|
37
|
+
}
|
|
38
|
+
export interface WorkbenchInspectionExecutionInput {
|
|
39
|
+
runId: string;
|
|
40
|
+
jobId: string;
|
|
41
|
+
}
|
|
42
|
+
export interface WorkbenchInspectionExecutionPreviewInput extends WorkbenchInspectionExecutionInput {
|
|
43
|
+
path: string;
|
|
44
|
+
view: "diff" | "raw" | "rendered";
|
|
45
|
+
}
|
|
46
|
+
export interface WorkbenchInspectionRunDetail {
|
|
47
|
+
run: RunSummary;
|
|
48
|
+
jobs?: RemoteWorkbenchJob[];
|
|
49
|
+
}
|
|
50
|
+
export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
|
|
51
|
+
export interface WorkbenchFailureDetail {
|
|
52
|
+
kind: WorkbenchFailureKind;
|
|
53
|
+
id: string;
|
|
54
|
+
status?: string;
|
|
55
|
+
runId?: string;
|
|
56
|
+
candidateId?: string;
|
|
57
|
+
evaluationId?: string;
|
|
58
|
+
jobId?: string;
|
|
59
|
+
caseId?: string;
|
|
60
|
+
sampleIndex?: number;
|
|
61
|
+
attemptIndex?: number;
|
|
62
|
+
error?: string;
|
|
63
|
+
}
|
|
64
|
+
export interface WorkbenchFailureDiagnosis {
|
|
65
|
+
targetId: string | null;
|
|
66
|
+
failures: WorkbenchFailureDetail[];
|
|
67
|
+
failedRunCount: number;
|
|
68
|
+
failedEvaluationCount: number;
|
|
69
|
+
failedJobCount: number;
|
|
70
|
+
}
|
|
71
|
+
export interface WorkbenchInspectionBackend {
|
|
72
|
+
projectId: string;
|
|
73
|
+
snapshot(): Promise<RuntimeSnapshot>;
|
|
74
|
+
spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
75
|
+
sourceFiles(input: WorkbenchInspectionFileListInput): Promise<SurfaceSnapshotFile[]>;
|
|
76
|
+
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
77
|
+
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<{
|
|
78
|
+
files: SurfaceSnapshotFile[];
|
|
79
|
+
changedPaths: readonly string[];
|
|
80
|
+
}>;
|
|
81
|
+
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
82
|
+
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
83
|
+
jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
|
|
84
|
+
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<SurfaceSnapshotFile[]>;
|
|
85
|
+
caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
86
|
+
executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
87
|
+
traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
|
|
88
|
+
traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
|
|
89
|
+
}
|
|
90
|
+
export interface WorkbenchInspection {
|
|
91
|
+
snapshot(): Promise<RuntimeSnapshot>;
|
|
92
|
+
spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
93
|
+
sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
|
|
94
|
+
sourcePreview(input: WorkbenchInspectionFilePreviewInput): Promise<CandidateFilePreview>;
|
|
95
|
+
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
96
|
+
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
|
|
97
|
+
candidatePreview(input: WorkbenchInspectionCandidatePreviewInput): Promise<CandidateFilePreview>;
|
|
98
|
+
evaluations(): Promise<WorkbenchEvaluationComparison>;
|
|
99
|
+
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
100
|
+
caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
101
|
+
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
102
|
+
executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
103
|
+
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
|
|
104
|
+
executionPreview(input: WorkbenchInspectionExecutionPreviewInput): Promise<CandidateFilePreview>;
|
|
105
|
+
lineage(): Promise<CandidateLineageGraph>;
|
|
106
|
+
diagnose(input?: {
|
|
107
|
+
targetId?: string | null;
|
|
108
|
+
}): Promise<WorkbenchFailureDiagnosis>;
|
|
109
|
+
}
|
|
110
|
+
export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
|
|
111
|
+
//# sourceMappingURL=inspection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,mBAAmB,EACnB,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAc1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC,EAAE,+BAA+B;CAAG;AAEjI,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC,EAAE,+BAA+B;CAAG;AAEvI,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC;IACjG,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC
;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACrF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC;QAChE,KAAK,EAAE,mBAAmB,EAAE,CAAC;QAC7B,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;KACjC,CAAC,CAAC;IACH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACzF,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,aAAa,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACzF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACjG,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC
;IACjG,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAiGrB"}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
|
|
3
|
+
import { candidateRecordWithoutDerivedFields, createCandidateFilePreview, createCaseReview, summarizeCandidateFiles, } from "./index.js";
|
|
4
|
+
export class WorkbenchInspectionError extends Error {
|
|
5
|
+
status;
|
|
6
|
+
statusCode;
|
|
7
|
+
constructor(message, options = {}) {
|
|
8
|
+
super(message);
|
|
9
|
+
this.name = "WorkbenchInspectionError";
|
|
10
|
+
this.status = options.status ?? 400;
|
|
11
|
+
this.statusCode = this.status;
|
|
12
|
+
}
|
|
13
|
+
}
|
|
14
|
+
/**
 * Creates the inspection API object for a backend.
 *
 * Several members (`snapshot`, `spec`, `evaluation`, `run`) forward straight to
 * the backend method of the same name. The others post-process backend results
 * through helper functions, or fall back to a generic implementation when the
 * backend does not provide the optional `caseReview` / `executionTrace` methods.
 *
 * @param backend Object supplying the data-access methods used below.
 * @returns An object exposing the inspection operations.
 */
export function createWorkbenchInspection(backend) {
    // Summarize a plain file list, passing every file's path as the second argument.
    const summarizeAllPaths = (files) => summarizeCandidateFiles(files, files.map((file) => file.path));
    // Build a preview of `request.path` (rendered per `request.view`) from `files`.
    const previewFrom = (files, request) => createCandidateFilePreview({
        files,
        path: request.path,
        view: request.view,
    });
    return {
        snapshot() {
            return backend.snapshot();
        },
        spec(input = {}) {
            return backend.spec(input);
        },
        async sourceFiles(input = {}) {
            return summarizeAllPaths(await backend.sourceFiles(input));
        },
        async sourcePreview(input) {
            return previewFrom(await backend.sourceFiles(input), input);
        },
        async candidate(input) {
            const record = await backend.candidate(input);
            return candidateRecordWithoutDerivedFields(record);
        },
        async candidateFiles(input) {
            const { files, changedPaths } = await backend.candidateFiles(input);
            return summarizeCandidateFiles(files, changedPaths);
        },
        async candidatePreview(input) {
            const listing = await backend.candidateFiles(input);
            return previewFrom(listing.files, input);
        },
        async evaluations() {
            const { evaluations } = await backend.snapshot();
            return buildWorkbenchEvaluationComparison(evaluations);
        },
        evaluation(input) {
            return backend.evaluation(input);
        },
        async caseReview(input) {
            // Prefer the backend's own implementation when it has one.
            if (backend.caseReview) {
                return await backend.caseReview(input);
            }
            // Generic fallback: assemble the review from the candidate and the run's jobs.
            const { candidateId, caseId, sampleIndex, runId } = input;
            const candidate = await backend.candidate({ id: candidateId });
            const runDetail = await backend.run({ id: runId, includeJobs: true });
            const executions = buildCandidateCaseExecutionRefs({
                jobs: runDetail.jobs ?? [],
                candidateId,
                caseId,
                sampleIndex,
            });
            return createCaseReview({ candidate, caseId, executions });
        },
        run(input) {
            return backend.run(input);
        },
        async executionTrace(input) {
            // Prefer the backend's own implementation when it has one.
            if (backend.executionTrace) {
                return await backend.executionTrace(input);
            }
            // The generic fallback needs both a job lookup and a per-job trace loader.
            const canBuildTrace = Boolean(backend.jobInRun) && Boolean(backend.traceForJob);
            if (!canBuildTrace) {
                throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
            }
            const job = await backend.jobInRun(input);
            const projectId = backend.projectId;
            return {
                projectId,
                runId: input.runId,
                executions: buildWorkbenchExecutionEvidence({
                    jobs: [job],
                    traceIdPrefix: `${projectId}-execution`,
                    traceForJob: backend.traceForJob,
                    traceSessionsForJob: backend.traceSessionsForJob,
                }),
            };
        },
        async executionFiles(input) {
            return summarizeAllPaths(await backend.executionFiles(input));
        },
        async executionPreview(input) {
            return previewFrom(await backend.executionFiles(input), input);
        },
        async lineage() {
            const { summaries, activeId } = await backend.snapshot();
            return buildCandidateLineage({ summaries, activeId });
        },
        async diagnose(input = {}) {
            const snapshot = await backend.snapshot();
            // A missing, empty or whitespace-only target id is normalized to null.
            const targetId = input.targetId?.trim() || null;
            return await diagnoseWorkbenchFailures({ snapshot, backend, targetId });
        },
    };
}
|
|
105
|
+
/**
 * Collects failure records for one target or for the whole snapshot.
 *
 * - If `args.targetId` matches a run in the snapshot, the run is re-fetched
 *   with its jobs and both run-level and job-level failures are reported.
 * - Otherwise, if it matches an evaluation, that evaluation is re-fetched and
 *   its failures are reported.
 * - Otherwise (no target id, or no match), every run and evaluation summary in
 *   the snapshot is scanned.
 *
 * @param args `{ snapshot, backend, targetId }`.
 * @returns `{ targetId, failures, failedRunCount, failedEvaluationCount, failedJobCount }`.
 */
async function diagnoseWorkbenchFailures(args) {
    const { snapshot, backend, targetId } = args;
    const hasTargetId = (entry) => entry.id === targetId;
    // Both lookups are only attempted when a target id was supplied.
    const targetRun = targetId ? snapshot.runs.find(hasTargetId) : null;
    const targetEvaluation = targetId ? snapshot.evaluations.find(hasTargetId) : null;
    let failures = [];
    if (targetRun) {
        const detail = await backend.run({ id: targetRun.id, includeJobs: true });
        failures = failures.concat(runFailures(detail.run), jobFailures(detail.jobs ?? []));
    }
    else if (targetEvaluation) {
        const evaluation = await backend.evaluation({ id: targetEvaluation.id });
        failures = failures.concat(evaluationFailures(evaluation));
    }
    else {
        for (const run of snapshot.runs) {
            failures = failures.concat(runFailures(run));
        }
        for (const evaluation of snapshot.evaluations) {
            failures = failures.concat(evaluationSummaryFailures(evaluation));
        }
    }
    // Number of collected failures carrying the given `kind` tag.
    const countOfKind = (kind) => failures.reduce((total, failure) => (failure.kind === kind ? total + 1 : total), 0);
    return {
        targetId,
        failures,
        failedRunCount: countOfKind("run"),
        failedEvaluationCount: countOfKind("evaluation"),
        failedJobCount: countOfKind("job"),
    };
}
|
|
138
|
+
function runFailures(run) {
|
|
139
|
+
if (run.status !== "finished" || (run.outcome !== "error" && run.outcome !== "cancelled")) {
|
|
140
|
+
return [];
|
|
141
|
+
}
|
|
142
|
+
return [{
|
|
143
|
+
kind: "run",
|
|
144
|
+
id: run.id,
|
|
145
|
+
runId: run.id,
|
|
146
|
+
candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
|
|
147
|
+
status: run.outcome,
|
|
148
|
+
...(run.error ? { error: run.error } : {}),
|
|
149
|
+
}];
|
|
150
|
+
}
|
|
151
|
+
function evaluationSummaryFailures(evaluation) {
|
|
152
|
+
if (evaluation.status === "completed" &&
|
|
153
|
+
evaluation.errorSampleCount === 0 &&
|
|
154
|
+
!evaluation.error) {
|
|
155
|
+
return [];
|
|
156
|
+
}
|
|
157
|
+
return [{
|
|
158
|
+
kind: "evaluation",
|
|
159
|
+
id: evaluation.id,
|
|
160
|
+
evaluationId: evaluation.id,
|
|
161
|
+
runId: evaluation.runId,
|
|
162
|
+
candidateId: evaluation.candidateId,
|
|
163
|
+
status: evaluation.status,
|
|
164
|
+
...(evaluation.error ? { error: evaluation.error } : {}),
|
|
165
|
+
}];
|
|
166
|
+
}
|
|
167
|
+
/**
 * Expands an evaluation into evaluation-, sample- and case-level failure records.
 *
 * Starts from the evaluation-level records, then walks
 * `evaluation.evaluation.samples`. A sample is reported when it has a truthy
 * `error` or when any of its cases has a status other than "completed"; each
 * such case is reported right after its sample.
 *
 * @param evaluation Evaluation record including its nested samples.
 * @returns Failure records in the order: evaluation, then per sample (sample, its cases).
 */
function evaluationFailures(evaluation) {
    const collected = evaluationSummaryFailures(evaluation);
    // Fields shared by every sample and case record of this evaluation.
    const { id: evaluationId, runId, candidateId } = evaluation;
    for (const sample of evaluation.evaluation.samples) {
        // Cases without a status are treated as not failed.
        const failedCases = (sample.cases ?? []).filter((entry) => entry.status && entry.status !== "completed");
        if (!sample.error && failedCases.length === 0) {
            continue;
        }
        const sampleIndex = sample.index;
        collected.push({
            kind: "sample",
            id: `${evaluationId}:sample:${sampleIndex}`,
            evaluationId,
            runId,
            candidateId,
            sampleIndex,
            status: sample.status,
            ...(sample.error ? { error: sample.error } : {}),
        });
        for (const failedCase of failedCases) {
            collected.push({
                kind: "case",
                id: `${evaluationId}:case:${failedCase.id}:sample:${sampleIndex}`,
                evaluationId,
                runId,
                candidateId,
                caseId: failedCase.id,
                sampleIndex,
                status: failedCase.status,
            });
        }
    }
    return collected;
}
|
|
201
|
+
/**
 * Converts the failed jobs in a list into `kind: "job"` failure records.
 *
 * @param jobs Array of job records; only those whose status passes
 *   `isFailedJobStatus` are reported.
 * @returns One record per failed job, in input order.
 */
function jobFailures(jobs) {
    return jobs.flatMap((job) => {
        if (!isFailedJobStatus(job.status)) {
            return [];
        }
        const failure = {
            kind: "job",
            id: job.id,
            jobId: job.id,
            runId: job.runId,
            candidateId: job.candidateId,
            status: job.status,
            // Non-numeric attempts are reported as undefined.
            attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
        };
        // The `error` key is only present when the job carries a truthy error.
        return [job.error ? { ...failure, error: job.error } : failure];
    });
}
|
|
215
|
+
function isFailedJobStatus(status) {
|
|
216
|
+
return status === "failed" || status === "cancelled";
|
|
217
|
+
}
|
|
@@ -1,37 +1,29 @@
|
|
|
1
1
|
import type { WorkbenchExecutionRuntimeInput } from "../execution-runtime-types.ts";
|
|
2
2
|
import type { SandboxBackendCapabilities, SandboxExecutionFileStore, SandboxPlane } from "../sandbox-plane.ts";
|
|
3
|
-
import { type
|
|
4
|
-
export { DOCKER_SANDBOX_BACKEND,
|
|
3
|
+
import { type WorkbenchSandboxBackendName } from "./names.ts";
|
|
4
|
+
export { DOCKER_SANDBOX_BACKEND, resolveWorkbenchSandboxBackendName, type WorkbenchSandboxBackendName, } from "./names.ts";
|
|
5
5
|
export { createDockerSandboxBackendDescriptor, createDockerSandboxPlane, } from "./docker.ts";
|
|
6
6
|
export interface SandboxHostHealthExpectation {
|
|
7
|
-
|
|
8
|
-
backend: string;
|
|
7
|
+
backend: WorkbenchSandboxBackendName;
|
|
9
8
|
capabilities: SandboxBackendCapabilities;
|
|
10
9
|
}
|
|
11
|
-
export interface
|
|
10
|
+
export interface SandboxBackendRequestedResources {
|
|
12
11
|
cpu: number;
|
|
13
12
|
memoryGb: number;
|
|
14
13
|
diskGb?: number;
|
|
15
14
|
timeoutMinutes?: number;
|
|
16
15
|
}
|
|
17
|
-
export interface
|
|
16
|
+
export interface SandboxBackendHostCost {
|
|
18
17
|
cpu: number;
|
|
19
18
|
memoryGb: number;
|
|
20
19
|
diskGb: number;
|
|
21
20
|
}
|
|
22
|
-
export interface
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
export interface SandboxBackendAdmission {
|
|
22
|
+
backend: WorkbenchSandboxBackendName;
|
|
23
|
+
hostCost: SandboxBackendHostCost;
|
|
25
24
|
}
|
|
26
|
-
export
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
export declare function createSandboxBackendPlaneForProvider(provider: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
|
|
32
|
-
export declare function sandboxHostHealthExpectationForProvider(provider: WorkbenchSandboxProviderName): SandboxHostHealthExpectation;
|
|
33
|
-
export declare function assertSandboxHostHealthForProvider(value: unknown, provider: WorkbenchSandboxProviderName): void;
|
|
34
|
-
export declare function sandboxProviderDefaultMaxConcurrentJobs(_provider: WorkbenchSandboxProviderName): number | null;
|
|
35
|
-
export declare function sandboxProviderAdmissionForResources(provider: WorkbenchSandboxProviderName, resources: SandboxProviderRequestedResources): SandboxProviderAdmission;
|
|
36
|
-
export declare function sandboxProviderLeaseScope(provider: WorkbenchSandboxProviderName): string;
|
|
25
|
+
export declare function createSandboxBackendPlaneForBackend(backend: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
|
|
26
|
+
export declare function sandboxHostHealthExpectationForBackend(backend: WorkbenchSandboxBackendName): SandboxHostHealthExpectation;
|
|
27
|
+
export declare function assertSandboxHostHealthForBackend(value: unknown, backend: WorkbenchSandboxBackendName): void;
|
|
28
|
+
export declare function sandboxBackendAdmissionForResources(backend: WorkbenchSandboxBackendName, resources: SandboxBackendRequestedResources): SandboxBackendAdmission;
|
|
37
29
|
//# sourceMappingURL=index.d.ts.map
|