@workbench-ai/workbench-core 0.0.64 → 0.0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-events.d.ts +2 -2
- package/dist/execution-events.d.ts.map +1 -1
- package/dist/execution-events.js +6 -6
- package/dist/execution-evidence.d.ts +11 -11
- package/dist/execution-jobs.d.ts +5 -5
- package/dist/execution-runtime-types.d.ts +3 -3
- package/dist/execution-scheduler.d.ts +12 -12
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +5 -5
- package/dist/execution-traces.d.ts +3 -3
- package/dist/index.d.ts +24 -26
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +28 -54
- package/dist/inspection.d.ts +111 -0
- package/dist/inspection.d.ts.map +1 -0
- package/dist/inspection.js +217 -0
- package/dist/sandbox-backends/index.d.ts +12 -20
- package/dist/sandbox-backends/index.d.ts.map +1 -1
- package/dist/sandbox-backends/index.js +15 -26
- package/dist/sandbox-backends/names.d.ts +2 -3
- package/dist/sandbox-backends/names.d.ts.map +1 -1
- package/dist/sandbox-backends/names.js +5 -5
- package/dist/sandbox-inputs.d.ts +6 -6
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFile
|
|
|
8
8
|
import { attachSandboxMetadataToJob, createWorkbenchSandboxFileStore, isSurfaceSnapshotFile, readWorkbenchExecutionSpec, } from "./sandbox-inputs.js";
|
|
9
9
|
import { asRuntimeRecord, importNodeModule, isJsonPayload, jsonRecord, nodeBuiltin, quoteShellArg, resolveWorkbenchWorkerId, } from "./runtime-utils.js";
|
|
10
10
|
import { createWorkbenchExecutionCapability, createWorkbenchSandboxAllocation, collectExecutionCapabilityScopeIssues, collectSandboxAllocationScopeIssues, collectSandboxHandleScopeIssues, assertSandboxBackendSupportsNetworkPolicy, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
|
|
11
|
-
import {
|
|
11
|
+
import { createSandboxBackendPlaneForBackend, } from "./sandbox-backends/index.js";
|
|
12
12
|
import { applyWorkbenchCandidatePatch } from "./candidate-patch.js";
|
|
13
13
|
import { assignUsageRole, completeUsageSummary, mergeUsageSummaries, normalizeUsageSummary, usageStats, } from "./execution-usage.js";
|
|
14
14
|
import { traceFilePaths, workbenchTraceExecutionDirectory, } from "./trace-files.js";
|
|
@@ -35,8 +35,10 @@ export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity,
|
|
|
35
35
|
export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, } from "./execution-outputs.js";
|
|
36
36
|
export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, } from "./sandbox-plane.js";
|
|
37
37
|
export { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.js";
|
|
38
|
+
export { buildCandidateLineage, buildWorkbenchEvaluationComparison, buildWorkbenchEvaluationMetricDescriptors, formatEvaluationConfigurationLabel, isCompleteEvaluationSummary, readEvaluationScore, } from "@workbench-ai/workbench-contract";
|
|
39
|
+
export { WorkbenchInspectionError, createWorkbenchInspection, } from "./inspection.js";
|
|
38
40
|
export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, } from "./execution-traces.js";
|
|
39
|
-
export { DOCKER_SANDBOX_BACKEND,
|
|
41
|
+
export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForBackend, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxBackendName, sandboxBackendAdmissionForResources, sandboxHostHealthExpectationForBackend, } from "./sandbox-backends/index.js";
|
|
40
42
|
export function sanitizeWorkbenchRuntimeJobForExchange(job) {
|
|
41
43
|
const { leaseUntil: _leaseUntil, wakeupLeaseUntil: _wakeupLeaseUntil, hostId: _hostId, workerId: _workerId, claimTokenHash: _claimTokenHash, trace: _trace, traceSessions: _traceSessions, ...portable } = job;
|
|
42
44
|
return { ...portable };
|
|
@@ -1382,22 +1384,6 @@ export function filterCandidateSourceFiles(files) {
|
|
|
1382
1384
|
.filter((file) => isCandidateSourceFilePath(file.path))
|
|
1383
1385
|
.map((file) => ({ ...file }));
|
|
1384
1386
|
}
|
|
1385
|
-
/**
 * Builds the candidate lineage graph for a set of candidate summaries.
 *
 * Summaries are copied and ordered by `createdAt`, with `id` as the
 * tie-breaker (both compared with `localeCompare`). Each ordered summary
 * becomes a node flagged `active` when its id equals `args.activeId`, and
 * edges are collected from `buildLineageEdges` for every summary.
 *
 * @param args Object carrying `summaries` (array of summaries with `id` and
 *   `createdAt`) and `activeId`.
 * @returns `{ activeId, nodes, edges }`.
 */
export function buildCandidateLineage(args) {
    const byCreationThenId = (first, second) => {
        const byCreatedAt = first.createdAt.localeCompare(second.createdAt);
        if (byCreatedAt !== 0) {
            return byCreatedAt;
        }
        return first.id.localeCompare(second.id);
    };
    // Sort a copy so the caller's array keeps its original order.
    const ordered = args.summaries.slice().sort(byCreationThenId);
    const knownIds = new Set();
    for (const entry of ordered) {
        knownIds.add(entry.id);
    }
    const nodes = [];
    for (const entry of ordered) {
        nodes.push({
            id: entry.id,
            active: args.activeId === entry.id,
            summary: entry,
        });
    }
    const edges = [];
    for (const entry of ordered) {
        edges.push(...buildLineageEdges(entry, knownIds));
    }
    return {
        activeId: args.activeId,
        nodes,
        edges,
    };
}
|
|
1401
1387
|
export function normalizeSurfaceFiles(files) {
|
|
1402
1388
|
const byPath = new Map();
|
|
1403
1389
|
for (const file of files) {
|
|
@@ -1638,18 +1624,6 @@ function summarizeCaseInputs(files) {
|
|
|
1638
1624
|
};
|
|
1639
1625
|
});
|
|
1640
1626
|
}
|
|
1641
|
-
/**
 * Returns the lineage edges that end at `summary`.
 *
 * A single "anchor" edge from `summary.baseId` to `summary.id` is produced
 * when the base id is set, differs from the summary's own id, and is present
 * in `summaryIds`; otherwise the result is an empty array.
 *
 * @param summary Candidate summary with `id` and optional `baseId`.
 * @param summaryIds Set of ids that are present in the graph.
 * @returns Array with zero or one edge object.
 */
function buildLineageEdges(summary, summaryIds) {
    const { baseId, id } = summary;
    // Self-references and bases outside the known id set produce no edge.
    const hasKnownDistinctBase = Boolean(baseId) && baseId !== id && summaryIds.has(baseId);
    if (!hasKnownDistinctBase) {
        return [];
    }
    return [
        {
            id: `anchor:${baseId}:${id}`,
            kind: "anchor",
            sourceId: baseId,
            targetId: id,
        },
    ];
}
|
|
1653
1627
|
export function createWorkbenchRunWorkload(args) {
|
|
1654
1628
|
const purpose = workbenchExecutionPurpose(args.job);
|
|
1655
1629
|
if (!purpose) {
|
|
@@ -1753,8 +1727,8 @@ async function executeWorkbenchExecutionJobWithResolvedAuth(runtimeArgs, options
|
|
|
1753
1727
|
}, executionForRuntime, startedAt, createWorkbenchExecutionCapability(executionForRuntime, { now: startedAt })));
|
|
1754
1728
|
}
|
|
1755
1729
|
const fileStore = createWorkbenchSandboxFileStore(runtimeArgs);
|
|
1756
|
-
const planeFactory = options.
|
|
1757
|
-
const plane = planeFactory(options.
|
|
1730
|
+
const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
|
|
1731
|
+
const plane = planeFactory(options.sandboxBackend, runtimeArgs, startedAt, fileStore);
|
|
1758
1732
|
const validated = await executeValidatedSandboxExecution(plane, executionForRuntime, {
|
|
1759
1733
|
now: startedAt,
|
|
1760
1734
|
runnerId: resolveWorkbenchWorkerId([
|
|
@@ -2310,7 +2284,7 @@ function completedJobFromSandboxResult(fallbackJob, startedAt, result) {
|
|
|
2310
2284
|
return attachSandboxMetadataToJob(failWorkbenchRunJob(fallbackJob, result.startedAt || startedAt, result.error ?? `Sandbox execution ${result.status}.`, result.finishedAt), asRuntimeRecord(result.metadata).sandbox);
|
|
2311
2285
|
}
|
|
2312
2286
|
async function executeCandidateRevisionExecutionInCurrentRuntime(args, execution, startedAt, capability, eventPublisher) {
|
|
2313
|
-
const { workload, result } = await
|
|
2287
|
+
const { workload, result } = await runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher);
|
|
2314
2288
|
if (result.error || (result.exitCode ?? 0) !== 0) {
|
|
2315
2289
|
return failWorkbenchRunJob(args.job, startedAt, result.error ?? `Adapter ${execution.adapter.use} exited with status ${result.exitCode}.`, result.finishedAt, result);
|
|
2316
2290
|
}
|
|
@@ -2359,7 +2333,7 @@ async function executeAttemptExecutionInCurrentRuntime(args, execution, startedA
|
|
|
2359
2333
|
engineCases: args.engineCases,
|
|
2360
2334
|
traceFiles: args.traceFiles,
|
|
2361
2335
|
});
|
|
2362
|
-
const workloadResult = await
|
|
2336
|
+
const workloadResult = await runRemoteCommandExecutionSteps(args, workload, attemptStepsForExecution(execution, args.spec, args.adapterManifests), startedAt, {
|
|
2363
2337
|
capability,
|
|
2364
2338
|
eventPublisher,
|
|
2365
2339
|
});
|
|
@@ -2439,7 +2413,7 @@ export async function executeRuntimeControlOperationSequenceInCurrentRuntime(arg
|
|
|
2439
2413
|
const adapterAuth = await materializeSandboxAdapterAuth(runtimeArgs, childExecution);
|
|
2440
2414
|
let result;
|
|
2441
2415
|
try {
|
|
2442
|
-
result = await
|
|
2416
|
+
result = await runRemoteCommandExecutionSteps({
|
|
2443
2417
|
...runtimeArgs,
|
|
2444
2418
|
...(adapterAuth.root ? { adapterAuthRoot: adapterAuth.root } : {}),
|
|
2445
2419
|
}, workload, args.runtimeControlOperation.operations.map((operation, index) => runtimeControlStepForOperation(operation, index, args.adapterManifests)), startedAt, {
|
|
@@ -2474,8 +2448,8 @@ async function executeRuntimeControlOperationSequenceInSandbox(args, options, st
|
|
|
2474
2448
|
const childArgs = createRuntimeControlSandboxInput(args, request);
|
|
2475
2449
|
const execution = readWorkbenchExecutionSpec(childArgs.job);
|
|
2476
2450
|
const fileStore = createWorkbenchSandboxFileStore(childArgs);
|
|
2477
|
-
const planeFactory = options.
|
|
2478
|
-
const plane = planeFactory(options.
|
|
2451
|
+
const planeFactory = options.createSandboxPlaneForBackend ?? createSandboxBackendPlaneForBackend;
|
|
2452
|
+
const plane = planeFactory(options.sandboxBackend, childArgs, startedAt, fileStore);
|
|
2479
2453
|
assertSandboxBackendSupportsNetworkPolicy(plane.backend, execution);
|
|
2480
2454
|
const sandboxOptions = {
|
|
2481
2455
|
now: startedAt,
|
|
@@ -2695,7 +2669,7 @@ function assertRuntimeControlScope(label, issues) {
|
|
|
2695
2669
|
throw new Error(`${label} failed validation:\n${issues.join("\n")}`);
|
|
2696
2670
|
}
|
|
2697
2671
|
}
|
|
2698
|
-
async function
|
|
2672
|
+
async function runRemoteProtocolExecutionResult(args, execution, startedAt, capability, eventPublisher) {
|
|
2699
2673
|
const workload = createWorkbenchRunWorkload({
|
|
2700
2674
|
job: args.job,
|
|
2701
2675
|
spec: args.spec,
|
|
@@ -2704,13 +2678,13 @@ async function runHostedProtocolExecutionResult(args, execution, startedAt, capa
|
|
|
2704
2678
|
engineCases: args.engineCases,
|
|
2705
2679
|
traceFiles: args.traceFiles,
|
|
2706
2680
|
});
|
|
2707
|
-
const result = await
|
|
2681
|
+
const result = await runRemoteCommandExecutionSteps(args, workload, [protocolStepForExecution(execution, args.adapterManifests)], startedAt, {
|
|
2708
2682
|
capability,
|
|
2709
2683
|
eventPublisher,
|
|
2710
2684
|
});
|
|
2711
2685
|
return { workload, result };
|
|
2712
2686
|
}
|
|
2713
|
-
async function
|
|
2687
|
+
async function runRemoteCommandExecutionSteps(args, workload, steps, startedAt, options = {}) {
|
|
2714
2688
|
const [{ execFile }, fs, os, path, { promisify }] = await Promise.all([
|
|
2715
2689
|
importNodeModule(nodeBuiltin("child_process")),
|
|
2716
2690
|
importNodeModule(nodeBuiltin("fs/promises")),
|
|
@@ -2780,7 +2754,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2780
2754
|
await stageWorkbenchEnginePrivateFiles(workspace.root, workload);
|
|
2781
2755
|
enginePrivateStaged = true;
|
|
2782
2756
|
}
|
|
2783
|
-
await
|
|
2757
|
+
await resetRemoteWorkloadStepOutput(workspace.root);
|
|
2784
2758
|
const stepAdapterId = step.adapter?.use ?? execution.adapter.use;
|
|
2785
2759
|
const adapterRequestPath = await writeWorkbenchAdapterRequest(workspace.root, workload, execution, step, adapterAuthRequestForStep(args, stepAdapterId), args.adapterManifests);
|
|
2786
2760
|
const stepRole = stepEventRole(step);
|
|
@@ -2796,10 +2770,10 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2796
2770
|
const adapterRoot = step.executor === "host"
|
|
2797
2771
|
? hostAdapterRoots.get(stepAdapterId)
|
|
2798
2772
|
: undefined;
|
|
2799
|
-
const command =
|
|
2773
|
+
const command = createRemoteWorkloadShellCommand(workspace.root, step.command, step.label, step.okExitCodes);
|
|
2800
2774
|
await execFileAsync("sh", ["-c", command], {
|
|
2801
2775
|
cwd: adapterRoot ?? workspace.root,
|
|
2802
|
-
env:
|
|
2776
|
+
env: createRemoteWorkloadAdapterEnv(workspace.root, adapterRequestPath, adapterAuthEnvForStep(args, stepAdapterId), adapterRoot ? { adapterRoot } : undefined, args.adapterRuntimeEnv),
|
|
2803
2777
|
maxBuffer: 10 * 1024 * 1024,
|
|
2804
2778
|
timeout: stepTimeoutMs,
|
|
2805
2779
|
});
|
|
@@ -2839,7 +2813,7 @@ async function runHostedCommandExecutionSteps(args, workload, steps, startedAt,
|
|
|
2839
2813
|
.catch(() => undefined);
|
|
2840
2814
|
}
|
|
2841
2815
|
if (exitCode !== 0) {
|
|
2842
|
-
return await
|
|
2816
|
+
return await readRemoteRunFailureResult(workspace.root, workload, {
|
|
2843
2817
|
exitCode,
|
|
2844
2818
|
error: runtimeError ?? `Runtime command exited with status ${exitCode}.`,
|
|
2845
2819
|
startedAt,
|
|
@@ -2871,10 +2845,10 @@ async function runCandidatePrepareCommand(args) {
|
|
|
2871
2845
|
role,
|
|
2872
2846
|
});
|
|
2873
2847
|
try {
|
|
2874
|
-
const shellCommand =
|
|
2848
|
+
const shellCommand = createRemoteWorkloadShellCommand(args.root, command, "candidate_prepare");
|
|
2875
2849
|
await args.execFileAsync("sh", ["-c", shellCommand], {
|
|
2876
2850
|
cwd: args.root,
|
|
2877
|
-
env:
|
|
2851
|
+
env: createRemoteWorkloadPrepareEnv(args.root),
|
|
2878
2852
|
maxBuffer: 10 * 1024 * 1024,
|
|
2879
2853
|
timeout: args.timeoutMs,
|
|
2880
2854
|
});
|
|
@@ -3154,7 +3128,7 @@ function adapterFilePathWithinRoot(filePath, sourceRoot) {
|
|
|
3154
3128
|
}
|
|
3155
3129
|
return normalized.slice(sourceRoot.length + 1);
|
|
3156
3130
|
}
|
|
3157
|
-
async function
|
|
3131
|
+
async function readRemoteRunFailureResult(root, workload, options) {
|
|
3158
3132
|
const traceFiles = await readRuntimeTraceFiles(root, workload);
|
|
3159
3133
|
const outputFiles = filterRuntimeOutputFiles(await readSurfaceFiles(outputDir(root), { ignorePath: isWorkbenchInternalOutputPath }));
|
|
3160
3134
|
const startedAt = options.startedAt ?? new Date().toISOString();
|
|
@@ -3241,7 +3215,7 @@ async function readRuntimeTraceFiles(root, workload) {
|
|
|
3241
3215
|
/**
 * Returns the entries of `files` whose `path` is not reported as internal by
 * `isWorkbenchInternalOutputPath`. The input array is left untouched.
 *
 * @param files Array of file records that expose a `path`.
 * @returns New array holding the same record objects, internal paths removed.
 */
function filterRuntimeOutputFiles(files) {
    const isVisibleOutput = ({ path }) => !isWorkbenchInternalOutputPath(path);
    return files.filter(isVisibleOutput);
}
|
|
3244
|
-
function
|
|
3218
|
+
function createRemoteWorkloadShellCommand(root, command, prefix = "", okExitCodes = [0]) {
|
|
3245
3219
|
const outputPrefix = prefix ? `${prefix}_` : "";
|
|
3246
3220
|
const okExpression = [...new Set(okExitCodes)]
|
|
3247
3221
|
.sort((left, right) => left - right)
|
|
@@ -3261,7 +3235,7 @@ function createHostedWorkloadShellCommand(root, command, prefix = "", okExitCode
|
|
|
3261
3235
|
'exit "$status"',
|
|
3262
3236
|
].join("; ");
|
|
3263
3237
|
}
|
|
3264
|
-
async function
|
|
3238
|
+
async function resetRemoteWorkloadStepOutput(root) {
|
|
3265
3239
|
const fs = await importNodeModule(nodeBuiltin("fs/promises"));
|
|
3266
3240
|
await fs
|
|
3267
3241
|
.rm(workbenchAdapterOperationResultPath(outputDir(root)), { force: true })
|
|
@@ -3359,8 +3333,8 @@ function requireImproveEdits(spec) {
|
|
|
3359
3333
|
}
|
|
3360
3334
|
return edits;
|
|
3361
3335
|
}
|
|
3362
|
-
function
|
|
3363
|
-
const env =
|
|
3336
|
+
function createRemoteWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {}, options = {}, runtimeEnv = {}) {
|
|
3337
|
+
const env = createRemoteWorkloadBaseEnv();
|
|
3364
3338
|
env.WORKBENCH_ADAPTER_REQUEST = adapterRequestPath;
|
|
3365
3339
|
env.WORKBENCH_OUTPUT = outputDir(root);
|
|
3366
3340
|
env.WORKBENCH_RESULT = workbenchAdapterOperationResultPath(outputDir(root));
|
|
@@ -3376,12 +3350,12 @@ function createHostedWorkloadAdapterEnv(root, adapterRequestPath, adapterEnv = {
|
|
|
3376
3350
|
Object.assign(env, runtimeEnv);
|
|
3377
3351
|
return env;
|
|
3378
3352
|
}
|
|
3379
|
-
function
|
|
3380
|
-
const env =
|
|
3353
|
+
/**
 * Builds the environment object handed to the candidate prepare command:
 * the base workload environment plus `WORKBENCH_OUTPUT`, set to
 * `outputDir(root)`.
 *
 * @param root Workspace root passed to `outputDir`.
 * @returns The environment object created by `createRemoteWorkloadBaseEnv`,
 *   with `WORKBENCH_OUTPUT` assigned on it.
 */
function createRemoteWorkloadPrepareEnv(root) {
    const prepareEnv = createRemoteWorkloadBaseEnv();
    return Object.assign(prepareEnv, { WORKBENCH_OUTPUT: outputDir(root) });
}
|
|
3384
|
-
function
|
|
3358
|
+
function createRemoteWorkloadBaseEnv() {
|
|
3385
3359
|
const env = {};
|
|
3386
3360
|
for (const [key, value] of Object.entries(process.env)) {
|
|
3387
3361
|
if (typeof value === "string") {
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, SurfaceSnapshotFile, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
|
|
3
|
+
/** Options accepted by the `WorkbenchInspectionError` constructor. */
export interface WorkbenchInspectionErrorOptions {
|
|
4
|
+
/** Status to report on the error; the implementation falls back to 400 when omitted. */
status?: number;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Error type thrown by the inspection facade, carrying a numeric status.
 * The implementation throws it with status 404 when execution traces are not
 * available from the backend.
 */
export declare class WorkbenchInspectionError extends Error {
|
|
7
|
+
/** Status taken from the constructor options (400 when none is given). */
readonly status: number;
|
|
8
|
+
/** Same value as `status`, exposed under a second name. */
readonly statusCode: number;
|
|
9
|
+
/**
 * @param message Error message passed to `Error`.
 * @param options Optional settings; see `WorkbenchInspectionErrorOptions`.
 */
constructor(message: string, options?: WorkbenchInspectionErrorOptions);
|
|
10
|
+
}
|
|
11
|
+
/** Input accepted by the `spec` and `sourceFiles` operations. */
export interface WorkbenchInspectionFileListInput {
|
|
12
|
+
// NOTE(review): forwarded unchanged to the backend; its meaning is defined by the backend — confirm there.
fingerprint?: string | null;
|
|
13
|
+
}
|
|
14
|
+
/** Selects one file and a preview mode; shared by the preview operations. */
export interface WorkbenchInspectionPreviewInput {
|
|
15
|
+
/** Path of the file to preview within the listed files. */
path: string;
|
|
16
|
+
/** Requested preview mode. */
view: "diff" | "raw" | "rendered";
|
|
17
|
+
}
|
|
18
|
+
/** Input of `sourcePreview`: the source file-list input combined with a path and view. */
export interface WorkbenchInspectionFilePreviewInput extends WorkbenchInspectionFileListInput, WorkbenchInspectionPreviewInput {
|
|
19
|
+
}
|
|
20
|
+
/** Identifies a candidate for the `candidate` and `candidateFiles` operations. */
export interface WorkbenchInspectionCandidateInput {
|
|
21
|
+
/** Candidate id. */
id: string;
|
|
22
|
+
}
|
|
23
|
+
/** Input of `candidatePreview`: a candidate id combined with a path and view. */
export interface WorkbenchInspectionCandidatePreviewInput extends WorkbenchInspectionCandidateInput, WorkbenchInspectionPreviewInput {
|
|
24
|
+
}
|
|
25
|
+
/** Identifies an evaluation for the `evaluation` operation. */
export interface WorkbenchInspectionEvaluationInput {
|
|
26
|
+
/** Evaluation id. */
id: string;
|
|
27
|
+
}
|
|
28
|
+
/** Input of `caseReview`: which candidate, case and run to review. */
export interface WorkbenchInspectionCaseReviewInput {
|
|
29
|
+
/** Candidate whose case is reviewed. */
candidateId: string;
|
|
30
|
+
/** Case to review. */
caseId: string;
|
|
31
|
+
/** Run whose jobs are loaded when the review is derived by the facade. */
runId: string;
|
|
32
|
+
/** Optional sample index, forwarded when execution references are built. */
sampleIndex?: number;
|
|
33
|
+
}
|
|
34
|
+
/** Input of the `run` operation. */
export interface WorkbenchInspectionRunInput {
|
|
35
|
+
/** Run id. */
id: string;
|
|
36
|
+
// The facade sets this to true whenever it needs the run's jobs;
// presumably it controls whether `jobs` is populated — confirm in the backend.
includeJobs?: boolean;
|
|
37
|
+
}
|
|
38
|
+
/** Identifies one job execution within a run. */
export interface WorkbenchInspectionExecutionInput {
|
|
39
|
+
/** Run that contains the job. */
runId: string;
|
|
40
|
+
/** Job within that run. */
jobId: string;
|
|
41
|
+
}
|
|
42
|
+
/**
 * Input of `executionPreview`: a job execution (`runId`, `jobId`) combined
 * with the file `path` and `view` to preview.
 *
 * Composes `WorkbenchInspectionPreviewInput` instead of re-declaring `path`
 * and `view`, matching the source and candidate preview inputs; the resulting
 * shape is unchanged.
 */
export interface WorkbenchInspectionExecutionPreviewInput extends WorkbenchInspectionExecutionInput, WorkbenchInspectionPreviewInput {
}
|
|
46
|
+
/** Result of the `run` operation. */
export interface WorkbenchInspectionRunDetail {
|
|
47
|
+
/** Summary of the run. */
run: RunSummary;
|
|
48
|
+
/** Jobs of the run; may be absent, in which case the facade treats it as an empty list. */
jobs?: RemoteWorkbenchJob[];
|
|
49
|
+
}
|
|
50
|
+
/** Kind of entity a failure detail refers to; the diagnosis counts the "run", "evaluation" and "job" kinds. */
export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
|
|
51
|
+
/**
 * One failure reported by `diagnose`. Only `kind` and `id` are required; the
 * remaining fields are optional context whose population depends on the kind.
 */
export interface WorkbenchFailureDetail {
|
|
52
|
+
/** Which kind of entity failed. */
kind: WorkbenchFailureKind;
|
|
53
|
+
/** Id of the failure entry. */
id: string;
|
|
54
|
+
status?: string;
|
|
55
|
+
runId?: string;
|
|
56
|
+
candidateId?: string;
|
|
57
|
+
evaluationId?: string;
|
|
58
|
+
jobId?: string;
|
|
59
|
+
caseId?: string;
|
|
60
|
+
sampleIndex?: number;
|
|
61
|
+
attemptIndex?: number;
|
|
62
|
+
/** Error text, when one is available. */
error?: string;
|
|
63
|
+
}
|
|
64
|
+
/** Result of `diagnose`: the collected failures plus per-kind counts. */
export interface WorkbenchFailureDiagnosis {
|
|
65
|
+
/** Trimmed target id the diagnosis was requested for, or null when none (or only whitespace) was given. */
targetId: string | null;
|
|
66
|
+
/** All failures that were collected. */
failures: WorkbenchFailureDetail[];
|
|
67
|
+
/** Number of entries in `failures` with kind "run". */
failedRunCount: number;
|
|
68
|
+
/** Number of entries in `failures` with kind "evaluation". */
failedEvaluationCount: number;
|
|
69
|
+
/** Number of entries in `failures` with kind "job". */
failedJobCount: number;
|
|
70
|
+
}
|
|
71
|
+
/**
 * Data source that `createWorkbenchInspection` wraps. Required members are
 * called directly by the facade; optional members enable shortcuts or
 * fallbacks as noted on each one.
 */
export interface WorkbenchInspectionBackend {
|
|
72
|
+
/** Project id; reported on derived execution traces and used in their trace id prefix. */
projectId: string;
|
|
73
|
+
/** Current runtime snapshot; the facade reads evaluations, runs, summaries and the active id from it. */
snapshot(): Promise<RuntimeSnapshot>;
|
|
74
|
+
spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
75
|
+
/** Raw source files; the facade summarises them or builds previews from them. */
sourceFiles(input: WorkbenchInspectionFileListInput): Promise<SurfaceSnapshotFile[]>;
|
|
76
|
+
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
77
|
+
/** Raw candidate files together with the paths that count as changed. */
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<{
|
|
78
|
+
files: SurfaceSnapshotFile[];
|
|
79
|
+
changedPaths: readonly string[];
|
|
80
|
+
}>;
|
|
81
|
+
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
82
|
+
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
83
|
+
/** Optional; needed (with `traceForJob`) for the facade to derive an execution trace itself. */
jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
|
|
84
|
+
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<SurfaceSnapshotFile[]>;
|
|
85
|
+
/** Optional; when absent the facade builds the review from `candidate` and `run` (with jobs included). */
caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
86
|
+
/** Optional; when present the facade returns its result as-is. */
executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
87
|
+
/** Optional; without it (or `jobInRun`) and without `executionTrace`, the facade throws a 404 `WorkbenchInspectionError`. */
traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
|
|
88
|
+
/** Optional; passed through to the execution-evidence builder when traces are derived. */
traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
|
|
89
|
+
}
|
|
90
|
+
/**
 * Inspection facade returned by `createWorkbenchInspection`. Every operation
 * is asynchronous and reads through the wrapped backend.
 */
export interface WorkbenchInspection {
|
|
91
|
+
/** Returns the backend's runtime snapshot. */
snapshot(): Promise<RuntimeSnapshot>;
|
|
92
|
+
/** Returns the backend's spec document; `input` defaults to an empty object. */
spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
93
|
+
/** Summaries of the backend's source files; `input` defaults to an empty object. */
sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
|
|
94
|
+
/** Preview of one source file in the requested view. */
sourcePreview(input: WorkbenchInspectionFilePreviewInput): Promise<CandidateFilePreview>;
|
|
95
|
+
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
96
|
+
/** Summaries of a candidate's files, using the backend's changed paths. */
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
|
|
97
|
+
/** Preview of one candidate file in the requested view. */
candidatePreview(input: WorkbenchInspectionCandidatePreviewInput): Promise<CandidateFilePreview>;
|
|
98
|
+
/** Comparison built from the evaluations in the current snapshot. */
evaluations(): Promise<WorkbenchEvaluationComparison>;
|
|
99
|
+
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
100
|
+
/** Case review from the backend when it provides one, otherwise derived from the candidate and the run's jobs. */
caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
101
|
+
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
102
|
+
/** Execution trace from the backend when it provides one, otherwise derived from the job; rejects with a 404 `WorkbenchInspectionError` when neither is possible. */
executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
103
|
+
/** Summaries of the files of one job execution. */
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
|
|
104
|
+
/** Preview of one execution file in the requested view. */
executionPreview(input: WorkbenchInspectionExecutionPreviewInput): Promise<CandidateFilePreview>;
|
|
105
|
+
/** Lineage graph built from the snapshot's summaries and active id. */
lineage(): Promise<CandidateLineageGraph>;
|
|
106
|
+
/** Collects failures; `targetId` is trimmed and an empty value is treated as "no target". */
diagnose(input?: {
|
|
107
|
+
targetId?: string | null;
|
|
108
|
+
}): Promise<WorkbenchFailureDiagnosis>;
|
|
109
|
+
}
|
|
110
|
+
/**
 * Wraps an inspection backend in the `WorkbenchInspection` facade.
 *
 * @param backend Data source the facade reads from.
 * @returns The inspection facade.
 */
export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
|
|
111
|
+
//# sourceMappingURL=inspection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,mBAAmB,EACnB,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAc1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC,EAAE,+BAA+B;CAAG;AAEjI,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC,EAAE,+BAA+B;CAAG;AAEvI,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,wCAAyC,SAAQ,iCAAiC;IACjG,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACnC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC
;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACrF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC;QAChE,KAAK,EAAE,mBAAmB,EAAE,CAAC;QAC7B,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;KACjC,CAAC,CAAC;IACH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACzF,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,aAAa,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACzF,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACjG,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,gBAAgB,CAAC,KAAK,EAAE,wCAAwC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC
;IACjG,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAiGrB"}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
|
|
3
|
+
import { candidateRecordWithoutDerivedFields, createCandidateFilePreview, createCaseReview, summarizeCandidateFiles, } from "./index.js";
|
|
4
|
+
/**
 * Error thrown by the Workbench inspection facade.
 *
 * Carries a numeric status under two property names (`status` and
 * `statusCode`), both holding the same value.
 */
export class WorkbenchInspectionError extends Error {
    /** Status supplied through `options.status`, or 400 when none is given. */
    status;
    /** Same value as `status`. */
    statusCode;
    /**
     * @param message Error message passed to `Error`.
     * @param options Optional settings; `options.status` overrides the default
     *   status of 400. `undefined` and `null` are both treated as "no options".
     */
    constructor(message, options = {}) {
        super(message);
        this.name = "WorkbenchInspectionError";
        // The default parameter only covers `undefined`; optional chaining keeps
        // an explicit `null` from throwing a TypeError while building the error.
        this.status = options?.status ?? 400;
        this.statusCode = this.status;
    }
}
|
|
14
|
+
/**
 * Wraps an inspection backend in the inspection facade.
 *
 * Most operations forward to the backend and post-process the result with the
 * shared helpers (file summaries, file previews, evaluation comparison,
 * lineage). Two operations fall back to deriving their result when the
 * backend does not implement them directly:
 *  - `caseReview` is built from `backend.candidate` and the jobs of
 *    `backend.run({ includeJobs: true })`;
 *  - `executionTrace` is built from `backend.jobInRun` and
 *    `backend.traceForJob`, and throws a 404 `WorkbenchInspectionError` when
 *    either of those is missing.
 *
 * @param backend Inspection backend to read from.
 * @returns Object exposing the inspection operations.
 */
export function createWorkbenchInspection(backend) {
    return {
        snapshot: () => backend.snapshot(),
        spec: (input = {}) => backend.spec(input),
        sourceFiles: async (input = {}) => {
            const files = await backend.sourceFiles(input);
            // Every path is passed in the changed-paths position of the summary helper.
            return summarizeCandidateFiles(files, files.map((file) => file.path));
        },
        sourcePreview: async (input) => createCandidateFilePreview({
            files: await backend.sourceFiles(input),
            path: input.path,
            view: input.view,
        }),
        candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
        candidateFiles: async (input) => {
            const result = await backend.candidateFiles(input);
            return summarizeCandidateFiles(result.files, result.changedPaths);
        },
        candidatePreview: async (input) => createCandidateFilePreview({
            files: (await backend.candidateFiles(input)).files,
            path: input.path,
            view: input.view,
        }),
        evaluations: async () => {
            const snapshot = await backend.snapshot();
            return buildWorkbenchEvaluationComparison(snapshot.evaluations);
        },
        evaluation: (input) => backend.evaluation(input),
        caseReview: async (input) => {
            // Prefer the backend's own review when it offers one.
            if (backend.caseReview) {
                return await backend.caseReview(input);
            }
            const candidate = await backend.candidate({ id: input.candidateId });
            // A run detail without jobs is treated as having none.
            const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
            return createCaseReview({
                candidate,
                caseId: input.caseId,
                executions: buildCandidateCaseExecutionRefs({
                    jobs,
                    candidateId: input.candidateId,
                    caseId: input.caseId,
                    sampleIndex: input.sampleIndex,
                }),
            });
        },
        run: (input) => backend.run(input),
        executionTrace: async (input) => {
            // Prefer the backend's own trace when it offers one.
            if (backend.executionTrace) {
                return await backend.executionTrace(input);
            }
            if (!backend.jobInRun || !backend.traceForJob) {
                throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
            }
            const jobs = [await backend.jobInRun(input)];
            return {
                projectId: backend.projectId,
                runId: input.runId,
                executions: buildWorkbenchExecutionEvidence({
                    jobs,
                    traceIdPrefix: `${backend.projectId}-execution`,
                    // Bind the callbacks so backends implemented as classes keep
                    // their `this` when the evidence builder invokes them.
                    traceForJob: backend.traceForJob.bind(backend),
                    traceSessionsForJob: backend.traceSessionsForJob?.bind(backend),
                }),
            };
        },
        executionFiles: async (input) => {
            const files = await backend.executionFiles(input);
            // Every path is passed in the changed-paths position of the summary helper.
            return summarizeCandidateFiles(files, files.map((file) => file.path));
        },
        executionPreview: async (input) => createCandidateFilePreview({
            files: await backend.executionFiles(input),
            path: input.path,
            view: input.view,
        }),
        lineage: async () => {
            const snapshot = await backend.snapshot();
            return buildCandidateLineage({
                summaries: snapshot.summaries,
                activeId: snapshot.activeId,
            });
        },
        diagnose: async (input = {}) => {
            const snapshot = await backend.snapshot();
            return await diagnoseWorkbenchFailures({
                snapshot,
                backend,
                // Blank or whitespace-only target ids mean "no target".
                targetId: input.targetId?.trim() || null,
            });
        },
    };
}
|
|
105
|
+
/**
 * Collects failure records for a Workbench inspection snapshot.
 *
 * Resolution order:
 *  1. If `args.targetId` matches a run in `args.snapshot.runs`, the run is
 *     loaded through `args.backend.run` (with jobs) and both run-level and
 *     job-level failures are reported.
 *  2. Otherwise, if it matches an entry in `args.snapshot.evaluations`, the
 *     evaluation is loaded through `args.backend.evaluation` and its
 *     evaluation/sample/case failures are reported.
 *  3. Otherwise (no target id, or an id that matches nothing) every run and
 *     every evaluation summary in the snapshot is scanned.
 *
 * @param args Object carrying `snapshot`, `backend` and `targetId`.
 * @returns A promise of `{ targetId, failures, failedRunCount,
 *   failedEvaluationCount, failedJobCount }`; the counts only tally records
 *   whose `kind` is "run", "evaluation" and "job" respectively.
 */
async function diagnoseWorkbenchFailures(args) {
    const { snapshot, backend, targetId } = args;
    // Both lookups are performed eagerly whenever a target id was supplied.
    const matchedRun = targetId
        ? snapshot.runs.find((entry) => entry.id === targetId)
        : null;
    const matchedEvaluation = targetId
        ? snapshot.evaluations.find((entry) => entry.id === targetId)
        : null;
    const failures = [];
    // A match can only exist when targetId is truthy, so testing the match alone suffices.
    if (matchedRun) {
        const detail = await backend.run({ id: matchedRun.id, includeJobs: true });
        failures.push(...runFailures(detail.run), ...jobFailures(detail.jobs ?? []));
    }
    else if (matchedEvaluation) {
        const detail = await backend.evaluation({ id: matchedEvaluation.id });
        failures.push(...evaluationFailures(detail));
    }
    else {
        // Snapshot-wide scan: summaries only, no per-run or per-evaluation backend calls.
        for (const entry of snapshot.runs) {
            failures.push(...runFailures(entry));
        }
        for (const entry of snapshot.evaluations) {
            failures.push(...evaluationSummaryFailures(entry));
        }
    }
    const countKind = (kind) => failures.filter((failure) => failure.kind === kind).length;
    return {
        targetId,
        failures,
        failedRunCount: countKind("run"),
        failedEvaluationCount: countKind("evaluation"),
        failedJobCount: countKind("job"),
    };
}
|
|
138
|
+
/**
 * Converts a run into zero or one failure records.
 *
 * A run is reported only when its `status` is "finished" and its `outcome`
 * is "error" or "cancelled"; every other run yields an empty array.
 *
 * @param run Run object; reads `id`, `status`, `outcome`, `outputCandidateId`,
 *   `candidateId` and `error`.
 * @returns An array with a single `kind: "run"` record, or an empty array.
 */
function runFailures(run) {
    const endedBadly = run.status === "finished" &&
        (run.outcome === "error" || run.outcome === "cancelled");
    if (!endedBadly) {
        return [];
    }
    // The `error` key is only present when the run carries a truthy error.
    const errorField = run.error ? { error: run.error } : {};
    const record = {
        kind: "run",
        id: run.id,
        runId: run.id,
        // Prefer the output candidate, then the input candidate; null collapses to undefined.
        candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
        status: run.outcome,
        ...errorField,
    };
    return [record];
}
|
|
151
|
+
/**
 * Converts an evaluation summary into zero or one failure records.
 *
 * An evaluation is considered clean only when all three hold: `status` is
 * "completed", `errorSampleCount` is exactly 0, and `error` is falsy.
 * Anything else produces a single `kind: "evaluation"` record.
 *
 * @param evaluation Evaluation object; reads `id`, `runId`, `candidateId`,
 *   `status`, `errorSampleCount` and `error`.
 * @returns An array with one failure record, or an empty array when clean.
 */
function evaluationSummaryFailures(evaluation) {
    const isClean = evaluation.status === "completed" &&
        evaluation.errorSampleCount === 0 &&
        !evaluation.error;
    if (isClean) {
        return [];
    }
    // The `error` key is only present when the evaluation carries a truthy error.
    const errorField = evaluation.error ? { error: evaluation.error } : {};
    const record = {
        kind: "evaluation",
        id: evaluation.id,
        evaluationId: evaluation.id,
        runId: evaluation.runId,
        candidateId: evaluation.candidateId,
        status: evaluation.status,
        ...errorField,
    };
    return [record];
}
|
|
167
|
+
/**
 * Expands an evaluation detail into evaluation-, sample- and case-level
 * failure records.
 *
 * The list starts with whatever `evaluationSummaryFailures` reports. Then each
 * entry of `evaluation.evaluation.samples` is inspected: a sample contributes
 * when it has a truthy `error` or at least one case whose `status` is set and
 * is not "completed". A contributing sample yields one `kind: "sample"` record
 * followed by one `kind: "case"` record per such case.
 *
 * @param evaluation Evaluation detail; reads `id`, `runId`, `candidateId` and
 *   `evaluation.samples` (each sample: `index`, `status`, `error`, `cases`).
 * @returns The ordered list of failure records.
 */
function evaluationFailures(evaluation) {
    const collected = evaluationSummaryFailures(evaluation);
    // Identifiers repeated on every sample/case record.
    const shared = {
        evaluationId: evaluation.id,
        runId: evaluation.runId,
        candidateId: evaluation.candidateId,
    };
    for (const sample of evaluation.evaluation.samples) {
        // Gather case records first; a case without a status is treated as not failed.
        const caseRecords = [];
        for (const entry of sample.cases ?? []) {
            if (entry.status && entry.status !== "completed") {
                caseRecords.push({
                    kind: "case",
                    id: `${evaluation.id}:case:${entry.id}:sample:${sample.index}`,
                    ...shared,
                    caseId: entry.id,
                    sampleIndex: sample.index,
                    status: entry.status,
                });
            }
        }
        if (!sample.error && caseRecords.length === 0) {
            continue;
        }
        const errorField = sample.error ? { error: sample.error } : {};
        // The sample record always precedes the records of its cases.
        collected.push({
            kind: "sample",
            id: `${evaluation.id}:sample:${sample.index}`,
            ...shared,
            sampleIndex: sample.index,
            status: sample.status,
            ...errorField,
        }, ...caseRecords);
    }
    return collected;
}
|
|
201
|
+
/**
 * Builds one `kind: "job"` failure record for every job whose status is
 * classified as failed by `isFailedJobStatus`, preserving input order.
 *
 * @param jobs Array of job objects; reads `id`, `runId`, `candidateId`,
 *   `status`, `attempt` and `error`.
 * @returns The failure records for the failed jobs.
 */
function jobFailures(jobs) {
    const records = [];
    for (const job of jobs.filter((entry) => isFailedJobStatus(entry.status))) {
        // The `error` key is only present when the job carries a truthy error.
        const errorField = job.error ? { error: job.error } : {};
        // Non-numeric attempts are reported as an undefined attemptIndex.
        const attemptIndex = typeof job.attempt === "number" ? job.attempt : undefined;
        records.push({
            kind: "job",
            id: job.id,
            jobId: job.id,
            runId: job.runId,
            candidateId: job.candidateId,
            status: job.status,
            attemptIndex,
            ...errorField,
        });
    }
    return records;
}
|
|
215
|
+
/**
 * Reports whether a job status counts as a failure.
 *
 * @param status Job status value.
 * @returns `true` only for exactly "failed" or "cancelled".
 */
function isFailedJobStatus(status) {
    // `switch` compares with strict equality, so only the exact strings match.
    switch (status) {
        case "failed":
        case "cancelled":
            return true;
        default:
            return false;
    }
}
|
|
@@ -1,37 +1,29 @@
|
|
|
1
1
|
import type { WorkbenchExecutionRuntimeInput } from "../execution-runtime-types.ts";
|
|
2
2
|
import type { SandboxBackendCapabilities, SandboxExecutionFileStore, SandboxPlane } from "../sandbox-plane.ts";
|
|
3
|
-
import { type
|
|
4
|
-
export { DOCKER_SANDBOX_BACKEND,
|
|
3
|
+
import { type WorkbenchSandboxBackendName } from "./names.ts";
|
|
4
|
+
export { DOCKER_SANDBOX_BACKEND, resolveWorkbenchSandboxBackendName, type WorkbenchSandboxBackendName, } from "./names.ts";
|
|
5
5
|
export { createDockerSandboxBackendDescriptor, createDockerSandboxPlane, } from "./docker.ts";
|
|
6
6
|
export interface SandboxHostHealthExpectation {
|
|
7
|
-
|
|
8
|
-
backend: string;
|
|
7
|
+
backend: WorkbenchSandboxBackendName;
|
|
9
8
|
capabilities: SandboxBackendCapabilities;
|
|
10
9
|
}
|
|
11
|
-
export interface
|
|
10
|
+
export interface SandboxBackendRequestedResources {
|
|
12
11
|
cpu: number;
|
|
13
12
|
memoryGb: number;
|
|
14
13
|
diskGb?: number;
|
|
15
14
|
timeoutMinutes?: number;
|
|
16
15
|
}
|
|
17
|
-
export interface
|
|
16
|
+
export interface SandboxBackendHostCost {
|
|
18
17
|
cpu: number;
|
|
19
18
|
memoryGb: number;
|
|
20
19
|
diskGb: number;
|
|
21
20
|
}
|
|
22
|
-
export interface
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
export interface SandboxBackendAdmission {
|
|
22
|
+
backend: WorkbenchSandboxBackendName;
|
|
23
|
+
hostCost: SandboxBackendHostCost;
|
|
25
24
|
}
|
|
26
|
-
export
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
export declare function createSandboxBackendPlaneForProvider(provider: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
|
|
32
|
-
export declare function sandboxHostHealthExpectationForProvider(provider: WorkbenchSandboxProviderName): SandboxHostHealthExpectation;
|
|
33
|
-
export declare function assertSandboxHostHealthForProvider(value: unknown, provider: WorkbenchSandboxProviderName): void;
|
|
34
|
-
export declare function sandboxProviderDefaultMaxConcurrentJobs(_provider: WorkbenchSandboxProviderName): number | null;
|
|
35
|
-
export declare function sandboxProviderAdmissionForResources(provider: WorkbenchSandboxProviderName, resources: SandboxProviderRequestedResources): SandboxProviderAdmission;
|
|
36
|
-
export declare function sandboxProviderLeaseScope(provider: WorkbenchSandboxProviderName): string;
|
|
25
|
+
export declare function createSandboxBackendPlaneForBackend(backend: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore): SandboxPlane;
|
|
26
|
+
export declare function sandboxHostHealthExpectationForBackend(backend: WorkbenchSandboxBackendName): SandboxHostHealthExpectation;
|
|
27
|
+
export declare function assertSandboxHostHealthForBackend(value: unknown, backend: WorkbenchSandboxBackendName): void;
|
|
28
|
+
export declare function sandboxBackendAdmissionForResources(backend: WorkbenchSandboxBackendName, resources: SandboxBackendRequestedResources): SandboxBackendAdmission;
|
|
37
29
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sandbox-backends/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,8BAA8B,EAC/B,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,0BAA0B,EAE1B,yBAAyB,EACzB,YAAY,EACb,MAAM,qBAAqB,CAAC;AAK7B,OAAO,EAEL,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sandbox-backends/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,8BAA8B,EAC/B,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,0BAA0B,EAE1B,yBAAyB,EACzB,YAAY,EACb,MAAM,qBAAqB,CAAC;AAK7B,OAAO,EAEL,KAAK,2BAA2B,EAEjC,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,sBAAsB,EACtB,kCAAkC,EAClC,KAAK,2BAA2B,GACjC,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,oCAAoC,EACpC,wBAAwB,GACzB,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,2BAA2B,CAAC;IACrC,YAAY,EAAE,0BAA0B,CAAC;CAC1C;AAED,MAAM,WAAW,gCAAgC;IAC/C,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,sBAAsB;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,2BAA2B,CAAC;IACrC,QAAQ,EAAE,sBAAsB,CAAC;CAClC;AAED,wBAAgB,mCAAmC,CACjD,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,8BAA8B,EACpC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,yBAAyB,GACnC,YAAY,CAMd;AAED,wBAAgB,sCAAsC,CACpD,OAAO,EAAE,2BAA2B,GACnC,4BAA4B,CAQ9B;AAED,wBAAgB,iCAAiC,CAC/C,KAAK,EAAE,OAAO,EACd,OAAO,EAAE,2BAA2B,GACnC,IAAI,CAYN;AAED,wBAAgB,mCAAmC,CACjD,OAAO,EAAE,2BAA2B,EACpC,SAAS,EAAE,gCAAgC,GAC1C,uBAAuB,CAiBzB"}
|