@workbench-ai/workbench-core 0.0.67 → 0.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/coded-errors.d.ts +27 -0
- package/dist/coded-errors.d.ts.map +1 -0
- package/dist/coded-errors.js +52 -0
- package/dist/execution-events.d.ts +5 -1
- package/dist/execution-events.d.ts.map +1 -1
- package/dist/execution-events.js +13 -3
- package/dist/execution-graph.d.ts +4 -3
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +15 -14
- package/dist/execution-jobs.d.ts +5 -20
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +7 -91
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +10 -10
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-scheduler.d.ts +5 -3
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +33 -9
- package/dist/execution-traces.js +1 -1
- package/dist/generic-spec.d.ts +7 -61
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +0 -679
- package/dist/index.d.ts +377 -220
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7887 -3881
- package/dist/remote-model.d.ts +17 -0
- package/dist/remote-model.d.ts.map +1 -0
- package/dist/remote-model.js +86 -0
- package/dist/runtime-dockerfile.d.ts +1 -1
- package/dist/runtime-dockerfile.d.ts.map +1 -1
- package/dist/runtime-dockerfile.js +4 -4
- package/dist/sandbox-backends/docker.d.ts.map +1 -1
- package/dist/sandbox-backends/docker.js +34 -16
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +13 -9
- package/dist/skill-patch.d.ts +8 -0
- package/dist/skill-patch.d.ts.map +1 -0
- package/dist/{candidate-patch.js → skill-patch.js} +5 -5
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/candidate-patch.d.ts +0 -8
- package/dist/candidate-patch.d.ts.map +0 -1
- package/dist/execution-evidence.d.ts +0 -22
- package/dist/execution-evidence.d.ts.map +0 -1
- package/dist/execution-evidence.js +0 -302
- package/dist/inspection.d.ts +0 -117
- package/dist/inspection.d.ts.map +0 -1
- package/dist/inspection.js +0 -224
|
@@ -37,14 +37,17 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
37
37
|
throw new Error(`Job ${job.id} has unsupported initial DAG status ${job.status}.`);
|
|
38
38
|
}
|
|
39
39
|
pending.set(job.id, job);
|
|
40
|
-
args.onJobQueued
|
|
40
|
+
await runJobHook(args.onJobQueued, job);
|
|
41
41
|
}
|
|
42
42
|
while (pending.size > 0 || running.size > 0) {
|
|
43
|
-
const progressed = startReadyJobs();
|
|
43
|
+
const progressed = await startReadyJobs();
|
|
44
44
|
if (running.size === 0) {
|
|
45
45
|
if (pending.size === 0) {
|
|
46
46
|
break;
|
|
47
47
|
}
|
|
48
|
+
if (await cancelTerminalBlockedPendingJobs()) {
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
48
51
|
const ready = readyPendingJobs();
|
|
49
52
|
if (ready.length > 0) {
|
|
50
53
|
const blocked = ready[0];
|
|
@@ -69,7 +72,7 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
69
72
|
startedJobCount,
|
|
70
73
|
cancelledJobCount,
|
|
71
74
|
};
|
|
72
|
-
function startReadyJobs() {
|
|
75
|
+
async function startReadyJobs() {
|
|
73
76
|
let progressed = false;
|
|
74
77
|
for (const job of [...pending.values()]) {
|
|
75
78
|
const dependencyStatus = dependencyTerminalStatus(job);
|
|
@@ -77,7 +80,7 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
77
80
|
continue;
|
|
78
81
|
}
|
|
79
82
|
if (dependencyStatus !== "ready") {
|
|
80
|
-
cancelPendingJob(job, dependencyStatus);
|
|
83
|
+
await cancelPendingJob(job, dependencyStatus);
|
|
81
84
|
progressed = true;
|
|
82
85
|
continue;
|
|
83
86
|
}
|
|
@@ -97,7 +100,7 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
97
100
|
};
|
|
98
101
|
startedJobCount += 1;
|
|
99
102
|
maxConcurrency = Math.max(maxConcurrency, running.size + 1);
|
|
100
|
-
args.onJobStarted
|
|
103
|
+
await runJobHook(args.onJobStarted, runningJob);
|
|
101
104
|
const promise = finishJob(runningJob, cost);
|
|
102
105
|
running.set(job.id, { cost, promise });
|
|
103
106
|
progressed = true;
|
|
@@ -107,6 +110,18 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
107
110
|
function readyPendingJobs() {
|
|
108
111
|
return [...pending.values()].filter((job) => dependencyTerminalStatus(job) === "ready");
|
|
109
112
|
}
|
|
113
|
+
async function cancelTerminalBlockedPendingJobs() {
|
|
114
|
+
let cancelled = false;
|
|
115
|
+
for (const job of [...pending.values()]) {
|
|
116
|
+
const dependencyStatus = dependencyTerminalStatus(job);
|
|
117
|
+
if (dependencyStatus !== "failed" && dependencyStatus !== "cancelled") {
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
await cancelPendingJob(job, dependencyStatus);
|
|
121
|
+
cancelled = true;
|
|
122
|
+
}
|
|
123
|
+
return cancelled;
|
|
124
|
+
}
|
|
110
125
|
function dependencyTerminalStatus(job) {
|
|
111
126
|
const jobDependencies = dependencies.get(job.id) ?? [];
|
|
112
127
|
let blocked = false;
|
|
@@ -128,7 +143,7 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
128
143
|
}
|
|
129
144
|
return blocked ? "blocked" : "ready";
|
|
130
145
|
}
|
|
131
|
-
function cancelPendingJob(job, dependencyStatus) {
|
|
146
|
+
async function cancelPendingJob(job, dependencyStatus) {
|
|
132
147
|
pending.delete(job.id);
|
|
133
148
|
const finishedAt = now();
|
|
134
149
|
const cancelled = {
|
|
@@ -141,7 +156,7 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
141
156
|
cancelledJobCount += 1;
|
|
142
157
|
terminal.set(job.id, cancelled);
|
|
143
158
|
results.set(job.id, cancelled);
|
|
144
|
-
args.onJobFinished
|
|
159
|
+
await runJobHook(args.onJobFinished, cancelled);
|
|
145
160
|
}
|
|
146
161
|
async function finishJob(runningJob, cost) {
|
|
147
162
|
let completed;
|
|
@@ -164,9 +179,15 @@ export async function runWorkbenchExecutionDag(args) {
|
|
|
164
179
|
}
|
|
165
180
|
terminal.set(runningJob.id, completed);
|
|
166
181
|
results.set(runningJob.id, completed);
|
|
167
|
-
args.onJobFinished
|
|
182
|
+
await runJobHook(args.onJobFinished, completed);
|
|
168
183
|
}
|
|
169
184
|
}
|
|
185
|
+
async function runJobHook(hook, job) {
|
|
186
|
+
if (!hook) {
|
|
187
|
+
return;
|
|
188
|
+
}
|
|
189
|
+
await hook(job);
|
|
190
|
+
}
|
|
170
191
|
export function workbenchJobDependencies(job) {
|
|
171
192
|
const input = jsonRecord(job.input);
|
|
172
193
|
const dependsOn = input.dependsOn;
|
|
@@ -175,7 +196,10 @@ export function workbenchJobDependencies(job) {
|
|
|
175
196
|
: [];
|
|
176
197
|
}
|
|
177
198
|
export function workbenchJobResources(job) {
|
|
178
|
-
const
|
|
199
|
+
const input = jsonRecord(job.input);
|
|
200
|
+
const resources = input.kind === "workbench.skill.eval.job.v1"
|
|
201
|
+
? input.resources
|
|
202
|
+
: jsonRecord(jsonRecord(input.execution).policy).resources;
|
|
179
203
|
const record = jsonRecord(resources);
|
|
180
204
|
return {
|
|
181
205
|
cpu: readPositiveResource(record.cpu, job.id, "resources.cpu"),
|
package/dist/execution-traces.js
CHANGED
|
@@ -140,7 +140,7 @@ export function readWorkbenchExecutionTraceFiles(files) {
|
|
|
140
140
|
export function traceSessionLabel(filePath, role) {
|
|
141
141
|
const innerPath = traceSessionInnerPath(filePath);
|
|
142
142
|
if (innerPath === "runner/session") {
|
|
143
|
-
return "
|
|
143
|
+
return "Skill run";
|
|
144
144
|
}
|
|
145
145
|
if (innerPath === "improver/session") {
|
|
146
146
|
return "Improver";
|
package/dist/generic-spec.d.ts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import type { SurfaceSnapshotFile, WorkbenchAdapterInvocation, WorkbenchExecutionNetworkPolicy, WorkbenchExecutionResources } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
|
-
export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
|
-
export declare const CANDIDATE_SPEC_FILE = "candidate.yaml";
|
|
5
3
|
export interface WorkbenchRuntimeSpec {
|
|
6
4
|
dockerfile: string;
|
|
7
5
|
workdir?: string;
|
|
@@ -16,17 +14,10 @@ export interface WorkbenchRuntimeSpec {
|
|
|
16
14
|
export interface WorkbenchPathRef {
|
|
17
15
|
path: string;
|
|
18
16
|
}
|
|
19
|
-
export interface
|
|
17
|
+
export interface WorkbenchSkillPrepareSpec {
|
|
20
18
|
command: string;
|
|
21
19
|
}
|
|
22
|
-
export interface
|
|
23
|
-
version: 4;
|
|
24
|
-
name: string;
|
|
25
|
-
description: string;
|
|
26
|
-
adapters: string[];
|
|
27
|
-
engine: WorkbenchAdapterInvocation;
|
|
28
|
-
}
|
|
29
|
-
export interface WorkbenchCandidateRunSpec extends WorkbenchAdapterInvocation {
|
|
20
|
+
export interface WorkbenchSkillAgentSpec extends WorkbenchAdapterInvocation {
|
|
30
21
|
name: string;
|
|
31
22
|
}
|
|
32
23
|
export interface WorkbenchCaseSelector {
|
|
@@ -37,48 +28,21 @@ export interface WorkbenchSelectionSpec {
|
|
|
37
28
|
metric: string;
|
|
38
29
|
cases?: WorkbenchCaseSelector;
|
|
39
30
|
}
|
|
40
|
-
export interface WorkbenchCandidateImproveSpec extends WorkbenchAdapterInvocation {
|
|
41
|
-
edits: string[];
|
|
42
|
-
optimizeOn?: WorkbenchCaseSelector;
|
|
43
|
-
selectBy?: WorkbenchSelectionSpec;
|
|
44
|
-
}
|
|
45
|
-
export interface WorkbenchCandidateManifestSpec {
|
|
46
|
-
version: 4;
|
|
47
|
-
name: string;
|
|
48
|
-
description?: string;
|
|
49
|
-
files: WorkbenchPathRef;
|
|
50
|
-
prepare?: WorkbenchCandidatePrepareSpec;
|
|
51
|
-
adapters: string[];
|
|
52
|
-
defaultRun?: string;
|
|
53
|
-
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
54
|
-
improve?: WorkbenchCandidateImproveSpec;
|
|
55
|
-
}
|
|
56
|
-
export interface ResolvedCandidateSpec extends WorkbenchCandidateManifestSpec {
|
|
57
|
-
selectedRunId: string;
|
|
58
|
-
}
|
|
59
|
-
export interface WorkbenchResolvedSource {
|
|
60
|
-
version: 4;
|
|
61
|
-
benchmark: AuthoredBenchmarkSpec;
|
|
62
|
-
candidate: ResolvedCandidateSpec;
|
|
63
|
-
}
|
|
64
31
|
export interface GenericRunSpec {
|
|
65
32
|
version: 4;
|
|
66
33
|
name: string;
|
|
67
34
|
description: string;
|
|
68
|
-
|
|
35
|
+
eval: {
|
|
69
36
|
name: string;
|
|
70
37
|
description: string;
|
|
71
38
|
engine: WorkbenchAdapterInvocation;
|
|
72
39
|
};
|
|
73
|
-
|
|
40
|
+
skill: {
|
|
74
41
|
name: string;
|
|
75
42
|
description?: string;
|
|
76
43
|
files: WorkbenchPathRef;
|
|
77
|
-
prepare?:
|
|
78
|
-
|
|
79
|
-
selectedRunId: string;
|
|
80
|
-
selectedRunName: string;
|
|
81
|
-
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
44
|
+
prepare?: WorkbenchSkillPrepareSpec;
|
|
45
|
+
agents: Record<string, WorkbenchSkillAgentSpec>;
|
|
82
46
|
improve?: {
|
|
83
47
|
edits: string[];
|
|
84
48
|
optimizeOn?: WorkbenchCaseSelector;
|
|
@@ -101,33 +65,15 @@ export interface ResolvedEngineCaseExecutionConfig {
|
|
|
101
65
|
run: WorkbenchAdapterInvocation;
|
|
102
66
|
}
|
|
103
67
|
export declare const DEFAULT_EXECUTION_RESOURCES: WorkbenchExecutionResources;
|
|
104
|
-
export declare function validateWorkbenchResolvedSourceYaml(source: string): WorkbenchSpecValidation;
|
|
105
|
-
export declare function resolveWorkbenchResolvedSourceYaml(source: string): GenericRunSpec;
|
|
106
|
-
export declare function engineResolveBindingForSourceYaml(source: string): EngineResolveBinding;
|
|
107
|
-
export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
|
|
108
|
-
export declare function resolveWorkbenchSourceFiles(args: {
|
|
109
|
-
benchmarkSource: string;
|
|
110
|
-
candidateSource: string;
|
|
111
|
-
runId?: string | null;
|
|
112
|
-
}): GenericRunSpec;
|
|
113
|
-
export declare function parseWorkbenchSourceFiles(args: {
|
|
114
|
-
benchmarkSource: string;
|
|
115
|
-
candidateSource?: string;
|
|
116
|
-
runId?: string | null;
|
|
117
|
-
}): WorkbenchResolvedSource;
|
|
118
|
-
export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
|
|
119
|
-
export declare function isWorkbenchCandidateManifestPath(filePath: string): boolean;
|
|
120
68
|
export declare function resolveEngineCaseExecutionConfig(args: {
|
|
121
69
|
spec: GenericRunSpec;
|
|
122
70
|
engineCase: GenericEngineCaseSpec;
|
|
123
71
|
}): ResolvedEngineCaseExecutionConfig;
|
|
124
|
-
export declare function engineResolveInvocationForSpec(spec: GenericRunSpec): WorkbenchAdapterInvocation;
|
|
125
72
|
export declare function engineCaseFilesForRuntimeInput(args: {
|
|
126
73
|
spec: GenericRunSpec;
|
|
127
74
|
engineCase: WorkbenchEngineCase;
|
|
128
75
|
}): SurfaceSnapshotFile[];
|
|
129
76
|
export declare function engineCasePublicFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
130
|
-
export declare function engineCasePrivateFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
131
77
|
export declare function runtimeResources(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionResources;
|
|
132
78
|
export declare function runtimeNetwork(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionNetworkPolicy;
|
|
133
79
|
export declare function runtimeSandboxRef(runtime: WorkbenchRuntimeSpec): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,mBAAmB,EACnB,0BAA0B,EAC1B,+BAA+B,EAC/B,2BAA2B,EAC5B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAE1C,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,yBAAyB;IACxC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,uBAAwB,SAAQ,0BAA0B;IACzE,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,qBAAqB,CAAC;CAC/B;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,yBAAyB,CAAC;QACpC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;YAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;YACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;SACnC,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|