@workbench-ai/workbench-core 0.0.46 → 0.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-events.d.ts +2 -2
- package/dist/execution-events.d.ts.map +1 -1
- package/dist/execution-events.js +3 -3
- package/dist/{execution-phases.d.ts → execution-evidence.d.ts} +8 -7
- package/dist/execution-evidence.d.ts.map +1 -0
- package/dist/{execution-phases.js → execution-evidence.js} +91 -51
- package/dist/execution-graph.js +1 -2
- package/dist/execution-jobs.js +1 -1
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +5 -10
- package/dist/execution-runtime-types.d.ts +7 -3
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-traces.d.ts +11 -1
- package/dist/execution-traces.d.ts.map +1 -1
- package/dist/execution-traces.js +305 -2
- package/dist/generic-spec.d.ts +8 -3
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +26 -37
- package/dist/index.d.ts +22 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +868 -214
- package/dist/runtime-dockerfile.d.ts +14 -0
- package/dist/runtime-dockerfile.d.ts.map +1 -0
- package/dist/runtime-dockerfile.js +65 -0
- package/dist/sandbox-backends/docker.d.ts.map +1 -1
- package/dist/sandbox-backends/docker.js +9 -12
- package/dist/sandbox-backends/index.d.ts.map +1 -1
- package/dist/sandbox-backends/index.js +2 -1
- package/dist/sandbox-inputs.d.ts.map +1 -1
- package/dist/sandbox-inputs.js +1 -0
- package/dist/sandbox-plane.d.ts +1 -0
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +12 -22
- package/dist/trace-files.d.ts +2 -2
- package/dist/trace-files.d.ts.map +1 -1
- package/dist/trace-files.js +4 -4
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +22 -13
- package/dist/execution-phases.d.ts.map +0 -1
package/dist/execution-traces.js
CHANGED
|
@@ -55,13 +55,14 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
|
|
|
55
55
|
const summaries = [];
|
|
56
56
|
for (const job of args.jobs) {
|
|
57
57
|
const prefix = sanitizeTraceComponent(job.id);
|
|
58
|
+
const traceJobId = job.jobId ?? job.id;
|
|
58
59
|
spans.push(...job.trace.spans.map((span) => ({
|
|
59
60
|
...span,
|
|
60
61
|
id: `${prefix}:${span.id}`,
|
|
61
62
|
parent_id: span.parent_id ? `${prefix}:${span.parent_id}` : null,
|
|
62
63
|
stage_id: args.stageId ?? span.stage_id,
|
|
63
64
|
stage_run_index: null,
|
|
64
|
-
attributes: withTraceJobId(span.attributes,
|
|
65
|
+
attributes: withTraceJobId(span.attributes, traceJobId),
|
|
65
66
|
})));
|
|
66
67
|
events.push(...job.trace.events.map((event) => ({
|
|
67
68
|
...event,
|
|
@@ -69,7 +70,7 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
|
|
|
69
70
|
span_id: `${prefix}:${event.span_id}`,
|
|
70
71
|
stage_id: args.stageId ?? event.stage_id,
|
|
71
72
|
stage_run_index: null,
|
|
72
|
-
attributes: withTraceJobId(event.attributes,
|
|
73
|
+
attributes: withTraceJobId(event.attributes, traceJobId),
|
|
73
74
|
})));
|
|
74
75
|
summaries.push(...job.trace.summaries.map((summary) => ({
|
|
75
76
|
...summary,
|
|
@@ -84,6 +85,308 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
|
|
|
84
85
|
summaries: summaries.sort(compareTraceSummaries),
|
|
85
86
|
};
|
|
86
87
|
}
|
|
88
|
+
export function buildWorkbenchTraceSessionsFromFiles(args) {
|
|
89
|
+
return args.files
|
|
90
|
+
.filter((file) => file.encoding === "utf8" && file.path.endsWith("/trace.json"))
|
|
91
|
+
.sort((left, right) => traceFileDisplayOrder(left.path) - traceFileDisplayOrder(right.path) ||
|
|
92
|
+
left.path.localeCompare(right.path))
|
|
93
|
+
.flatMap((file, index) => {
|
|
94
|
+
const trace = readWorkbenchExecutionTraceFile(file);
|
|
95
|
+
if (!trace) {
|
|
96
|
+
return [];
|
|
97
|
+
}
|
|
98
|
+
const prefix = traceFilePrefix(file.path, index);
|
|
99
|
+
const role = traceRoleForFilePath(file.path, args.purpose ?? null, args.fallbackRole);
|
|
100
|
+
return [{
|
|
101
|
+
id: `${args.job.id}:${prefix}`,
|
|
102
|
+
jobId: args.job.id,
|
|
103
|
+
role,
|
|
104
|
+
kind: traceSessionKindForFilePath(file.path, role),
|
|
105
|
+
label: traceSessionLabel(file.path, role),
|
|
106
|
+
sourcePath: file.path,
|
|
107
|
+
trace: prefixTraceFileIds(trace, prefix),
|
|
108
|
+
metadata: {
|
|
109
|
+
trace_file: file.path,
|
|
110
|
+
},
|
|
111
|
+
}];
|
|
112
|
+
});
|
|
113
|
+
}
|
|
114
|
+
export function combineWorkbenchTraceSessions(sessions) {
|
|
115
|
+
return {
|
|
116
|
+
trace_id: sessions.length === 1 ? sessions[0].trace.trace_id : "combined-job-trace",
|
|
117
|
+
spans: sessions.flatMap((session) => session.trace.spans).sort(compareTraceSpans),
|
|
118
|
+
events: sessions.flatMap((session) => session.trace.events).sort(compareTraceEvents),
|
|
119
|
+
summaries: sessions.flatMap((session) => session.trace.summaries).sort(compareTraceSummaries),
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
export function readWorkbenchExecutionTraceFiles(files) {
|
|
123
|
+
const traces = files
|
|
124
|
+
.filter((file) => file.encoding === "utf8" && file.path.endsWith("/trace.json"))
|
|
125
|
+
.sort((left, right) => left.path.localeCompare(right.path))
|
|
126
|
+
.flatMap((file, index) => {
|
|
127
|
+
const trace = readWorkbenchExecutionTraceFile(file);
|
|
128
|
+
return trace ? [prefixTraceFileIds(trace, traceFilePrefix(file.path, index))] : [];
|
|
129
|
+
});
|
|
130
|
+
if (traces.length === 0) {
|
|
131
|
+
return null;
|
|
132
|
+
}
|
|
133
|
+
return {
|
|
134
|
+
trace_id: traces.length === 1 ? traces[0].trace_id : "combined-job-trace",
|
|
135
|
+
spans: traces.flatMap((trace) => trace.spans).sort(compareTraceSpans),
|
|
136
|
+
events: traces.flatMap((trace) => trace.events).sort(compareTraceEvents),
|
|
137
|
+
summaries: traces.flatMap((trace) => trace.summaries).sort(compareTraceSummaries),
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
export function traceSessionLabel(filePath, role) {
|
|
141
|
+
const innerPath = traceSessionInnerPath(filePath);
|
|
142
|
+
if (innerPath === "runner/session") {
|
|
143
|
+
return "Subject runner";
|
|
144
|
+
}
|
|
145
|
+
if (innerPath === "optimizer/session") {
|
|
146
|
+
return "Optimizer";
|
|
147
|
+
}
|
|
148
|
+
const parts = innerPath
|
|
149
|
+
.split("/")
|
|
150
|
+
.filter((part) => part.length > 0 && part !== "session");
|
|
151
|
+
const label = parts.slice(-2).join(" ") || role;
|
|
152
|
+
return formatTraceLabelText(label);
|
|
153
|
+
}
|
|
154
|
+
function traceSessionKindForFilePath(filePath, role) {
|
|
155
|
+
const innerPath = traceSessionInnerPath(filePath);
|
|
156
|
+
const parts = innerPath
|
|
157
|
+
.split("/")
|
|
158
|
+
.filter((part) => part.length > 0 && part !== "session");
|
|
159
|
+
return parts.at(-1) ?? role;
|
|
160
|
+
}
|
|
161
|
+
function readWorkbenchExecutionTraceFile(file) {
|
|
162
|
+
const traceRecord = parseJsonObject(file.content);
|
|
163
|
+
if (!traceRecord) {
|
|
164
|
+
return null;
|
|
165
|
+
}
|
|
166
|
+
const spans = Array.isArray(traceRecord.spans)
|
|
167
|
+
? traceRecord.spans.map(readTraceSpan).filter((span) => span !== null)
|
|
168
|
+
: [];
|
|
169
|
+
const events = Array.isArray(traceRecord.events)
|
|
170
|
+
? traceRecord.events.map(readTraceEvent).filter((event) => event !== null)
|
|
171
|
+
: [];
|
|
172
|
+
const summaries = Array.isArray(traceRecord.summaries)
|
|
173
|
+
? traceRecord.summaries.map(readTraceSummary).filter((summary) => summary !== null)
|
|
174
|
+
: [];
|
|
175
|
+
if (spans.length === 0 && events.length === 0 && summaries.length === 0) {
|
|
176
|
+
return null;
|
|
177
|
+
}
|
|
178
|
+
return {
|
|
179
|
+
trace_id: readString(traceRecord.trace_id) ?? "agent-trace",
|
|
180
|
+
spans,
|
|
181
|
+
events,
|
|
182
|
+
summaries,
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
function prefixTraceFileIds(trace, prefix) {
|
|
186
|
+
return {
|
|
187
|
+
trace_id: `${prefix}:${trace.trace_id}`,
|
|
188
|
+
spans: trace.spans.map((span) => ({
|
|
189
|
+
...span,
|
|
190
|
+
id: `${prefix}:${span.id}`,
|
|
191
|
+
parent_id: span.parent_id ? `${prefix}:${span.parent_id}` : null,
|
|
192
|
+
attributes: {
|
|
193
|
+
...span.attributes,
|
|
194
|
+
trace_file: prefix,
|
|
195
|
+
},
|
|
196
|
+
})),
|
|
197
|
+
events: trace.events.map((event) => ({
|
|
198
|
+
...event,
|
|
199
|
+
id: `${prefix}:${event.id}`,
|
|
200
|
+
span_id: `${prefix}:${event.span_id}`,
|
|
201
|
+
attributes: {
|
|
202
|
+
...event.attributes,
|
|
203
|
+
trace_file: prefix,
|
|
204
|
+
},
|
|
205
|
+
})),
|
|
206
|
+
summaries: trace.summaries.map((summary) => ({ ...summary })),
|
|
207
|
+
};
|
|
208
|
+
}
|
|
209
|
+
function traceFilePrefix(filePath, index) {
|
|
210
|
+
const safe = filePath
|
|
211
|
+
.replace(/^\.workbench\/traces\//u, "")
|
|
212
|
+
.replace(/\/trace\.json$/u, "")
|
|
213
|
+
.replace(/[^A-Za-z0-9_-]+/g, "-")
|
|
214
|
+
.replace(/^-+|-+$/g, "");
|
|
215
|
+
return safe || `trace-${index + 1}`;
|
|
216
|
+
}
|
|
217
|
+
function traceFileDisplayOrder(filePath) {
|
|
218
|
+
if (filePath.includes("/runner/") || filePath.includes("/optimizer/")) {
|
|
219
|
+
return 0;
|
|
220
|
+
}
|
|
221
|
+
return 1;
|
|
222
|
+
}
|
|
223
|
+
function traceRoleForFilePath(filePath, purpose, fallbackRole) {
|
|
224
|
+
if (filePath.includes("/runner/")) {
|
|
225
|
+
return "runner";
|
|
226
|
+
}
|
|
227
|
+
if (filePath.includes("/optimizer/") || purpose === "improve") {
|
|
228
|
+
return "optimizer";
|
|
229
|
+
}
|
|
230
|
+
if (filePath.includes("/engine/")) {
|
|
231
|
+
return "engine";
|
|
232
|
+
}
|
|
233
|
+
return fallbackRole;
|
|
234
|
+
}
|
|
235
|
+
function traceSessionInnerPath(filePath) {
|
|
236
|
+
const withoutTraceFile = filePath.replace(/\/trace\.json$/u, "");
|
|
237
|
+
const markerIndexes = [
|
|
238
|
+
withoutTraceFile.indexOf("/runner/"),
|
|
239
|
+
withoutTraceFile.indexOf("/optimizer/"),
|
|
240
|
+
withoutTraceFile.indexOf("/engine/"),
|
|
241
|
+
].filter((index) => index >= 0);
|
|
242
|
+
const firstMarker = Math.min(...markerIndexes);
|
|
243
|
+
if (Number.isFinite(firstMarker)) {
|
|
244
|
+
return withoutTraceFile.slice(firstMarker + 1);
|
|
245
|
+
}
|
|
246
|
+
return withoutTraceFile.replace(/^\.workbench\/traces\/[^/]+\//u, "");
|
|
247
|
+
}
|
|
248
|
+
function formatTraceLabelText(value) {
|
|
249
|
+
return value
|
|
250
|
+
.replace(/[_-]+/g, " ")
|
|
251
|
+
.replace(/\s+/g, " ")
|
|
252
|
+
.trim()
|
|
253
|
+
.replace(/^\w/u, (match) => match.toUpperCase());
|
|
254
|
+
}
|
|
255
|
+
function readTraceSpan(value) {
|
|
256
|
+
const record = jsonRecord(value);
|
|
257
|
+
if (!record) {
|
|
258
|
+
return null;
|
|
259
|
+
}
|
|
260
|
+
const id = readString(record.id);
|
|
261
|
+
const kind = traceSpanKind(record.kind);
|
|
262
|
+
const status = traceStatus(record.status);
|
|
263
|
+
const startedAt = readString(record.started_at);
|
|
264
|
+
if (!id || !kind || !status || !startedAt) {
|
|
265
|
+
return null;
|
|
266
|
+
}
|
|
267
|
+
return {
|
|
268
|
+
id,
|
|
269
|
+
parent_id: readString(record.parent_id),
|
|
270
|
+
attempt_number: readPositiveInteger(record.attempt_number) ?? 1,
|
|
271
|
+
stage_id: readString(record.stage_id),
|
|
272
|
+
stage_run_index: readInteger(record.stage_run_index),
|
|
273
|
+
kind,
|
|
274
|
+
title: readString(record.title) ?? id,
|
|
275
|
+
status,
|
|
276
|
+
started_at: startedAt,
|
|
277
|
+
ended_at: readString(record.ended_at),
|
|
278
|
+
attributes: (jsonRecord(record.attributes) ?? {}),
|
|
279
|
+
};
|
|
280
|
+
}
|
|
281
|
+
function readTraceEvent(value) {
|
|
282
|
+
const record = jsonRecord(value);
|
|
283
|
+
if (!record) {
|
|
284
|
+
return null;
|
|
285
|
+
}
|
|
286
|
+
const id = readString(record.id);
|
|
287
|
+
const spanId = readString(record.span_id);
|
|
288
|
+
const kind = traceEventKind(record.kind);
|
|
289
|
+
const at = readString(record.at);
|
|
290
|
+
if (!id || !spanId || !kind || !at) {
|
|
291
|
+
return null;
|
|
292
|
+
}
|
|
293
|
+
return {
|
|
294
|
+
id,
|
|
295
|
+
span_id: spanId,
|
|
296
|
+
attempt_number: readPositiveInteger(record.attempt_number) ?? 1,
|
|
297
|
+
stage_id: readString(record.stage_id),
|
|
298
|
+
stage_run_index: readInteger(record.stage_run_index),
|
|
299
|
+
kind,
|
|
300
|
+
at,
|
|
301
|
+
message: readString(record.message) ?? kind,
|
|
302
|
+
attributes: (jsonRecord(record.attributes) ?? {}),
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
function readTraceSummary(value) {
|
|
306
|
+
const record = jsonRecord(value);
|
|
307
|
+
if (!record) {
|
|
308
|
+
return null;
|
|
309
|
+
}
|
|
310
|
+
const status = traceStatus(record.status);
|
|
311
|
+
const startedAt = readString(record.started_at);
|
|
312
|
+
if (!status || !startedAt) {
|
|
313
|
+
return null;
|
|
314
|
+
}
|
|
315
|
+
return {
|
|
316
|
+
attempt_number: readPositiveInteger(record.attempt_number) ?? 1,
|
|
317
|
+
stage_id: readString(record.stage_id),
|
|
318
|
+
stage_run_index: readInteger(record.stage_run_index),
|
|
319
|
+
status,
|
|
320
|
+
started_at: startedAt,
|
|
321
|
+
ended_at: readString(record.ended_at),
|
|
322
|
+
duration_ms: readNonNegativeInteger(record.duration_ms) ?? 0,
|
|
323
|
+
tool_call_count: readNonNegativeInteger(record.tool_call_count) ?? 0,
|
|
324
|
+
input_tokens: readNonNegativeInteger(record.input_tokens),
|
|
325
|
+
output_tokens: readNonNegativeInteger(record.output_tokens),
|
|
326
|
+
usage: jsonRecord(record.usage),
|
|
327
|
+
final_output_present: record.final_output_present === true,
|
|
328
|
+
error_message: readString(record.error_message),
|
|
329
|
+
};
|
|
330
|
+
}
|
|
331
|
+
function parseJsonObject(source) {
|
|
332
|
+
try {
|
|
333
|
+
return jsonRecord(JSON.parse(source));
|
|
334
|
+
}
|
|
335
|
+
catch {
|
|
336
|
+
return null;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
function jsonRecord(value) {
|
|
340
|
+
return value && typeof value === "object" && !Array.isArray(value)
|
|
341
|
+
? value
|
|
342
|
+
: null;
|
|
343
|
+
}
|
|
344
|
+
function readString(value) {
|
|
345
|
+
return typeof value === "string" && value.length > 0 ? value : null;
|
|
346
|
+
}
|
|
347
|
+
function readInteger(value) {
|
|
348
|
+
return typeof value === "number" && Number.isInteger(value) ? value : null;
|
|
349
|
+
}
|
|
350
|
+
function readPositiveInteger(value) {
|
|
351
|
+
const integer = readInteger(value);
|
|
352
|
+
return integer !== null && integer > 0 ? integer : null;
|
|
353
|
+
}
|
|
354
|
+
function readNonNegativeInteger(value) {
|
|
355
|
+
const integer = readInteger(value);
|
|
356
|
+
return integer !== null && integer >= 0 ? integer : null;
|
|
357
|
+
}
|
|
358
|
+
function traceSpanKind(value) {
|
|
359
|
+
return value === "hook" ||
|
|
360
|
+
value === "stage" ||
|
|
361
|
+
value === "turn" ||
|
|
362
|
+
value === "tool_call" ||
|
|
363
|
+
value === "assistant_output" ||
|
|
364
|
+
value === "usage" ||
|
|
365
|
+
value === "gate" ||
|
|
366
|
+
value === "action" ||
|
|
367
|
+
value === "error"
|
|
368
|
+
? value
|
|
369
|
+
: null;
|
|
370
|
+
}
|
|
371
|
+
function traceEventKind(value) {
|
|
372
|
+
return value === "status" ||
|
|
373
|
+
value === "message" ||
|
|
374
|
+
value === "output" ||
|
|
375
|
+
value === "usage" ||
|
|
376
|
+
value === "error" ||
|
|
377
|
+
value === "note"
|
|
378
|
+
? value
|
|
379
|
+
: null;
|
|
380
|
+
}
|
|
381
|
+
function traceStatus(value) {
|
|
382
|
+
return value === "running" ||
|
|
383
|
+
value === "completed" ||
|
|
384
|
+
value === "failed" ||
|
|
385
|
+
value === "canceled" ||
|
|
386
|
+
value === "warning"
|
|
387
|
+
? value
|
|
388
|
+
: null;
|
|
389
|
+
}
|
|
87
390
|
function withTraceJobId(attributes, jobId) {
|
|
88
391
|
return {
|
|
89
392
|
...attributes,
|
package/dist/generic-spec.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
3
|
export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
4
|
export interface WorkbenchRuntimeSpec {
|
|
@@ -15,6 +15,9 @@ export interface WorkbenchRuntimeSpec {
|
|
|
15
15
|
export interface WorkbenchPathRef {
|
|
16
16
|
path: string;
|
|
17
17
|
}
|
|
18
|
+
export interface WorkbenchSubjectPrepareSpec {
|
|
19
|
+
command: string;
|
|
20
|
+
}
|
|
18
21
|
export interface AuthoredBenchmarkSpec {
|
|
19
22
|
version: 3;
|
|
20
23
|
name: string;
|
|
@@ -27,6 +30,7 @@ export interface WorkbenchSubjectManifestSpec {
|
|
|
27
30
|
name: string;
|
|
28
31
|
description?: string;
|
|
29
32
|
files: WorkbenchPathRef;
|
|
33
|
+
prepare?: WorkbenchSubjectPrepareSpec;
|
|
30
34
|
adapters: string[];
|
|
31
35
|
run: WorkbenchAdapterInvocation;
|
|
32
36
|
}
|
|
@@ -58,6 +62,7 @@ export interface GenericRunSpec {
|
|
|
58
62
|
name: string;
|
|
59
63
|
description?: string;
|
|
60
64
|
files: WorkbenchPathRef;
|
|
65
|
+
prepare?: WorkbenchSubjectPrepareSpec;
|
|
61
66
|
};
|
|
62
67
|
optimizer?: {
|
|
63
68
|
name: string;
|
|
@@ -105,8 +110,8 @@ export declare function engineCaseFilesForRuntimeInput(args: {
|
|
|
105
110
|
spec: GenericRunSpec;
|
|
106
111
|
engineCase: WorkbenchEngineCase;
|
|
107
112
|
}): SurfaceSnapshotFile[];
|
|
108
|
-
export declare function
|
|
109
|
-
export declare function
|
|
113
|
+
export declare function engineCasePublicFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
114
|
+
export declare function engineCasePrivateFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
110
115
|
export declare function runtimeResources(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionResources;
|
|
111
116
|
export declare function runtimeNetwork(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionNetworkPolicy;
|
|
112
117
|
export declare function runtimeSandboxRef(runtime: WorkbenchRuntimeSpec): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,2BAA2B;IAC1C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;IACtC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;KACvC,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,
QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|
package/dist/generic-spec.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
|
+
import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
|
|
2
3
|
import YAML from "yaml";
|
|
3
4
|
export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
5
|
export const DEFAULT_EXECUTION_RESOURCES = {
|
|
@@ -110,15 +111,15 @@ export function engineResolveInvocationForSpec(spec) {
|
|
|
110
111
|
}
|
|
111
112
|
export function engineCaseFilesForRuntimeInput(args) {
|
|
112
113
|
void args.spec;
|
|
113
|
-
return
|
|
114
|
+
return engineCasePublicFiles(args.engineCase);
|
|
114
115
|
}
|
|
115
|
-
export function
|
|
116
|
-
return (engineCase.files.
|
|
116
|
+
export function engineCasePublicFiles(engineCase) {
|
|
117
|
+
return (engineCase.files.public ?? [])
|
|
117
118
|
.map((file) => ({ ...file }))
|
|
118
119
|
.sort((left, right) => left.path.localeCompare(right.path));
|
|
119
120
|
}
|
|
120
|
-
export function
|
|
121
|
-
return (engineCase.files.
|
|
121
|
+
export function engineCasePrivateFiles(engineCase) {
|
|
122
|
+
return (engineCase.files.private ?? [])
|
|
122
123
|
.map((file) => ({ ...file }))
|
|
123
124
|
.sort((left, right) => left.path.localeCompare(right.path));
|
|
124
125
|
}
|
|
@@ -154,6 +155,7 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
154
155
|
name: source.subject.name,
|
|
155
156
|
...(source.subject.description ? { description: source.subject.description } : {}),
|
|
156
157
|
files: cloneJson(source.subject.files),
|
|
158
|
+
...(source.subject.prepare ? { prepare: cloneJson(source.subject.prepare) } : {}),
|
|
157
159
|
},
|
|
158
160
|
...(source.optimizer
|
|
159
161
|
? {
|
|
@@ -229,6 +231,7 @@ function normalizeSubjectRecord(record, label, errors) {
|
|
|
229
231
|
"name",
|
|
230
232
|
"description",
|
|
231
233
|
"files",
|
|
234
|
+
"prepare",
|
|
232
235
|
"adapters",
|
|
233
236
|
"run",
|
|
234
237
|
], errors);
|
|
@@ -236,6 +239,7 @@ function normalizeSubjectRecord(record, label, errors) {
|
|
|
236
239
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
237
240
|
const description = readOptionalString(record.description, `${label}.description`, errors);
|
|
238
241
|
const files = normalizePathRef(record.files, `${label}.files`, errors);
|
|
242
|
+
const prepare = normalizeSubjectPrepare(record.prepare, `${label}.prepare`, errors);
|
|
239
243
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
240
244
|
const run = normalizePhaseAdapter(record.run, `${label}.run`, errors);
|
|
241
245
|
return name && files && run
|
|
@@ -244,11 +248,24 @@ function normalizeSubjectRecord(record, label, errors) {
|
|
|
244
248
|
name,
|
|
245
249
|
...(description ? { description } : {}),
|
|
246
250
|
files,
|
|
251
|
+
...(prepare ? { prepare } : {}),
|
|
247
252
|
adapters,
|
|
248
253
|
run,
|
|
249
254
|
}
|
|
250
255
|
: null;
|
|
251
256
|
}
|
|
257
|
+
function normalizeSubjectPrepare(value, label, errors) {
|
|
258
|
+
if (value === undefined) {
|
|
259
|
+
return undefined;
|
|
260
|
+
}
|
|
261
|
+
const record = readRequiredRecord(value, label, errors);
|
|
262
|
+
if (!record) {
|
|
263
|
+
return undefined;
|
|
264
|
+
}
|
|
265
|
+
rejectUnknownKeys(record, label, ["command"], errors);
|
|
266
|
+
const command = readRequiredString(record.command, `${label}.command`, errors);
|
|
267
|
+
return command ? { command } : undefined;
|
|
268
|
+
}
|
|
252
269
|
function normalizeOptimizerRecord(record, label, errors) {
|
|
253
270
|
if (!record) {
|
|
254
271
|
return null;
|
|
@@ -438,41 +455,13 @@ function normalizeAdapterSources(value, label, errors) {
|
|
|
438
455
|
return [...new Set(sources)];
|
|
439
456
|
}
|
|
440
457
|
function normalizeNetworkConfig(network, label, errors) {
|
|
441
|
-
rejectUnknownKeys(network, label, ["egress"
|
|
458
|
+
rejectUnknownKeys(network, label, ["egress"], errors);
|
|
442
459
|
const egress = readOptionalString(network.egress, `${label}.egress`, errors) ?? "open";
|
|
443
|
-
if (egress
|
|
444
|
-
errors.push(`${label}.egress must be none
|
|
460
|
+
if (!isWorkbenchExecutionNetworkEgress(egress)) {
|
|
461
|
+
errors.push(`${label}.egress must be none or open.`);
|
|
445
462
|
return null;
|
|
446
463
|
}
|
|
447
|
-
|
|
448
|
-
? undefined
|
|
449
|
-
: normalizeNetworkAllowList(network.allow, `${label}.allow`, errors);
|
|
450
|
-
if (egress !== "allowlist") {
|
|
451
|
-
if (network.allow !== undefined) {
|
|
452
|
-
errors.push(`${label}.allow is only supported when ${label}.egress is allowlist.`);
|
|
453
|
-
}
|
|
454
|
-
return { egress };
|
|
455
|
-
}
|
|
456
|
-
if (!allow || allow.length === 0) {
|
|
457
|
-
errors.push(`${label}.allow must contain at least one host when ${label}.egress is allowlist.`);
|
|
458
|
-
}
|
|
459
|
-
return {
|
|
460
|
-
egress,
|
|
461
|
-
...(allow && allow.length > 0 ? { allow } : {}),
|
|
462
|
-
};
|
|
463
|
-
}
|
|
464
|
-
function normalizeNetworkAllowList(value, label, errors) {
|
|
465
|
-
if (!Array.isArray(value)) {
|
|
466
|
-
errors.push(`${label} must be an array of hosts.`);
|
|
467
|
-
return [];
|
|
468
|
-
}
|
|
469
|
-
return value.flatMap((entry, index) => {
|
|
470
|
-
if (typeof entry !== "string" || entry.trim().length === 0) {
|
|
471
|
-
errors.push(`${label}[${index}] must be a non-empty string.`);
|
|
472
|
-
return [];
|
|
473
|
-
}
|
|
474
|
-
return [entry.trim()];
|
|
475
|
-
});
|
|
464
|
+
return { egress };
|
|
476
465
|
}
|
|
477
466
|
function normalizePhaseAdapter(value, label, errors) {
|
|
478
467
|
const spec = readAdapterRecord(value, label, errors);
|
package/dist/index.d.ts
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
|
-
import type { AuthoredWorkbenchSourceDocument,
|
|
1
|
+
import type { AuthoredWorkbenchSourceDocument, SubjectCaseExecutionRef, SubjectCaseReview, SubjectFilePreview, SubjectFileSummary, SubjectLineageGraph, SubjectRecord, SubjectSummary, EvalCaseResult, EvaluationScorecard, HostedWorkbenchEnvironment, HostedWorkbenchEnvironmentVersion, HostedWorkbenchFileInput, HostedWorkbenchJob, Json, RuntimeEvent, SurfaceSnapshotFile, UsageSummary, WorkbenchSubjectPatch, WorkbenchExecutionCapability, WorkbenchExecutionSpec, WorkbenchResult } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import { type WorkbenchAdapterOperationExecutor, type WorkbenchAdapterOperationResult } from "@workbench-ai/workbench-protocol";
|
|
2
3
|
import { type GenericEngineCaseSpec, type GenericRunSpec, type WorkbenchEngineCase } from "./generic-spec.ts";
|
|
3
4
|
import type { WorkbenchExecutionRuntimeInput } from "./execution-runtime-types.ts";
|
|
4
5
|
import { createWorkbenchExecutionCapability, type SandboxExecutionFileStore, type SandboxPlane } from "./sandbox-plane.ts";
|
|
5
|
-
export { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES,
|
|
6
|
-
export {
|
|
6
|
+
export { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, engineResolveInvocationForSpec, engineResolveBindingForSpec, engineResolveBindingForSourceYaml, isWorkbenchSubjectManifestPath, parseWorkbenchSourceFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, resolveWorkbenchSourceFiles, runtimeNetwork, runtimeResources, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, type AuthoredBenchmarkSpec, type AuthoredOptimizerSpec, type GenericRunSpec, type GenericEngineCaseSpec, type ResolvedSubjectSpec, type WorkbenchEngineCase, type WorkbenchResolvedSource, type WorkbenchSubjectManifestSpec, } from "./generic-spec.ts";
|
|
7
|
+
export { composeRuntimeDockerfileWithAdapterInstallers, type WorkbenchRuntimeAdapterInstaller, type WorkbenchRuntimeAdapterInstallerFile, } from "./runtime-dockerfile.ts";
|
|
8
|
+
export { adapterCommandName, cloneWorkbenchAdapterManifest, collectWorkbenchAdapterAuthRequirements, collectWorkbenchAdapterInvocations, parseWorkbenchAdapterManifest, workbenchAdapterManifestRequiresAuth, workbenchAdapterManifestSupportsOperation, workbenchAdapterOperationCommand, workbenchAdapterOperationExecutor, withDefaultWorkbenchAdapterAuth, withDefaultWorkbenchAdapterAuthProfiles, type WorkbenchPrimitiveAdapterOperation, type WorkbenchAdapterOperation, type WorkbenchAdapterOperationExecutor, type WorkbenchAdapterOperationManifest, type WorkbenchAdapterSlotManifest, type WorkbenchAdapterAuthRequirement, type WorkbenchAdapterAuthManifest, type WorkbenchAdapterAuthMethodManifest, type WorkbenchAdapterInvocationLike, type WorkbenchAdapterManifest, } from "@workbench-ai/workbench-protocol";
|
|
7
9
|
export { adapterAuthEnv, createWorkbenchAdapterAuthBundle, defaultWorkbenchAdapterAuthStoreRoot, localWorkbenchAdapterAuthStore, normalizeWorkbenchAdapterAuthTarget, parseWorkbenchAdapterAuthTarget, sanitizeWorkbenchAdapterAuthBundle, type WorkbenchAdapterAuthBundle, type WorkbenchAdapterAuthEnvVar, type WorkbenchAdapterAuthFile, type WorkbenchAdapterAuthStatus, type WorkbenchAdapterAuthStatusRecord, type WorkbenchAdapterAuthStore, type WorkbenchAdapterAuthTarget, } from "./adapter-auth.ts";
|
|
8
|
-
export type { WorkbenchExecutionRuntimeInput,
|
|
10
|
+
export type { WorkbenchExecutionRuntimeInput, WorkbenchWorkloadStepCommand, } from "./execution-runtime-types.ts";
|
|
9
11
|
export { asRuntimeRecord, importNodeModule, nodeBuiltin, normalizeWorkbenchWorkerId, normalizeRuntimeRegistry, quoteShellArg, resolveDockerRuntimeImageRef, resolveWorkbenchWorkerId, } from "./runtime-utils.ts";
|
|
10
12
|
export { assignUsageRole, extractExecutionUsageFromTrace, mergeUsageSummaries, } from "./execution-usage.ts";
|
|
11
13
|
export { createWorkbenchProgressStdoutParser, publishWorkbenchProgressStdoutEnvelope, } from "./execution-events.ts";
|
|
12
14
|
export { resolveSandboxTemplateImage, } from "./sandbox-backends/template-images.ts";
|
|
13
|
-
export { readOutputTraceFiles,
|
|
15
|
+
export { readOutputTraceFiles, workbenchTraceExecutionDirectory, workbenchTraceRunDirectory, workbenchTraceRunDirectoryName, } from "./trace-files.ts";
|
|
14
16
|
export { assertWorkbenchAdapterOperationSupport, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterOperationIssues, collectWorkbenchAdapterOperationRequirements, ensureWorkbenchAdapterOutputDir, WORKBENCH_ADAPTER_RESULT_FILE, normalizeWorkbenchAdapterOperationRequest, normalizeWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, writeWorkbenchAdapterOperationResult, type WorkbenchAdapterOperationRequest, type WorkbenchAdapterOperationResult, type WorkbenchAdapterOperationResultValue, type WorkbenchAdapterOperationRequirement, type WorkbenchEngineResolveResult, type WorkbenchEngineCaseSpec, } from "@workbench-ai/workbench-protocol";
|
|
15
17
|
export { applyWorkbenchSubjectPatch, type ApplyWorkbenchSubjectPatchInput, } from "./subject-patch.ts";
|
|
16
18
|
export { createWorkbenchSandboxFileStore, createSandboxAdapterRequest, executionResultFromCompletedSandboxJob, materializeWorkbenchSandboxInput, readWorkbenchExecutionSpec, sanitizeWorkbenchExecutionJobForSandbox, } from "./sandbox-inputs.ts";
|
|
@@ -19,15 +21,15 @@ export { createBaselineSubjectExecution, createBaselineSubjectJob, createWorkben
|
|
|
19
21
|
export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity, workbenchJobDependencies, workbenchJobHostCost, workbenchJobResources, type WorkbenchExecutionDagCapacity, type WorkbenchExecutionDagResult, type WorkbenchExecutionDagRunInput, } from "./execution-scheduler.ts";
|
|
20
22
|
export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, type WorkbenchExecutionOutputPayloads, } from "./execution-outputs.ts";
|
|
21
23
|
export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, type SandboxExecutionFileStore, type SandboxExecutionOptions, type SandboxBackendCapabilities, type SandboxBackendDescriptor, type SandboxCreateRequest, type SandboxEnvironmentImage, type SandboxExecRequest, type SandboxHandle, type SandboxMaterializedInput, type SandboxPlane, type ValidatedSandboxExecutionResult, } from "./sandbox-plane.ts";
|
|
22
|
-
export {
|
|
23
|
-
export { finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, type WorkbenchTraceMergeJob, } from "./execution-traces.ts";
|
|
24
|
+
export { buildSubjectCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.ts";
|
|
25
|
+
export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, type WorkbenchTraceMergeJob, } from "./execution-traces.ts";
|
|
24
26
|
export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForProvider, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxProviderName, sandboxProviderAdmissionForResources, sandboxProviderDefaultMaxConcurrentJobs, sandboxProviderLeaseScope, sandboxHostHealthExpectationForProvider, type SandboxProviderAdmission, type SandboxProviderHostCost, type SandboxProviderLeaseRequest, type SandboxProviderRequestedResources, type SandboxHostHealthExpectation, type WorkbenchSandboxProviderName, } from "./sandbox-backends/index.ts";
|
|
25
27
|
export type { WorkbenchExecutionEventPublisher, WorkbenchExecutionProgressTarget, } from "./execution-events.ts";
|
|
26
|
-
export type { SubjectCaseReview, SubjectRecord, EngineResolveBinding,
|
|
28
|
+
export type { SubjectCaseReview, SubjectRecord, EngineResolveBinding, EvaluationScorecard, HostedWorkbenchJob, Json, RunSummary, RuntimeEvent, SurfaceSnapshotFile, WorkbenchExecutionCapability, WorkbenchExecutionTrace, WorkbenchTraceSession, WorkbenchSandboxHandle, WorkbenchSandboxExecutionMetadata, } from "@workbench-ai/workbench-contract";
|
|
27
29
|
export interface WorkbenchRunMaterialization {
|
|
28
30
|
subjects: SubjectRecord[];
|
|
29
31
|
subjectFiles: Record<string, SurfaceSnapshotFile[]>;
|
|
30
|
-
evaluations:
|
|
32
|
+
evaluations: EvaluationScorecard[];
|
|
31
33
|
activeSubjectId: string | null;
|
|
32
34
|
selectedSubject: SubjectRecord | null;
|
|
33
35
|
completedJobCount: number;
|
|
@@ -52,6 +54,8 @@ export interface WorkbenchRunWorkload {
|
|
|
52
54
|
export interface RuntimeWorkloadResult {
|
|
53
55
|
files: SurfaceSnapshotFile[];
|
|
54
56
|
fileChanges: string[];
|
|
57
|
+
operationResults?: WorkbenchAdapterOperationResult[];
|
|
58
|
+
workspaceFiles?: SurfaceSnapshotFile[];
|
|
55
59
|
subjectPatch?: WorkbenchSubjectPatch;
|
|
56
60
|
result?: WorkbenchResult;
|
|
57
61
|
metrics?: Record<string, number>;
|
|
@@ -86,6 +90,7 @@ export declare function materializeWorkbenchRunResult(args: {
|
|
|
86
90
|
previousSubject?: SubjectRecord | null;
|
|
87
91
|
existingSubjectCount: number;
|
|
88
92
|
}): WorkbenchRunMaterialization;
|
|
93
|
+
/**
 * Returns a string identifier for an evaluation scorecard, given a run ID and
 * a subject ID.
 *
 * NOTE(review): only the signature is visible here; the format of the returned
 * ID and whether it is stable across versions should be confirmed against the
 * implementation.
 *
 * @param runId - Identifier of the run.
 * @param subjectId - Identifier of the subject.
 * @returns The scorecard identifier as a string.
 */
export declare function evaluationScorecardId(runId: string, subjectId: string): string;
|
|
89
94
|
export declare function selectExecutionOutputFilesForInspection(args: {
|
|
90
95
|
purpose: string | null | undefined;
|
|
91
96
|
files: readonly SurfaceSnapshotFile[];
|
|
@@ -97,6 +102,10 @@ export declare function createSubjectRevisionTraceInputFiles(args: {
|
|
|
97
102
|
jobs: readonly HostedWorkbenchJob[];
|
|
98
103
|
events: readonly RuntimeEvent[];
|
|
99
104
|
}): SurfaceSnapshotFile[];
|
|
105
|
+
/**
 * Produces a list of `SurfaceSnapshotFile` entries from an optional subject
 * record and an optional path.
 *
 * Both `args.subject` and `args.path` may be omitted, and `subject` may be
 * `null`. NOTE(review): the result for a missing/null subject and the default
 * used when `path` is omitted are not visible in this declaration — presumably
 * an empty array and a built-in default path; confirm against the
 * implementation.
 *
 * @param args - Optional `subject` record and optional output `path`.
 * @returns An array of snapshot files.
 */
export declare function createSubjectEvaluationTraceInputFiles(args: {
|
|
106
|
+
subject?: SubjectRecord | null;
|
|
107
|
+
path?: string;
|
|
108
|
+
}): SurfaceSnapshotFile[];
|
|
100
109
|
export interface WorkbenchProjectSourceFilesInput {
|
|
101
110
|
specSource?: string;
|
|
102
111
|
specFiles?: readonly SurfaceSnapshotFile[];
|
|
@@ -134,7 +143,7 @@ export declare function createSubjectFilePreview(args: {
|
|
|
134
143
|
/**
 * Builds a `SubjectCaseReview` for one case of a subject.
 *
 * `args.subject` and `args.caseId` are required; `args.executions` is an
 * optional list of `SubjectCaseExecutionRef` values. NOTE(review): how the
 * execution refs are used in the review, and what happens when `caseId` does
 * not match a case on the subject, cannot be determined from this declaration.
 *
 * @param args - The subject, the case ID, and optional execution references.
 * @returns The review for the requested case.
 */
export declare function createCaseReview(args: {
|
|
135
144
|
subject: SubjectRecord;
|
|
136
145
|
caseId: string;
|
|
137
|
-
|
|
146
|
+
executions?: SubjectCaseExecutionRef[];
|
|
138
147
|
}): SubjectCaseReview;
|
|
139
148
|
export declare function createWorkbenchRunWorkload(args: {
|
|
140
149
|
job: HostedWorkbenchJob;
|
|
@@ -150,8 +159,10 @@ export interface WorkbenchExecutionJobOptions {
|
|
|
150
159
|
createSandboxPlaneForProvider?: (provider: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore) => SandboxPlane;
|
|
151
160
|
}
|
|
152
161
|
/**
 * Asynchronously executes a workbench execution job and resolves with a
 * `HostedWorkbenchJob`.
 *
 * NOTE(review): whether the resolved job is a new object or the input job
 * updated in place, and whether failures reject or are encoded in the returned
 * job, is not visible in this declaration.
 *
 * @param args - Runtime input describing the execution.
 * @param options - Job options (required by this signature).
 * @returns A promise for the resulting hosted job.
 */
export declare function executeWorkbenchExecutionJob(args: WorkbenchExecutionRuntimeInput, options: WorkbenchExecutionJobOptions): Promise<HostedWorkbenchJob>;
|
|
162
|
+
/**
 * Returns the `WorkbenchAdapterOperationExecutor` for a runtime input.
 *
 * Only the `job`, `adapterManifests`, and `runtimeControlOperation` fields of
 * `WorkbenchExecutionRuntimeInput` are accepted (via `Pick`), so callers may
 * pass a full runtime input or just those three fields.
 * NOTE(review): the selection rules are not visible in this declaration.
 *
 * @param args - The subset of runtime input used to choose the executor.
 * @returns The executor for the operation.
 */
export declare function workbenchExecutionExecutorForRuntimeInput(args: Pick<WorkbenchExecutionRuntimeInput, "job" | "adapterManifests" | "runtimeControlOperation">): WorkbenchAdapterOperationExecutor;
|
|
153
163
|
/**
 * Returns the execution purpose for a hosted job, typed as
 * `WorkbenchExecutionSpec["purpose"]`, or `null`.
 *
 * NOTE(review): presumably `null` means the job carries no recognizable
 * execution purpose — confirm against the implementation.
 *
 * @param job - The hosted job to inspect.
 * @returns The purpose value, or `null`.
 */
export declare function workbenchExecutionPurpose(job: HostedWorkbenchJob): WorkbenchExecutionSpec["purpose"] | null;
|
|
154
|
-
export declare function
|
|
164
|
+
/**
 * Asynchronously executes an adapter for the given execution spec and resolves
 * with a `HostedWorkbenchJob`.
 *
 * `capability` is required and is typed as the return type of
 * `createWorkbenchExecutionCapability`.
 * NOTE(review): the name suggests the adapter runs in the calling
 * process/runtime rather than a separate sandbox — confirm against the
 * implementation. The expected format of `startedAt` (presumably an ISO
 * timestamp) is also not visible here.
 *
 * @param args - Runtime input for the execution.
 * @param execution - The execution spec to run.
 * @param startedAt - Start time of the execution, as a string.
 * @param capability - Execution capability for this run.
 * @returns A promise for the resulting hosted job.
 */
export declare function executeAdapterInCurrentRuntime(args: WorkbenchExecutionRuntimeInput, execution: WorkbenchExecutionSpec, startedAt: string, capability: ReturnType<typeof createWorkbenchExecutionCapability>): Promise<HostedWorkbenchJob>;
|
|
165
|
+
/**
 * Asynchronously executes a runtime control operation sequence for the given
 * execution spec and resolves with a `HostedWorkbenchJob`.
 *
 * Unlike `executeAdapterInCurrentRuntime`, the `capability` argument is
 * optional here.
 * NOTE(review): what is used when `capability` is omitted, and the expected
 * format of `startedAt` (presumably an ISO timestamp), are not visible in this
 * declaration — confirm against the implementation.
 *
 * @param args - Runtime input for the execution.
 * @param execution - The execution spec to run.
 * @param startedAt - Start time of the execution, as a string.
 * @param capability - Optional execution capability.
 * @returns A promise for the resulting hosted job.
 */
export declare function executeRuntimeControlOperationSequenceInCurrentRuntime(args: WorkbenchExecutionRuntimeInput, execution: WorkbenchExecutionSpec, startedAt: string, capability?: WorkbenchExecutionCapability): Promise<HostedWorkbenchJob>;
|
|
155
166
|
/**
 * Asynchronously stages a run workload under `root`; resolves with no value.
 *
 * NOTE(review): presumably `root` is a filesystem directory that the
 * workload's files are written into — confirm against the implementation,
 * including whether existing contents are overwritten.
 *
 * @param root - Location to stage the workload at.
 * @param workload - The workload to stage.
 * @returns A promise that resolves when staging completes.
 */
export declare function stageWorkbenchRunWorkload(root: string, workload: WorkbenchRunWorkload): Promise<void>;
|
|
156
167
|
/**
 * Returns a numeric workload timeout for the given run spec.
 *
 * NOTE(review): the `Ms` suffix suggests the value is in milliseconds; the
 * spec fields consulted and any default/clamping are not visible in this
 * declaration.
 *
 * @param spec - The run spec to derive the timeout from.
 * @returns The timeout as a number.
 */
export declare function workloadTimeoutMs(spec: GenericRunSpec): number;
|
|
157
168
|
/**
 * Looks up an environment version for an image among the supplied versions.
 *
 * The `versions` parameter is typed `readonly`, so the signature does not
 * permit mutating it. Returns `null` rather than throwing when no version is
 * returned.
 * NOTE(review): the matching rule between `image` and a version (exact ref,
 * digest, tag, …) is not visible in this declaration.
 *
 * @param image - Image reference to look up.
 * @param versions - Candidate environment versions.
 * @returns The matching environment version, or `null`.
 */
export declare function findEnvironmentVersionForImage(image: string, versions: readonly HostedWorkbenchEnvironmentVersion[]): HostedWorkbenchEnvironmentVersion | null;
|