@workbench-ai/workbench-core 0.0.46 → 0.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/execution-events.d.ts +2 -2
  2. package/dist/execution-events.d.ts.map +1 -1
  3. package/dist/execution-events.js +3 -3
  4. package/dist/{execution-phases.d.ts → execution-evidence.d.ts} +8 -7
  5. package/dist/execution-evidence.d.ts.map +1 -0
  6. package/dist/{execution-phases.js → execution-evidence.js} +91 -51
  7. package/dist/execution-graph.js +1 -2
  8. package/dist/execution-jobs.js +1 -1
  9. package/dist/execution-outputs.d.ts.map +1 -1
  10. package/dist/execution-outputs.js +5 -10
  11. package/dist/execution-runtime-types.d.ts +7 -3
  12. package/dist/execution-runtime-types.d.ts.map +1 -1
  13. package/dist/execution-traces.d.ts +11 -1
  14. package/dist/execution-traces.d.ts.map +1 -1
  15. package/dist/execution-traces.js +305 -2
  16. package/dist/generic-spec.d.ts +8 -3
  17. package/dist/generic-spec.d.ts.map +1 -1
  18. package/dist/generic-spec.js +26 -37
  19. package/dist/index.d.ts +22 -11
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +868 -214
  22. package/dist/runtime-dockerfile.d.ts +14 -0
  23. package/dist/runtime-dockerfile.d.ts.map +1 -0
  24. package/dist/runtime-dockerfile.js +65 -0
  25. package/dist/sandbox-backends/docker.d.ts.map +1 -1
  26. package/dist/sandbox-backends/docker.js +9 -12
  27. package/dist/sandbox-backends/index.d.ts.map +1 -1
  28. package/dist/sandbox-backends/index.js +2 -1
  29. package/dist/sandbox-inputs.d.ts.map +1 -1
  30. package/dist/sandbox-inputs.js +1 -0
  31. package/dist/sandbox-plane.d.ts +1 -0
  32. package/dist/sandbox-plane.d.ts.map +1 -1
  33. package/dist/sandbox-plane.js +12 -22
  34. package/dist/trace-files.d.ts +2 -2
  35. package/dist/trace-files.d.ts.map +1 -1
  36. package/dist/trace-files.js +4 -4
  37. package/package.json +3 -3
  38. package/worker/sandbox-adapter-runner.cjs +22 -13
  39. package/dist/execution-phases.d.ts.map +0 -1
@@ -55,13 +55,14 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
55
55
  const summaries = [];
56
56
  for (const job of args.jobs) {
57
57
  const prefix = sanitizeTraceComponent(job.id);
58
+ const traceJobId = job.jobId ?? job.id;
58
59
  spans.push(...job.trace.spans.map((span) => ({
59
60
  ...span,
60
61
  id: `${prefix}:${span.id}`,
61
62
  parent_id: span.parent_id ? `${prefix}:${span.parent_id}` : null,
62
63
  stage_id: args.stageId ?? span.stage_id,
63
64
  stage_run_index: null,
64
- attributes: withTraceJobId(span.attributes, job.id),
65
+ attributes: withTraceJobId(span.attributes, traceJobId),
65
66
  })));
66
67
  events.push(...job.trace.events.map((event) => ({
67
68
  ...event,
@@ -69,7 +70,7 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
69
70
  span_id: `${prefix}:${event.span_id}`,
70
71
  stage_id: args.stageId ?? event.stage_id,
71
72
  stage_run_index: null,
72
- attributes: withTraceJobId(event.attributes, job.id),
73
+ attributes: withTraceJobId(event.attributes, traceJobId),
73
74
  })));
74
75
  summaries.push(...job.trace.summaries.map((summary) => ({
75
76
  ...summary,
@@ -84,6 +85,308 @@ export function mergeWorkbenchExecutionTracesByJob(args) {
84
85
  summaries: summaries.sort(compareTraceSummaries),
85
86
  };
86
87
  }
88
/**
 * Builds one trace session per usable `trace.json` file of a job.
 *
 * Only UTF-8 files whose path ends in `/trace.json` are considered. They are
 * ordered by `traceFileDisplayOrder` first and by path second; files for which
 * `readWorkbenchExecutionTraceFile` returns nothing are skipped.
 *
 * @param args - `files` (candidate files), `job` (its `id` namespaces the
 *   session ids), optional `purpose`, and `fallbackRole`.
 * @returns The session descriptors, in display order.
 */
export function buildWorkbenchTraceSessionsFromFiles(args) {
    const traceFiles = args.files.filter((candidate) => candidate.encoding === "utf8" && candidate.path.endsWith("/trace.json"));
    traceFiles.sort((a, b) => {
        const byDisplayOrder = traceFileDisplayOrder(a.path) - traceFileDisplayOrder(b.path);
        return byDisplayOrder !== 0 ? byDisplayOrder : a.path.localeCompare(b.path);
    });
    const sessions = [];
    traceFiles.forEach((traceFile, position) => {
        const parsed = readWorkbenchExecutionTraceFile(traceFile);
        if (!parsed) {
            return;
        }
        // `position` is the index after filtering and sorting; it only matters
        // when the path yields an empty prefix.
        const prefix = traceFilePrefix(traceFile.path, position);
        const role = traceRoleForFilePath(traceFile.path, args.purpose ?? null, args.fallbackRole);
        sessions.push({
            id: `${args.job.id}:${prefix}`,
            jobId: args.job.id,
            role,
            kind: traceSessionKindForFilePath(traceFile.path, role),
            label: traceSessionLabel(traceFile.path, role),
            sourcePath: traceFile.path,
            trace: prefixTraceFileIds(parsed, prefix),
            metadata: {
                trace_file: traceFile.path,
            },
        });
    });
    return sessions;
}
114
/**
 * Merges the traces of several sessions into a single trace object.
 *
 * The merged trace keeps the original `trace_id` only when exactly one session
 * is given; otherwise it is labelled `"combined-job-trace"`. Spans, events and
 * summaries are concatenated across sessions and then sorted with the
 * module's trace comparators.
 *
 * @param sessions - Sessions whose `trace` objects should be merged.
 * @returns The combined trace.
 */
export function combineWorkbenchTraceSessions(sessions) {
    const gather = (field) => sessions.flatMap((entry) => entry.trace[field]);
    const traceId = sessions.length === 1
        ? sessions[0].trace.trace_id
        : "combined-job-trace";
    const spans = gather("spans").sort(compareTraceSpans);
    const events = gather("events").sort(compareTraceEvents);
    const summaries = gather("summaries").sort(compareTraceSummaries);
    return { trace_id: traceId, spans, events, summaries };
}
122
+ export function readWorkbenchExecutionTraceFiles(files) {
123
+ const traces = files
124
+ .filter((file) => file.encoding === "utf8" && file.path.endsWith("/trace.json"))
125
+ .sort((left, right) => left.path.localeCompare(right.path))
126
+ .flatMap((file, index) => {
127
+ const trace = readWorkbenchExecutionTraceFile(file);
128
+ return trace ? [prefixTraceFileIds(trace, traceFilePrefix(file.path, index))] : [];
129
+ });
130
+ if (traces.length === 0) {
131
+ return null;
132
+ }
133
+ return {
134
+ trace_id: traces.length === 1 ? traces[0].trace_id : "combined-job-trace",
135
+ spans: traces.flatMap((trace) => trace.spans).sort(compareTraceSpans),
136
+ events: traces.flatMap((trace) => trace.events).sort(compareTraceEvents),
137
+ summaries: traces.flatMap((trace) => trace.summaries).sort(compareTraceSummaries),
138
+ };
139
+ }
140
/**
 * Derives a human-readable label for a trace session from its file path.
 *
 * The two well-known inner paths `runner/session` and `optimizer/session` get
 * fixed labels. Any other path is labelled from its last two non-empty,
 * non-`session` segments, falling back to `role` when none remain.
 *
 * @param filePath - Path of the trace file.
 * @param role - Role used as the label when the path yields no segments.
 * @returns The display label.
 */
export function traceSessionLabel(filePath, role) {
    const innerPath = traceSessionInnerPath(filePath);
    switch (innerPath) {
        case "runner/session":
            return "Subject runner";
        case "optimizer/session":
            return "Optimizer";
        default:
            break;
    }
    const segments = innerPath
        .split("/")
        .filter((segment) => segment !== "" && segment !== "session");
    const tail = segments.slice(-2).join(" ");
    return formatTraceLabelText(tail || role);
}
154
/**
 * Picks the session kind for a trace file: the last non-empty, non-`session`
 * segment of its inner path, or `role` when there is no such segment.
 *
 * @param filePath - Path of the trace file.
 * @param role - Fallback kind.
 * @returns The session kind.
 */
function traceSessionKindForFilePath(filePath, role) {
    const segments = traceSessionInnerPath(filePath)
        .split("/")
        .filter((segment) => segment !== "" && segment !== "session");
    const lastSegment = segments[segments.length - 1];
    return lastSegment ?? role;
}
161
/**
 * Parses a single trace file into a trace object.
 *
 * The file content must be a JSON object. Its `spans`, `events` and
 * `summaries` arrays are read entry by entry, and entries the readers reject
 * are dropped. A missing `trace_id` defaults to `"agent-trace"`.
 *
 * @param file - File whose `content` holds the JSON text.
 * @returns The trace, or `null` when the content is not a JSON object or no
 *   valid span, event or summary was found.
 */
function readWorkbenchExecutionTraceFile(file) {
    const traceRecord = parseJsonObject(file.content);
    if (!traceRecord) {
        return null;
    }
    // Non-array fields are treated as empty lists.
    const readList = (raw, readEntry) => Array.isArray(raw)
        ? raw.map(readEntry).filter((entry) => entry !== null)
        : [];
    const spans = readList(traceRecord.spans, readTraceSpan);
    const events = readList(traceRecord.events, readTraceEvent);
    const summaries = readList(traceRecord.summaries, readTraceSummary);
    const hasContent = spans.length > 0 || events.length > 0 || summaries.length > 0;
    if (!hasContent) {
        return null;
    }
    return {
        trace_id: readString(traceRecord.trace_id) ?? "agent-trace",
        spans,
        events,
        summaries,
    };
}
185
/**
 * Returns a copy of `trace` whose ids are namespaced with `prefix`.
 *
 * The trace id, span ids, span parent ids, event ids and event span ids all
 * become `"<prefix>:<id>"`. A falsy parent id becomes `null`. Each span and
 * event also gets a `trace_file` attribute set to the prefix. Summaries are
 * shallow-copied unchanged.
 *
 * @param trace - Trace with `trace_id`, `spans`, `events` and `summaries`.
 * @param prefix - Namespace to prepend.
 * @returns The prefixed trace.
 */
function prefixTraceFileIds(trace, prefix) {
    const scoped = (id) => `${prefix}:${id}`;
    const tagged = (attributes) => ({ ...attributes, trace_file: prefix });
    const spans = trace.spans.map((span) => ({
        ...span,
        id: scoped(span.id),
        parent_id: span.parent_id ? scoped(span.parent_id) : null,
        attributes: tagged(span.attributes),
    }));
    const events = trace.events.map((event) => ({
        ...event,
        id: scoped(event.id),
        span_id: scoped(event.span_id),
        attributes: tagged(event.attributes),
    }));
    const summaries = trace.summaries.map((summary) => ({ ...summary }));
    return {
        trace_id: scoped(trace.trace_id),
        spans,
        events,
        summaries,
    };
}
209
/**
 * Turns a trace file path into an id-safe prefix.
 *
 * A leading `.workbench/traces/` and a trailing `/trace.json` are removed.
 * Every run of characters outside `[A-Za-z0-9_-]` becomes a single `-`, and
 * leading and trailing dashes are trimmed. If nothing remains, the prefix is
 * `trace-<index + 1>`.
 *
 * @param filePath - Path of the trace file.
 * @param index - Zero-based position used for the fallback prefix.
 * @returns The prefix.
 */
function traceFilePrefix(filePath, index) {
    const withoutRoot = filePath.replace(/^\.workbench\/traces\//u, "");
    const withoutFileName = withoutRoot.replace(/\/trace\.json$/u, "");
    const dashed = withoutFileName.replace(/[^A-Za-z0-9_-]+/g, "-");
    const trimmed = dashed.replace(/^-+|-+$/g, "");
    if (trimmed) {
        return trimmed;
    }
    return `trace-${index + 1}`;
}
217
/**
 * Sort rank for a trace file: `0` when the path contains a `/runner/` or
 * `/optimizer/` segment, `1` otherwise, so those traces sort first.
 *
 * @param filePath - Path of the trace file.
 * @returns `0` or `1`.
 */
function traceFileDisplayOrder(filePath) {
    const isPrimary = ["/runner/", "/optimizer/"].some((marker) => filePath.includes(marker));
    return isPrimary ? 0 : 1;
}
223
/**
 * Determines the role of a trace file.
 *
 * Checks are applied in order: a `/runner/` segment gives `"runner"`; an
 * `/optimizer/` segment or a `purpose` of `"improve"` gives `"optimizer"`; an
 * `/engine/` segment gives `"engine"`; anything else gives `fallbackRole`.
 *
 * @param filePath - Path of the trace file.
 * @param purpose - Job purpose, or `null`.
 * @param fallbackRole - Role returned when no rule matches.
 * @returns The role.
 */
function traceRoleForFilePath(filePath, purpose, fallbackRole) {
    const hasSegment = (name) => filePath.includes(`/${name}/`);
    if (hasSegment("runner")) {
        return "runner";
    }
    // The purpose check sits before the engine check, so an engine trace of an
    // "improve" job is reported as "optimizer".
    const isOptimizer = hasSegment("optimizer") || purpose === "improve";
    if (isOptimizer) {
        return "optimizer";
    }
    return hasSegment("engine") ? "engine" : fallbackRole;
}
235
/**
 * Extracts the part of a trace file path that identifies the session.
 *
 * After the trailing `/trace.json` is dropped, the result starts at the
 * earliest `/runner/`, `/optimizer/` or `/engine/` segment, without the
 * leading slash. When none is present, a leading `.workbench/traces/<dir>/`
 * is stripped instead.
 *
 * @param filePath - Path of the trace file.
 * @returns The inner path.
 */
function traceSessionInnerPath(filePath) {
    const stem = filePath.replace(/\/trace\.json$/u, "");
    let earliest = -1;
    for (const marker of ["/runner/", "/optimizer/", "/engine/"]) {
        const at = stem.indexOf(marker);
        if (at >= 0 && (earliest < 0 || at < earliest)) {
            earliest = at;
        }
    }
    if (earliest >= 0) {
        // +1 skips the slash that opens the marker.
        return stem.slice(earliest + 1);
    }
    return stem.replace(/^\.workbench\/traces\/[^/]+\//u, "");
}
248
/**
 * Normalises a raw label: underscores and dashes become spaces, whitespace
 * runs collapse to one space, the ends are trimmed, and a leading word
 * character is upper-cased.
 *
 * @param value - Raw label text.
 * @returns The formatted label.
 */
function formatTraceLabelText(value) {
    const spaced = value.replace(/[_-]+/g, " ");
    const collapsed = spaced.replace(/\s+/g, " ").trim();
    return collapsed.replace(/^\w/u, (first) => first.toUpperCase());
}
255
/**
 * Validates one raw span entry from a trace file.
 *
 * `id`, a recognised `kind`, a recognised `status` and `started_at` are
 * required. Optional fields fall back to defaults: `attempt_number` to 1,
 * `title` to the id, and `attributes` to an empty object.
 *
 * @param value - Raw entry from the parsed JSON.
 * @returns The normalised span, or `null` when the entry is not an object or
 *   lacks a required field.
 */
function readTraceSpan(value) {
    const raw = jsonRecord(value);
    if (!raw) {
        return null;
    }
    const id = readString(raw.id);
    const kind = traceSpanKind(raw.kind);
    const status = traceStatus(raw.status);
    const startedAt = readString(raw.started_at);
    const complete = Boolean(id && kind && status && startedAt);
    if (!complete) {
        return null;
    }
    const attributes = jsonRecord(raw.attributes);
    return {
        id,
        parent_id: readString(raw.parent_id),
        attempt_number: readPositiveInteger(raw.attempt_number) ?? 1,
        stage_id: readString(raw.stage_id),
        stage_run_index: readInteger(raw.stage_run_index),
        kind,
        title: readString(raw.title) ?? id,
        status,
        started_at: startedAt,
        ended_at: readString(raw.ended_at),
        attributes: attributes ?? {},
    };
}
281
/**
 * Validates one raw event entry from a trace file.
 *
 * `id`, `span_id`, a recognised `kind` and `at` are required. Optional fields
 * fall back to defaults: `attempt_number` to 1, `message` to the kind, and
 * `attributes` to an empty object.
 *
 * @param value - Raw entry from the parsed JSON.
 * @returns The normalised event, or `null` when the entry is not an object or
 *   lacks a required field.
 */
function readTraceEvent(value) {
    const raw = jsonRecord(value);
    if (!raw) {
        return null;
    }
    const id = readString(raw.id);
    const spanId = readString(raw.span_id);
    const kind = traceEventKind(raw.kind);
    const at = readString(raw.at);
    const complete = Boolean(id && spanId && kind && at);
    if (!complete) {
        return null;
    }
    const attributes = jsonRecord(raw.attributes);
    return {
        id,
        span_id: spanId,
        attempt_number: readPositiveInteger(raw.attempt_number) ?? 1,
        stage_id: readString(raw.stage_id),
        stage_run_index: readInteger(raw.stage_run_index),
        kind,
        at,
        message: readString(raw.message) ?? kind,
        attributes: attributes ?? {},
    };
}
305
/**
 * Validates one raw summary entry from a trace file.
 *
 * A recognised `status` and `started_at` are required. `attempt_number`
 * defaults to 1, `duration_ms` and `tool_call_count` default to 0, and
 * `final_output_present` is true only for a literal `true`. The remaining
 * fields are `null` when absent or invalid.
 *
 * @param value - Raw entry from the parsed JSON.
 * @returns The normalised summary, or `null` when the entry is not an object
 *   or lacks a required field.
 */
function readTraceSummary(value) {
    const raw = jsonRecord(value);
    if (!raw) {
        return null;
    }
    const status = traceStatus(raw.status);
    const startedAt = readString(raw.started_at);
    if (!(status && startedAt)) {
        return null;
    }
    const countOrZero = (candidate) => readNonNegativeInteger(candidate) ?? 0;
    return {
        attempt_number: readPositiveInteger(raw.attempt_number) ?? 1,
        stage_id: readString(raw.stage_id),
        stage_run_index: readInteger(raw.stage_run_index),
        status,
        started_at: startedAt,
        ended_at: readString(raw.ended_at),
        duration_ms: countOrZero(raw.duration_ms),
        tool_call_count: countOrZero(raw.tool_call_count),
        input_tokens: readNonNegativeInteger(raw.input_tokens),
        output_tokens: readNonNegativeInteger(raw.output_tokens),
        usage: jsonRecord(raw.usage),
        final_output_present: raw.final_output_present === true,
        error_message: readString(raw.error_message),
    };
}
331
/**
 * Parses `source` as JSON and returns it only if it is a plain object.
 *
 * @param source - JSON text.
 * @returns The parsed object, or `null` when parsing fails or the value is
 *   rejected by `jsonRecord`.
 */
function parseJsonObject(source) {
    let result = null;
    try {
        const parsed = JSON.parse(source);
        result = jsonRecord(parsed);
    }
    catch {
        // Invalid JSON is treated as "no trace", not as an error.
        result = null;
    }
    return result;
}
339
/**
 * Returns `value` when it is a non-null, non-array object, otherwise `null`.
 *
 * @param value - Any value.
 * @returns The same object reference, or `null`.
 */
function jsonRecord(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return null;
    }
    return value;
}
344
/**
 * Returns `value` when it is a non-empty string, otherwise `null`.
 *
 * @param value - Any value.
 * @returns The string, or `null`.
 */
function readString(value) {
    if (typeof value !== "string") {
        return null;
    }
    return value.length > 0 ? value : null;
}
347
/**
 * Returns `value` when it is an integer-valued number, otherwise `null`.
 *
 * @param value - Any value.
 * @returns The integer, or `null`.
 */
function readInteger(value) {
    const isInteger = typeof value === "number" && Number.isInteger(value);
    if (isInteger) {
        return value;
    }
    return null;
}
350
/**
 * Returns `value` when `readInteger` accepts it and it is greater than zero,
 * otherwise `null`.
 *
 * @param value - Any value.
 * @returns The positive integer, or `null`.
 */
function readPositiveInteger(value) {
    const parsed = readInteger(value);
    if (parsed === null || parsed <= 0) {
        return null;
    }
    return parsed;
}
354
/**
 * Returns `value` when `readInteger` accepts it and it is zero or greater,
 * otherwise `null`.
 *
 * @param value - Any value.
 * @returns The non-negative integer, or `null`.
 */
function readNonNegativeInteger(value) {
    const parsed = readInteger(value);
    if (parsed === null || parsed < 0) {
        return null;
    }
    return parsed;
}
358
/**
 * Returns `value` when it is one of the recognised span kinds, otherwise
 * `null`.
 *
 * @param value - Any value.
 * @returns The span kind, or `null`.
 */
function traceSpanKind(value) {
    const recognised = [
        "hook",
        "stage",
        "turn",
        "tool_call",
        "assistant_output",
        "usage",
        "gate",
        "action",
        "error",
    ];
    return recognised.includes(value) ? value : null;
}
371
/**
 * Returns `value` when it is one of the recognised event kinds, otherwise
 * `null`.
 *
 * @param value - Any value.
 * @returns The event kind, or `null`.
 */
function traceEventKind(value) {
    const recognised = ["status", "message", "output", "usage", "error", "note"];
    return recognised.includes(value) ? value : null;
}
381
/**
 * Returns `value` when it is one of the recognised trace statuses, otherwise
 * `null`.
 *
 * @param value - Any value.
 * @returns The status, or `null`.
 */
function traceStatus(value) {
    const recognised = ["running", "completed", "failed", "canceled", "warning"];
    return recognised.includes(value) ? value : null;
}
87
390
  function withTraceJobId(attributes, jobId) {
88
391
  return {
89
392
  ...attributes,
@@ -1,4 +1,4 @@
1
- import type { EngineResolveBinding, SurfaceSnapshotFile, WorkbenchAdapterInvocation, WorkbenchExecutionNetworkPolicy, WorkbenchExecutionResources, WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
1
+ import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
2
2
  import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
3
3
  export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
4
4
  export interface WorkbenchRuntimeSpec {
@@ -15,6 +15,9 @@ export interface WorkbenchRuntimeSpec {
15
15
  export interface WorkbenchPathRef {
16
16
  path: string;
17
17
  }
18
/** Optional `prepare` section of a subject specification. */
export interface WorkbenchSubjectPrepareSpec {
    /** The prepare command, as a non-empty string. */
    command: string;
}
18
21
  export interface AuthoredBenchmarkSpec {
19
22
  version: 3;
20
23
  name: string;
@@ -27,6 +30,7 @@ export interface WorkbenchSubjectManifestSpec {
27
30
  name: string;
28
31
  description?: string;
29
32
  files: WorkbenchPathRef;
33
+ prepare?: WorkbenchSubjectPrepareSpec;
30
34
  adapters: string[];
31
35
  run: WorkbenchAdapterInvocation;
32
36
  }
@@ -58,6 +62,7 @@ export interface GenericRunSpec {
58
62
  name: string;
59
63
  description?: string;
60
64
  files: WorkbenchPathRef;
65
+ prepare?: WorkbenchSubjectPrepareSpec;
61
66
  };
62
67
  optimizer?: {
63
68
  name: string;
@@ -105,8 +110,8 @@ export declare function engineCaseFilesForRuntimeInput(args: {
105
110
  spec: GenericRunSpec;
106
111
  engineCase: WorkbenchEngineCase;
107
112
  }): SurfaceSnapshotFile[];
108
- export declare function engineCaseSubjectVisibleFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
109
- export declare function engineCaseEnginePrivateFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
113
+ export declare function engineCasePublicFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
114
+ export declare function engineCasePrivateFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
110
115
  export declare function runtimeResources(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionResources;
111
116
  export declare function runtimeNetwork(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionNetworkPolicy;
112
117
  export declare function runtimeSandboxRef(runtime: WorkbenchRuntimeSpec): string;
@@ -1 +1 @@
1
- {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EAEpB,mBAAmB,EACnB,0BAA0B,EAC1B,+BAA+B,EAC/B,2BAA2B,EAC3B,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;KACzB,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;I
AChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,6BAA6B,CAC3C,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,4BAA4B,CAC1C,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
1
+ {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,2BAA2B;IAC1C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;IACtC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;KACvC,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IACl
C,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
@@ -1,4 +1,5 @@
1
1
  import { createHash } from "node:crypto";
2
+ import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
2
3
  import YAML from "yaml";
3
4
  export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
4
5
  export const DEFAULT_EXECUTION_RESOURCES = {
@@ -110,15 +111,15 @@ export function engineResolveInvocationForSpec(spec) {
110
111
  }
111
112
  export function engineCaseFilesForRuntimeInput(args) {
112
113
  void args.spec;
113
- return engineCaseSubjectVisibleFiles(args.engineCase);
114
+ return engineCasePublicFiles(args.engineCase);
114
115
  }
115
- export function engineCaseSubjectVisibleFiles(engineCase) {
116
- return (engineCase.files.subjectVisible ?? [])
116
+ export function engineCasePublicFiles(engineCase) {
117
+ return (engineCase.files.public ?? [])
117
118
  .map((file) => ({ ...file }))
118
119
  .sort((left, right) => left.path.localeCompare(right.path));
119
120
  }
120
- export function engineCaseEnginePrivateFiles(engineCase) {
121
- return (engineCase.files.enginePrivate ?? [])
121
+ export function engineCasePrivateFiles(engineCase) {
122
+ return (engineCase.files.private ?? [])
122
123
  .map((file) => ({ ...file }))
123
124
  .sort((left, right) => left.path.localeCompare(right.path));
124
125
  }
@@ -154,6 +155,7 @@ function genericSpecFromAuthoredBundle(source) {
154
155
  name: source.subject.name,
155
156
  ...(source.subject.description ? { description: source.subject.description } : {}),
156
157
  files: cloneJson(source.subject.files),
158
+ ...(source.subject.prepare ? { prepare: cloneJson(source.subject.prepare) } : {}),
157
159
  },
158
160
  ...(source.optimizer
159
161
  ? {
@@ -229,6 +231,7 @@ function normalizeSubjectRecord(record, label, errors) {
229
231
  "name",
230
232
  "description",
231
233
  "files",
234
+ "prepare",
232
235
  "adapters",
233
236
  "run",
234
237
  ], errors);
@@ -236,6 +239,7 @@ function normalizeSubjectRecord(record, label, errors) {
236
239
  const name = readRequiredString(record.name, `${label}.name`, errors);
237
240
  const description = readOptionalString(record.description, `${label}.description`, errors);
238
241
  const files = normalizePathRef(record.files, `${label}.files`, errors);
242
+ const prepare = normalizeSubjectPrepare(record.prepare, `${label}.prepare`, errors);
239
243
  const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
240
244
  const run = normalizePhaseAdapter(record.run, `${label}.run`, errors);
241
245
  return name && files && run
@@ -244,11 +248,24 @@ function normalizeSubjectRecord(record, label, errors) {
244
248
  name,
245
249
  ...(description ? { description } : {}),
246
250
  files,
251
+ ...(prepare ? { prepare } : {}),
247
252
  adapters,
248
253
  run,
249
254
  }
250
255
  : null;
251
256
  }
257
/**
 * Normalises the optional `prepare` section of a subject record.
 *
 * An absent section (`undefined`) is valid and yields `undefined` without
 * touching `errors`. Otherwise the value is read as a record whose only
 * allowed key is `command`, a required string. Problems are reported through
 * the shared validators, which receive the `errors` list.
 *
 * @param value - Raw `prepare` value from the authored spec.
 * @param label - Path used in error messages.
 * @param errors - Error list passed to the validators.
 * @returns `{ command }` when valid, otherwise `undefined`.
 */
function normalizeSubjectPrepare(value, label, errors) {
    const record = value === undefined
        ? null
        : readRequiredRecord(value, label, errors);
    if (!record) {
        return undefined;
    }
    rejectUnknownKeys(record, label, ["command"], errors);
    const command = readRequiredString(record.command, `${label}.command`, errors);
    if (!command) {
        return undefined;
    }
    return { command };
}
252
269
  function normalizeOptimizerRecord(record, label, errors) {
253
270
  if (!record) {
254
271
  return null;
@@ -438,41 +455,13 @@ function normalizeAdapterSources(value, label, errors) {
438
455
  return [...new Set(sources)];
439
456
  }
440
457
  function normalizeNetworkConfig(network, label, errors) {
441
- rejectUnknownKeys(network, label, ["egress", "allow"], errors);
458
+ rejectUnknownKeys(network, label, ["egress"], errors);
442
459
  const egress = readOptionalString(network.egress, `${label}.egress`, errors) ?? "open";
443
- if (egress !== "none" && egress !== "open" && egress !== "allowlist") {
444
- errors.push(`${label}.egress must be none, open, or allowlist.`);
460
+ if (!isWorkbenchExecutionNetworkEgress(egress)) {
461
+ errors.push(`${label}.egress must be none or open.`);
445
462
  return null;
446
463
  }
447
- const allow = network.allow === undefined
448
- ? undefined
449
- : normalizeNetworkAllowList(network.allow, `${label}.allow`, errors);
450
- if (egress !== "allowlist") {
451
- if (network.allow !== undefined) {
452
- errors.push(`${label}.allow is only supported when ${label}.egress is allowlist.`);
453
- }
454
- return { egress };
455
- }
456
- if (!allow || allow.length === 0) {
457
- errors.push(`${label}.allow must contain at least one host when ${label}.egress is allowlist.`);
458
- }
459
- return {
460
- egress,
461
- ...(allow && allow.length > 0 ? { allow } : {}),
462
- };
463
- }
464
- function normalizeNetworkAllowList(value, label, errors) {
465
- if (!Array.isArray(value)) {
466
- errors.push(`${label} must be an array of hosts.`);
467
- return [];
468
- }
469
- return value.flatMap((entry, index) => {
470
- if (typeof entry !== "string" || entry.trim().length === 0) {
471
- errors.push(`${label}[${index}] must be a non-empty string.`);
472
- return [];
473
- }
474
- return [entry.trim()];
475
- });
464
+ return { egress };
476
465
  }
477
466
  function normalizePhaseAdapter(value, label, errors) {
478
467
  const spec = readAdapterRecord(value, label, errors);
package/dist/index.d.ts CHANGED
@@ -1,16 +1,18 @@
1
- import type { AuthoredWorkbenchSourceDocument, SubjectCasePhaseRef, SubjectCaseReview, SubjectFilePreview, SubjectFileSummary, SubjectLineageGraph, SubjectRecord, SubjectSummary, EvalCaseResult, EvaluationResultRecord, HostedWorkbenchEnvironment, HostedWorkbenchEnvironmentVersion, HostedWorkbenchFileInput, HostedWorkbenchJob, Json, RuntimeEvent, SurfaceSnapshotFile, UsageSummary, WorkbenchSubjectPatch, WorkbenchExecutionSpec, WorkbenchResult } from "@workbench-ai/workbench-contract";
1
+ import type { AuthoredWorkbenchSourceDocument, SubjectCaseExecutionRef, SubjectCaseReview, SubjectFilePreview, SubjectFileSummary, SubjectLineageGraph, SubjectRecord, SubjectSummary, EvalCaseResult, EvaluationScorecard, HostedWorkbenchEnvironment, HostedWorkbenchEnvironmentVersion, HostedWorkbenchFileInput, HostedWorkbenchJob, Json, RuntimeEvent, SurfaceSnapshotFile, UsageSummary, WorkbenchSubjectPatch, WorkbenchExecutionCapability, WorkbenchExecutionSpec, WorkbenchResult } from "@workbench-ai/workbench-contract";
2
+ import { type WorkbenchAdapterOperationExecutor, type WorkbenchAdapterOperationResult } from "@workbench-ai/workbench-protocol";
2
3
  import { type GenericEngineCaseSpec, type GenericRunSpec, type WorkbenchEngineCase } from "./generic-spec.ts";
3
4
  import type { WorkbenchExecutionRuntimeInput } from "./execution-runtime-types.ts";
4
5
  import { createWorkbenchExecutionCapability, type SandboxExecutionFileStore, type SandboxPlane } from "./sandbox-plane.ts";
5
- export { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCaseEnginePrivateFiles, engineCaseFilesForRuntimeInput, engineCaseSubjectVisibleFiles, engineResolveInvocationForSpec, engineResolveBindingForSpec, engineResolveBindingForSourceYaml, isWorkbenchSubjectManifestPath, parseWorkbenchSourceFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, resolveWorkbenchSourceFiles, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, type AuthoredBenchmarkSpec, type AuthoredOptimizerSpec, type GenericRunSpec, type GenericEngineCaseSpec, type ResolvedSubjectSpec, type WorkbenchEngineCase, type WorkbenchResolvedSource, type WorkbenchSubjectManifestSpec, } from "./generic-spec.ts";
6
- export { adapterCommandName, cloneWorkbenchAdapterManifest, collectWorkbenchAdapterAuthRequirements, collectWorkbenchAdapterInvocations, parseWorkbenchAdapterManifest, workbenchAdapterManifestRequiresAuth, workbenchAdapterManifestSupportsOperation, workbenchAdapterOperationCommand, withDefaultWorkbenchAdapterAuth, withDefaultWorkbenchAdapterAuthProfiles, type WorkbenchPrimitiveAdapterOperation, type WorkbenchAdapterOperation, type WorkbenchAdapterOperationManifest, type WorkbenchAdapterSlotManifest, type WorkbenchAdapterAuthRequirement, type WorkbenchAdapterAuthManifest, type WorkbenchAdapterAuthMethodManifest, type WorkbenchAdapterInvocationLike, type WorkbenchAdapterManifest, } from "@workbench-ai/workbench-protocol";
6
+ export { BENCHMARK_SPEC_FILE, DEFAULT_EXECUTION_RESOURCES, engineCasePrivateFiles, engineCaseFilesForRuntimeInput, engineCasePublicFiles, engineResolveInvocationForSpec, engineResolveBindingForSpec, engineResolveBindingForSourceYaml, isWorkbenchSubjectManifestPath, parseWorkbenchSourceFiles, resolveEngineCaseExecutionConfig, resolveWorkbenchResolvedSourceYaml, resolveWorkbenchSourceFiles, runtimeNetwork, runtimeResources, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, type AuthoredBenchmarkSpec, type AuthoredOptimizerSpec, type GenericRunSpec, type GenericEngineCaseSpec, type ResolvedSubjectSpec, type WorkbenchEngineCase, type WorkbenchResolvedSource, type WorkbenchSubjectManifestSpec, } from "./generic-spec.ts";
7
+ export { composeRuntimeDockerfileWithAdapterInstallers, type WorkbenchRuntimeAdapterInstaller, type WorkbenchRuntimeAdapterInstallerFile, } from "./runtime-dockerfile.ts";
8
+ export { adapterCommandName, cloneWorkbenchAdapterManifest, collectWorkbenchAdapterAuthRequirements, collectWorkbenchAdapterInvocations, parseWorkbenchAdapterManifest, workbenchAdapterManifestRequiresAuth, workbenchAdapterManifestSupportsOperation, workbenchAdapterOperationCommand, workbenchAdapterOperationExecutor, withDefaultWorkbenchAdapterAuth, withDefaultWorkbenchAdapterAuthProfiles, type WorkbenchPrimitiveAdapterOperation, type WorkbenchAdapterOperation, type WorkbenchAdapterOperationExecutor, type WorkbenchAdapterOperationManifest, type WorkbenchAdapterSlotManifest, type WorkbenchAdapterAuthRequirement, type WorkbenchAdapterAuthManifest, type WorkbenchAdapterAuthMethodManifest, type WorkbenchAdapterInvocationLike, type WorkbenchAdapterManifest, } from "@workbench-ai/workbench-protocol";
7
9
  export { adapterAuthEnv, createWorkbenchAdapterAuthBundle, defaultWorkbenchAdapterAuthStoreRoot, localWorkbenchAdapterAuthStore, normalizeWorkbenchAdapterAuthTarget, parseWorkbenchAdapterAuthTarget, sanitizeWorkbenchAdapterAuthBundle, type WorkbenchAdapterAuthBundle, type WorkbenchAdapterAuthEnvVar, type WorkbenchAdapterAuthFile, type WorkbenchAdapterAuthStatus, type WorkbenchAdapterAuthStatusRecord, type WorkbenchAdapterAuthStore, type WorkbenchAdapterAuthTarget, } from "./adapter-auth.ts";
8
- export type { WorkbenchExecutionRuntimeInput, WorkbenchWorkloadPhaseCommand, } from "./execution-runtime-types.ts";
10
+ export type { WorkbenchExecutionRuntimeInput, WorkbenchWorkloadStepCommand, } from "./execution-runtime-types.ts";
9
11
  export { asRuntimeRecord, importNodeModule, nodeBuiltin, normalizeWorkbenchWorkerId, normalizeRuntimeRegistry, quoteShellArg, resolveDockerRuntimeImageRef, resolveWorkbenchWorkerId, } from "./runtime-utils.ts";
10
12
  export { assignUsageRole, extractExecutionUsageFromTrace, mergeUsageSummaries, } from "./execution-usage.ts";
11
13
  export { createWorkbenchProgressStdoutParser, publishWorkbenchProgressStdoutEnvelope, } from "./execution-events.ts";
12
14
  export { resolveSandboxTemplateImage, } from "./sandbox-backends/template-images.ts";
13
- export { readOutputTraceFiles, workbenchTracePhaseDirectory, workbenchTraceRunDirectory, workbenchTraceRunDirectoryName, } from "./trace-files.ts";
15
+ export { readOutputTraceFiles, workbenchTraceExecutionDirectory, workbenchTraceRunDirectory, workbenchTraceRunDirectoryName, } from "./trace-files.ts";
14
16
  export { assertWorkbenchAdapterOperationSupport, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterOperationIssues, collectWorkbenchAdapterOperationRequirements, ensureWorkbenchAdapterOutputDir, WORKBENCH_ADAPTER_RESULT_FILE, normalizeWorkbenchAdapterOperationRequest, normalizeWorkbenchAdapterOperationResult, readWorkbenchAdapterOperationRequest, readWorkbenchAdapterOperationResult, workbenchAdapterOperationResultPath, writeWorkbenchAdapterOperationResult, type WorkbenchAdapterOperationRequest, type WorkbenchAdapterOperationResult, type WorkbenchAdapterOperationResultValue, type WorkbenchAdapterOperationRequirement, type WorkbenchEngineResolveResult, type WorkbenchEngineCaseSpec, } from "@workbench-ai/workbench-protocol";
15
17
  export { applyWorkbenchSubjectPatch, type ApplyWorkbenchSubjectPatchInput, } from "./subject-patch.ts";
16
18
  export { createWorkbenchSandboxFileStore, createSandboxAdapterRequest, executionResultFromCompletedSandboxJob, materializeWorkbenchSandboxInput, readWorkbenchExecutionSpec, sanitizeWorkbenchExecutionJobForSandbox, } from "./sandbox-inputs.ts";
@@ -19,15 +21,15 @@ export { createBaselineSubjectExecution, createBaselineSubjectJob, createWorkben
19
21
  export { addCapacity, capacityFits, runWorkbenchExecutionDag, subtractCapacity, workbenchJobDependencies, workbenchJobHostCost, workbenchJobResources, type WorkbenchExecutionDagCapacity, type WorkbenchExecutionDagResult, type WorkbenchExecutionDagRunInput, } from "./execution-scheduler.ts";
20
22
  export { assertWorkbenchExecutionIsolation, collectWorkbenchExecutionIsolationIssues, validateWorkbenchExecutionOutputPayloads, type WorkbenchExecutionOutputPayloads, } from "./execution-outputs.ts";
21
23
  export { collectSandboxAllocationScopeIssues, collectExecutionCapabilityScopeIssues, collectSandboxHandleScopeIssues, createWorkbenchSandboxAllocation, createWorkbenchSandboxExecutionMetadata, createWorkbenchExecutionCapability, executeValidatedSandboxExecution, type SandboxExecutionFileStore, type SandboxExecutionOptions, type SandboxBackendCapabilities, type SandboxBackendDescriptor, type SandboxCreateRequest, type SandboxEnvironmentImage, type SandboxExecRequest, type SandboxHandle, type SandboxMaterializedInput, type SandboxPlane, type ValidatedSandboxExecutionResult, } from "./sandbox-plane.ts";
22
- export { buildSubjectCasePhaseRefs, buildWorkbenchTracePhases, isWorkbenchPhaseActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-phases.ts";
23
- export { finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, type WorkbenchTraceMergeJob, } from "./execution-traces.ts";
24
+ export { buildSubjectCaseExecutionRefs, buildWorkbenchExecutionEvidence, isWorkbenchExecutionActive, readWorkbenchExecutionId, readWorkbenchExecutionMetadataNumber, readWorkbenchExecutionMetadataString, readWorkbenchExecutionPurpose, resolveWorkbenchJobGroupStatus, } from "./execution-evidence.ts";
25
+ export { buildWorkbenchTraceSessionsFromFiles, combineWorkbenchTraceSessions, finalizeWorkbenchExecutionTraceForJob, mergeWorkbenchExecutionTracesByJob, readWorkbenchExecutionTraceFiles, traceSessionLabel, type WorkbenchTraceMergeJob, } from "./execution-traces.ts";
24
26
  export { DOCKER_SANDBOX_BACKEND, assertSandboxHostHealthForProvider, createDockerSandboxBackendDescriptor, createDockerSandboxPlane, resolveWorkbenchSandboxProviderName, sandboxProviderAdmissionForResources, sandboxProviderDefaultMaxConcurrentJobs, sandboxProviderLeaseScope, sandboxHostHealthExpectationForProvider, type SandboxProviderAdmission, type SandboxProviderHostCost, type SandboxProviderLeaseRequest, type SandboxProviderRequestedResources, type SandboxHostHealthExpectation, type WorkbenchSandboxProviderName, } from "./sandbox-backends/index.ts";
25
27
  export type { WorkbenchExecutionEventPublisher, WorkbenchExecutionProgressTarget, } from "./execution-events.ts";
26
- export type { SubjectCaseReview, SubjectRecord, EngineResolveBinding, EvaluationResultRecord, HostedWorkbenchJob, Json, RunSummary, RuntimeEvent, SurfaceSnapshotFile, WorkbenchExecutionCapability, WorkbenchExecutionTrace, WorkbenchSandboxHandle, WorkbenchSandboxExecutionMetadata, } from "@workbench-ai/workbench-contract";
28
+ export type { SubjectCaseReview, SubjectRecord, EngineResolveBinding, EvaluationScorecard, HostedWorkbenchJob, Json, RunSummary, RuntimeEvent, SurfaceSnapshotFile, WorkbenchExecutionCapability, WorkbenchExecutionTrace, WorkbenchTraceSession, WorkbenchSandboxHandle, WorkbenchSandboxExecutionMetadata, } from "@workbench-ai/workbench-contract";
27
29
  export interface WorkbenchRunMaterialization {
28
30
  subjects: SubjectRecord[];
29
31
  subjectFiles: Record<string, SurfaceSnapshotFile[]>;
30
- evaluations: EvaluationResultRecord[];
32
+ evaluations: EvaluationScorecard[];
31
33
  activeSubjectId: string | null;
32
34
  selectedSubject: SubjectRecord | null;
33
35
  completedJobCount: number;
@@ -52,6 +54,8 @@ export interface WorkbenchRunWorkload {
52
54
  export interface RuntimeWorkloadResult {
53
55
  files: SurfaceSnapshotFile[];
54
56
  fileChanges: string[];
57
+ operationResults?: WorkbenchAdapterOperationResult[];
58
+ workspaceFiles?: SurfaceSnapshotFile[];
55
59
  subjectPatch?: WorkbenchSubjectPatch;
56
60
  result?: WorkbenchResult;
57
61
  metrics?: Record<string, number>;
@@ -86,6 +90,7 @@ export declare function materializeWorkbenchRunResult(args: {
86
90
  previousSubject?: SubjectRecord | null;
87
91
  existingSubjectCount: number;
88
92
  }): WorkbenchRunMaterialization;
93
+ export declare function evaluationScorecardId(runId: string, subjectId: string): string;
89
94
  export declare function selectExecutionOutputFilesForInspection(args: {
90
95
  purpose: string | null | undefined;
91
96
  files: readonly SurfaceSnapshotFile[];
@@ -97,6 +102,10 @@ export declare function createSubjectRevisionTraceInputFiles(args: {
97
102
  jobs: readonly HostedWorkbenchJob[];
98
103
  events: readonly RuntimeEvent[];
99
104
  }): SurfaceSnapshotFile[];
105
+ export declare function createSubjectEvaluationTraceInputFiles(args: {
106
+ subject?: SubjectRecord | null;
107
+ path?: string;
108
+ }): SurfaceSnapshotFile[];
100
109
  export interface WorkbenchProjectSourceFilesInput {
101
110
  specSource?: string;
102
111
  specFiles?: readonly SurfaceSnapshotFile[];
@@ -134,7 +143,7 @@ export declare function createSubjectFilePreview(args: {
134
143
  export declare function createCaseReview(args: {
135
144
  subject: SubjectRecord;
136
145
  caseId: string;
137
- phases?: SubjectCasePhaseRef[];
146
+ executions?: SubjectCaseExecutionRef[];
138
147
  }): SubjectCaseReview;
139
148
  export declare function createWorkbenchRunWorkload(args: {
140
149
  job: HostedWorkbenchJob;
@@ -150,8 +159,10 @@ export interface WorkbenchExecutionJobOptions {
150
159
  createSandboxPlaneForProvider?: (provider: string, args: WorkbenchExecutionRuntimeInput, startedAt: string, fileStore: SandboxExecutionFileStore) => SandboxPlane;
151
160
  }
152
161
  export declare function executeWorkbenchExecutionJob(args: WorkbenchExecutionRuntimeInput, options: WorkbenchExecutionJobOptions): Promise<HostedWorkbenchJob>;
162
+ export declare function workbenchExecutionExecutorForRuntimeInput(args: Pick<WorkbenchExecutionRuntimeInput, "job" | "adapterManifests" | "runtimeControlOperation">): WorkbenchAdapterOperationExecutor;
153
163
  export declare function workbenchExecutionPurpose(job: HostedWorkbenchJob): WorkbenchExecutionSpec["purpose"] | null;
154
- export declare function executeAdapterInCurrentSandboxRuntime(args: WorkbenchExecutionRuntimeInput, execution: WorkbenchExecutionSpec, startedAt: string, capability: ReturnType<typeof createWorkbenchExecutionCapability>): Promise<HostedWorkbenchJob>;
164
+ export declare function executeAdapterInCurrentRuntime(args: WorkbenchExecutionRuntimeInput, execution: WorkbenchExecutionSpec, startedAt: string, capability: ReturnType<typeof createWorkbenchExecutionCapability>): Promise<HostedWorkbenchJob>;
165
+ export declare function executeRuntimeControlOperationSequenceInCurrentRuntime(args: WorkbenchExecutionRuntimeInput, execution: WorkbenchExecutionSpec, startedAt: string, capability?: WorkbenchExecutionCapability): Promise<HostedWorkbenchJob>;
155
166
  export declare function stageWorkbenchRunWorkload(root: string, workload: WorkbenchRunWorkload): Promise<void>;
156
167
  export declare function workloadTimeoutMs(spec: GenericRunSpec): number;
157
168
  export declare function findEnvironmentVersionForImage(image: string, versions: readonly HostedWorkbenchEnvironmentVersion[]): HostedWorkbenchEnvironmentVersion | null;