@workbench-ai/workbench-core 0.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter-auth.d.ts +63 -0
- package/dist/adapter-auth.d.ts.map +1 -0
- package/dist/adapter-auth.js +244 -0
- package/dist/execution-events.d.ts +53 -0
- package/dist/execution-events.d.ts.map +1 -0
- package/dist/execution-events.js +195 -0
- package/dist/execution-graph.d.ts +27 -0
- package/dist/execution-graph.d.ts.map +1 -0
- package/dist/execution-graph.js +126 -0
- package/dist/execution-jobs.d.ts +70 -0
- package/dist/execution-jobs.d.ts.map +1 -0
- package/dist/execution-jobs.js +229 -0
- package/dist/execution-outputs.d.ts +9 -0
- package/dist/execution-outputs.d.ts.map +1 -0
- package/dist/execution-outputs.js +393 -0
- package/dist/execution-phases.d.ts +21 -0
- package/dist/execution-phases.d.ts.map +1 -0
- package/dist/execution-phases.js +262 -0
- package/dist/execution-runtime-types.d.ts +35 -0
- package/dist/execution-runtime-types.d.ts.map +1 -0
- package/dist/execution-runtime-types.js +1 -0
- package/dist/execution-scheduler.d.ts +31 -0
- package/dist/execution-scheduler.d.ts.map +1 -0
- package/dist/execution-scheduler.js +241 -0
- package/dist/execution-traces.d.ts +16 -0
- package/dist/execution-traces.d.ts.map +1 -0
- package/dist/execution-traces.js +164 -0
- package/dist/execution-usage.d.ts +12 -0
- package/dist/execution-usage.d.ts.map +1 -0
- package/dist/execution-usage.js +433 -0
- package/dist/generic-spec.d.ts +113 -0
- package/dist/generic-spec.d.ts.map +1 -0
- package/dist/generic-spec.js +656 -0
- package/dist/index.d.ts +160 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2858 -0
- package/dist/model-prices-litellm.d.ts +9674 -0
- package/dist/model-prices-litellm.d.ts.map +1 -0
- package/dist/model-prices-litellm.js +9668 -0
- package/dist/runtime-utils.d.ts +18 -0
- package/dist/runtime-utils.d.ts.map +1 -0
- package/dist/runtime-utils.js +108 -0
- package/dist/sandbox-backends/docker.d.ts +5 -0
- package/dist/sandbox-backends/docker.d.ts.map +1 -0
- package/dist/sandbox-backends/docker.js +568 -0
- package/dist/sandbox-backends/index.d.ts +37 -0
- package/dist/sandbox-backends/index.d.ts.map +1 -0
- package/dist/sandbox-backends/index.js +79 -0
- package/dist/sandbox-backends/names.d.ts +6 -0
- package/dist/sandbox-backends/names.d.ts.map +1 -0
- package/dist/sandbox-backends/names.js +14 -0
- package/dist/sandbox-backends/template-images.d.ts +4 -0
- package/dist/sandbox-backends/template-images.d.ts.map +1 -0
- package/dist/sandbox-backends/template-images.js +48 -0
- package/dist/sandbox-inputs.d.ts +27 -0
- package/dist/sandbox-inputs.d.ts.map +1 -0
- package/dist/sandbox-inputs.js +220 -0
- package/dist/sandbox-plane.d.ts +89 -0
- package/dist/sandbox-plane.d.ts.map +1 -0
- package/dist/sandbox-plane.js +327 -0
- package/dist/subject-patch.d.ts +8 -0
- package/dist/subject-patch.d.ts.map +1 -0
- package/dist/subject-patch.js +63 -0
- package/dist/trace-files.d.ts +18 -0
- package/dist/trace-files.d.ts.map +1 -0
- package/dist/trace-files.js +94 -0
- package/environments/libreoffice-agent/Dockerfile +13 -0
- package/environments/libreoffice-python/Dockerfile +11 -0
- package/environments/node-22/Dockerfile +3 -0
- package/environments/python-3.12/Dockerfile +8 -0
- package/package.json +42 -0
- package/worker/sandbox-adapter-runner.cjs +275 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
import { normalizeUsageSummary } from "./execution-usage.js";
|
|
2
|
+
/**
 * Checks the payloads an execution produced against the outputs it declared
 * and normalizes the payloads whose schema this module recognises.
 *
 * @param {object} execution - Execution spec. This function reads `id` and
 *   `outputs` (each entry exposing `name`, `required` and `schema`) and passes
 *   the spec on to the schema-specific normalizers.
 * @param {object} payloads - Raw payloads keyed by output name.
 * @returns {object} Object carrying `subjectPatch` and/or `result` for the
 *   payloads whose contract schema is `workbench.subject_patch.v1` or
 *   `workbench.result.v1`; payloads with any other schema are not copied.
 * @throws {Error} When at least one issue was collected; the message is all
 *   issues joined with newlines.
 */
export function validateWorkbenchExecutionOutputPayloads(execution, payloads) {
    const issues = [];
    const declaredOutputs = new Map(execution.outputs.map((output) => [output.name, output]));
    // Own-property test on purpose: the `in` operator also sees inherited keys,
    // so a required output named e.g. "toString" would wrongly count as produced.
    const hasPayload = (name) => Object.prototype.hasOwnProperty.call(payloads, name);
    for (const output of execution.outputs) {
        if (output.required && !hasPayload(output.name)) {
            issues.push(`Execution ${execution.id} did not produce required output ${output.name}.`);
        }
    }
    // Reported in a separate pass so these messages precede any normalization issues.
    for (const outputName of Object.keys(payloads)) {
        if (!declaredOutputs.has(outputName)) {
            issues.push(`Execution ${execution.id} produced undeclared output ${outputName}.`);
        }
    }
    const validated = {};
    for (const [name, payload] of Object.entries(payloads)) {
        const contract = declaredOutputs.get(name);
        if (!contract) {
            // Undeclared payloads were already reported above.
            continue;
        }
        switch (contract.schema) {
            case "workbench.subject_patch.v1":
                validated.subjectPatch = normalizeSubjectPatch(payload, execution, contract, issues);
                break;
            case "workbench.result.v1":
                validated.result = normalizeResult(payload, execution, contract, issues);
                break;
            default:
                break;
        }
    }
    if (issues.length > 0) {
        throw new Error(issues.join("\n"));
    }
    return validated;
}
|
|
37
|
+
/**
 * Gathers every isolation-contract violation found in an execution spec.
 * Nothing is thrown for violations; they are returned as messages in the
 * order the checks run: identity and sandbox, inputs, outputs, resources, network.
 *
 * @param {object} execution - Execution spec; reads `id`, `projectId`, `runId`,
 *   `purpose`, `sandbox`, `inputs`, `outputs` and `policy`.
 * @returns {string[]} Human-readable issue messages (empty when the spec passes).
 */
export function collectWorkbenchExecutionIsolationIssues(execution) {
    const issues = [];
    const isBlank = (text) => !text.trim();

    // Identity and sandbox template.
    if (isBlank(execution.policy.tenantId)) {
        issues.push(`Execution ${execution.id} must include a non-empty tenant id.`);
    }
    if (isBlank(execution.projectId)) {
        issues.push(`Execution ${execution.id} must include a project id.`);
    }
    if (isBlank(execution.runId)) {
        issues.push(`Execution ${execution.id} must include a run id.`);
    }
    if (isBlank(execution.sandbox.ref)) {
        issues.push(`Execution ${execution.id} must include a sandbox template ref.`);
    }
    if (!["oci", "snapshot"].includes(execution.sandbox.kind)) {
        issues.push(`Execution ${execution.id} has unsupported sandbox kind ${execution.sandbox.kind}.`);
    }

    // Inputs: unique names, unique mount paths, fixed mount location, read-only.
    const seenInputNames = new Set();
    const seenMountPaths = new Set();
    const allowedInputs = expectedInputsForPurpose(execution.purpose);
    for (const input of execution.inputs) {
        if (isBlank(input.name)) {
            issues.push(`Execution ${execution.id} has an input with an empty name.`);
        }
        if (seenInputNames.has(input.name)) {
            issues.push(`Execution ${execution.id} declares duplicate input ${input.name}.`);
        }
        seenInputNames.add(input.name);
        if (isBlank(input.ref)) {
            issues.push(`Execution ${execution.id} input ${input.name} must include a ref.`);
        }
        if (!allowedInputs.has(input.name)) {
            issues.push(`Execution ${execution.id} declares unsupported input ${input.name} for purpose ${execution.purpose}.`);
        }
        const expectedMountPath = `/workspace/input/${input.name}`;
        if (input.mountPath !== expectedMountPath) {
            issues.push(`Execution ${execution.id} input ${input.name} must mount at ${expectedMountPath}.`);
        }
        if (seenMountPaths.has(input.mountPath)) {
            issues.push(`Execution ${execution.id} declares duplicate mount path ${input.mountPath}.`);
        }
        seenMountPaths.add(input.mountPath);
        if (input.writable) {
            issues.push(`Execution ${execution.id} inputs must be read-only.`);
        }
    }
    for (const requiredInput of allowedInputs) {
        if (!seenInputNames.has(requiredInput)) {
            issues.push(`Execution ${execution.id} missing required input ${requiredInput} for purpose ${execution.purpose}.`);
        }
    }

    // Outputs: each purpose allows a single, fixed output name and schema.
    const seenOutputNames = new Set();
    const requiredOutput = expectedOutputForPurpose(execution.purpose);
    for (const output of execution.outputs) {
        if (seenOutputNames.has(output.name)) {
            issues.push(`Execution ${execution.id} declares duplicate output ${output.name}.`);
        }
        seenOutputNames.add(output.name);
        if (requiredOutput === null) {
            issues.push(`Execution ${execution.id} cannot declare outputs for purpose ${execution.purpose}.`);
        }
        else if (output.name !== requiredOutput) {
            issues.push(`Execution ${execution.id} output for purpose ${execution.purpose} must be named ${requiredOutput}.`);
        }
        if (!outputAllowedForPurpose(execution.purpose, output)) {
            issues.push(`Execution ${execution.id} cannot declare ${output.schema} for purpose ${execution.purpose}.`);
        }
    }
    if (requiredOutput !== null && !seenOutputNames.has(requiredOutput)) {
        issues.push(`Execution ${execution.id} missing required output ${requiredOutput} for purpose ${execution.purpose}.`);
    }

    // Resource limits must all be finite and strictly positive.
    Object.entries(execution.policy.resources).forEach(([name, value]) => {
        const isPositiveNumber = Number.isFinite(value) && value > 0;
        if (!isPositiveNumber) {
            issues.push(`Execution ${execution.id} policy.resources.${name} must be a positive number.`);
        }
    });

    // An allowlist policy without hosts is rejected.
    const network = execution.policy.network;
    if (network.egress === "allowlist" && (!network.allow || network.allow.length === 0)) {
        issues.push(`Execution ${execution.id} allowlist network policy must include at least one allowed host.`);
    }
    return issues;
}
|
|
119
|
+
/**
 * Lists the input names an execution of the given purpose must declare.
 *
 * @param {string} purpose - Execution purpose.
 * @returns {Set<string>} A fresh set: `subject` + `traces` for "improve",
 *   `subject` + `case` for "attempt", empty for anything else.
 */
function expectedInputsForPurpose(purpose) {
    switch (purpose) {
        case "improve":
            return new Set(["subject", "traces"]);
        case "attempt":
            return new Set(["subject", "case"]);
        default:
            return new Set();
    }
}
|
|
128
|
+
/**
 * Names the single output an execution of the given purpose must declare.
 *
 * @param {string} purpose - Execution purpose.
 * @returns {string|null} "subject_patch" for "improve", "result" for
 *   "attempt", otherwise null.
 */
function expectedOutputForPurpose(purpose) {
    switch (purpose) {
        case "improve":
            return "subject_patch";
        case "attempt":
            return "result";
        default:
            return null;
    }
}
|
|
137
|
+
/**
 * Throwing variant of `collectWorkbenchExecutionIsolationIssues`.
 *
 * @param {object} execution - Execution spec to check.
 * @throws {Error} When any isolation issue is found; the message is all
 *   issues joined with newlines.
 */
export function assertWorkbenchExecutionIsolation(execution) {
    const problems = collectWorkbenchExecutionIsolationIssues(execution);
    if (problems.length === 0) {
        return;
    }
    throw new Error(problems.join("\n"));
}
|
|
143
|
+
/**
 * Tells whether an output's schema is the one permitted for the purpose.
 *
 * @param {string} purpose - Execution purpose.
 * @param {object} output - Declared output; only `schema` is read, and only
 *   for the "improve" and "attempt" purposes.
 * @returns {boolean} True when the schema matches the purpose; always false
 *   for any other purpose.
 */
function outputAllowedForPurpose(purpose, output) {
    switch (purpose) {
        case "improve":
            return output.schema === "workbench.subject_patch.v1";
        case "attempt":
            return output.schema === "workbench.result.v1";
        default:
            return false;
    }
}
|
|
152
|
+
/**
 * Normalizes a `workbench.subject_patch.v1` payload and records every
 * violation in `issues` instead of throwing.
 *
 * Each file path and file change must fall under one of the paths listed in
 * `execution.metadata.edits`; an empty or missing edits list is itself an issue.
 *
 * @param {unknown} value - Raw payload.
 * @param {object} execution - Execution spec; reads `id` and `metadata.edits`.
 * @param {object} contract - Declared output; `name` prefixes issue labels.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {object} `{ files, fileChanges }` plus `summary` (when a string)
 *   and `feedback` (when JSON-like).
 */
function normalizeSubjectPatch(value, execution, contract, issues) {
    const patch = readRecord(value, contract.name, issues);
    const files = normalizeSnapshotFiles(patch?.files, `${contract.name}.files`, issues);
    const fileChanges = normalizeStringArray(patch?.fileChanges, `${contract.name}.fileChanges`, issues);
    const allowedEdits = normalizeMetadataStringArray(execution.metadata.edits);
    if (allowedEdits.length === 0) {
        issues.push(`Execution ${execution.id} subject patch validation requires metadata.edits.`);
    }
    files
        .filter((file) => !isAllowedEditPath(file.path, allowedEdits))
        .forEach((file) => {
            issues.push(`${contract.name}.files contains path outside optimizer edits: ${file.path}.`);
        });
    for (const changedPath of fileChanges) {
        // An unsafe path is reported once; the edits check applies to safe paths only.
        if (!isSafeRelativePath(changedPath)) {
            issues.push(`${contract.name}.fileChanges contains unsafe path ${changedPath}.`);
            continue;
        }
        if (!isAllowedEditPath(changedPath, allowedEdits)) {
            issues.push(`${contract.name}.fileChanges contains path outside optimizer edits: ${changedPath}.`);
        }
    }
    const normalized = { files, fileChanges };
    if (typeof patch?.summary === "string") {
        normalized.summary = patch.summary;
    }
    if (isJson(patch?.feedback)) {
        normalized.feedback = patch.feedback;
    }
    return normalized;
}
|
|
180
|
+
/**
 * Normalizes a `workbench.result.v1` payload and records violations in `issues`.
 *
 * @param {unknown} value - Raw payload.
 * @param {object} execution - Accepted for signature parity; not used here.
 * @param {object} contract - Declared output; `name` prefixes issue labels.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {object} `{ score }` (0 when the score is invalid) plus `metrics`,
 *   `cases`, `usage`, `summary` and `feedback` when present and valid.
 */
function normalizeResult(value, execution, contract, issues) {
    void execution;
    const payload = readRecord(value, contract.name, issues);
    const score = readFiniteNumber(payload?.score, `${contract.name}.score`, issues);
    const usage = normalizeUsageSummary(payload?.usage);
    const normalized = { score: score ?? 0 };
    if (payload?.metrics !== undefined) {
        normalized.metrics = normalizeNumberRecord(payload.metrics, `${contract.name}.metrics`, issues);
    }
    if (payload?.cases !== undefined) {
        normalized.cases = normalizeCaseResults(payload.cases, `${contract.name}.cases`, issues);
    }
    if (usage) {
        normalized.usage = usage;
    }
    if (typeof payload?.summary === "string") {
        normalized.summary = payload.summary;
    }
    if (isJson(payload?.feedback)) {
        normalized.feedback = payload.feedback;
    }
    return normalized;
}
|
|
194
|
+
/**
 * Normalizes the `files` array of a subject patch.
 *
 * Entries that are not objects, lack a safe relative path, or lack string
 * content are reported in `issues` and left out of the result.
 *
 * @param {unknown} value - Expected to be an array of file records.
 * @param {string} label - Prefix for issue messages.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {object[]} Records of shape `{ path, kind, encoding, content, executable }`.
 */
function normalizeSnapshotFiles(value, label, issues) {
    if (!Array.isArray(value)) {
        issues.push(`${label} must be an array.`);
        return [];
    }
    const snapshotFiles = [];
    // forEach skips holes in sparse arrays, so missing slots are neither reported nor emitted.
    value.forEach((entry, position) => {
        const entryLabel = `${label}[${position}]`;
        const file = readRecord(entry, entryLabel, issues);
        if (!file) {
            return;
        }
        const safePath = readSafePath(file.path, `${entryLabel}.path`, issues);
        // Any encoding other than "base64" is treated as "utf8".
        const encoding = file.encoding === "base64" ? "base64" : "utf8";
        // An explicit, recognised kind wins; otherwise it follows the encoding.
        let kind = encoding === "base64" ? "binary" : "text";
        if (file.kind === "text" || file.kind === "binary") {
            kind = file.kind;
        }
        const hasStringContent = typeof file.content === "string";
        if (!hasStringContent) {
            issues.push(`${entryLabel}.content must be a string.`);
        }
        if (!safePath || !hasStringContent) {
            return;
        }
        snapshotFiles.push({
            path: safePath,
            kind,
            encoding,
            content: file.content,
            executable: file.executable === true,
        });
    });
    return snapshotFiles;
}
|
|
225
|
+
/**
 * Normalizes the per-case results of a result payload.
 *
 * Entries that are not objects or have no non-empty `id` are reported and
 * dropped. Other invalid fields are reported, while the entry is still emitted
 * with a fallback (`durationMs` becomes 0, an invalid `status` is omitted).
 *
 * @param {unknown} value - Expected to be an array of case records.
 * @param {string} label - Prefix for issue messages.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {object[]} Normalized case results; `metrics` is always present.
 */
function normalizeCaseResults(value, label, issues) {
    if (!Array.isArray(value)) {
        issues.push(`${label} must be an array.`);
        return [];
    }
    const caseResults = [];
    // forEach skips holes in sparse arrays.
    value.forEach((entry, position) => {
        const entryLabel = `${label}[${position}]`;
        const caseRecord = readRecord(entry, entryLabel, issues);
        const id = readNonEmptyString(caseRecord?.id, `${entryLabel}.id`, issues);
        if (!caseRecord || !id) {
            return;
        }
        const statusIsValid = caseRecord.status === undefined
            || caseRecord.status === "completed"
            || caseRecord.status === "error";
        const status = statusIsValid ? caseRecord.status : null;
        if (status === null) {
            issues.push(`${entryLabel}.status must be completed or error.`);
        }
        let criteria;
        if (caseRecord.criteria !== undefined) {
            criteria = normalizeCaseCriteria(caseRecord.criteria, `${entryLabel}.criteria`, issues);
        }
        const normalized = { id };
        if (typeof caseRecord.label === "string") {
            normalized.label = caseRecord.label;
        }
        if (typeof caseRecord.split === "string") {
            normalized.split = caseRecord.split;
        }
        if (status) {
            normalized.status = status;
        }
        if (caseRecord.durationMs !== undefined) {
            normalized.durationMs = readFiniteNumber(caseRecord.durationMs, `${entryLabel}.durationMs`, issues) ?? 0;
        }
        normalized.metrics = normalizeNumberRecord(caseRecord.metrics ?? {}, `${entryLabel}.metrics`, issues);
        // `source` is kept only when it is a JSON-like plain object.
        const source = caseRecord.source;
        if (isJson(source) && source && typeof source === "object" && !Array.isArray(source)) {
            normalized.source = source;
        }
        if (isJson(caseRecord.feedback)) {
            normalized.feedback = caseRecord.feedback;
        }
        if (criteria && criteria.length > 0) {
            normalized.criteria = criteria;
        }
        caseResults.push(normalized);
    });
    return caseResults;
}
|
|
261
|
+
/**
 * Normalizes the criterion results attached to a case.
 *
 * Entries that are not objects, lack a non-empty `criterion_id`, or lack a
 * finite `score` are reported and dropped. A missing `pass` defaults to
 * `score >= 0.5`; a non-boolean `pass` is reported and emitted as false.
 *
 * @param {unknown} value - Expected to be an array of criterion records.
 * @param {string} label - Prefix for issue messages.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {object[]} Records of shape `{ criterion_id, label, score, pass }`
 *   plus optional `errors` and `rationale`.
 */
function normalizeCaseCriteria(value, label, issues) {
    if (!Array.isArray(value)) {
        issues.push(`${label} must be an array.`);
        return [];
    }
    const criteria = [];
    // forEach skips holes in sparse arrays.
    value.forEach((entry, position) => {
        const entryLabel = `${label}[${position}]`;
        const criterion = readRecord(entry, entryLabel, issues);
        const criterionId = readNonEmptyString(criterion?.criterion_id, `${entryLabel}.criterion_id`, issues);
        const score = readFiniteNumber(criterion?.score, `${entryLabel}.score`, issues);
        if (!criterion || !criterionId || score === null) {
            return;
        }
        let pass = null;
        if (criterion.pass === undefined) {
            pass = score >= 0.5;
        }
        else if (typeof criterion.pass === "boolean") {
            pass = criterion.pass;
        }
        if (pass === null) {
            issues.push(`${entryLabel}.pass must be a boolean.`);
        }
        let errors = null;
        if (criterion.errors === undefined) {
            errors = [];
        }
        else if (Array.isArray(criterion.errors)) {
            // Non-string error entries are dropped without being reported.
            errors = criterion.errors.filter((error) => typeof error === "string");
        }
        if (errors === null) {
            issues.push(`${entryLabel}.errors must be an array when provided.`);
        }
        const hasRationale = typeof criterion.rationale === "string" && criterion.rationale.trim().length > 0;
        const rationale = hasRationale ? criterion.rationale.trim() : undefined;
        const normalized = {
            criterion_id: criterionId,
            label: typeof criterion.label === "string" ? criterion.label : criterionId,
            score,
            pass: pass ?? false,
        };
        if (errors && errors.length > 0) {
            normalized.errors = errors;
        }
        if (rationale) {
            normalized.rationale = rationale;
        }
        criteria.push(normalized);
    });
    return criteria;
}
|
|
303
|
+
/**
 * Copies the finite numeric entries of an object into a new object.
 *
 * @param {unknown} value - Expected to be a non-array object.
 * @param {string} label - Prefix for issue messages.
 * @param {string[]} issues - Accumulator; receives one message per
 *   non-finite entry, or one message when `value` is not an object.
 * @returns {object} Key-to-number map holding only the valid entries.
 */
function normalizeNumberRecord(value, label, issues) {
    const isObjectRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isObjectRecord) {
        issues.push(`${label} must be an object.`);
        return {};
    }
    const numbers = {};
    Object.entries(value).forEach(([key, entry]) => {
        const numeric = readFiniteNumber(entry, `${label}.${key}`, issues);
        if (numeric !== null) {
            numbers[key] = numeric;
        }
    });
    return numbers;
}
|
|
317
|
+
/**
 * Keeps the non-empty string entries of an array and reports the rest.
 *
 * @param {unknown} value - Expected to be an array of strings.
 * @param {string} label - Prefix for issue messages.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {string[]} The valid entries, in their original order.
 */
function normalizeStringArray(value, label, issues) {
    if (!Array.isArray(value)) {
        issues.push(`${label} must be an array.`);
        return [];
    }
    const strings = [];
    // forEach skips holes in sparse arrays.
    value.forEach((entry, position) => {
        const text = readNonEmptyString(entry, `${label}[${position}]`, issues);
        if (text) {
            strings.push(text);
        }
    });
    return strings;
}
|
|
327
|
+
/**
 * Reads a metadata value as a list of relative paths.
 *
 * @param {unknown} value - Metadata value; anything but an array yields [].
 * @returns {string[]} The string entries, each passed through
 *   `normalizeRelativePath`; non-string entries are silently dropped.
 */
function normalizeMetadataStringArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value.flatMap((entry) => (typeof entry === "string" ? [normalizeRelativePath(entry)] : []));
}
|
|
333
|
+
/**
 * Accepts a value only when it is a non-null, non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} label - Name used in the issue message.
 * @param {string[]} issues - Accumulator that receives the issue message.
 * @returns {object|null} The same object, or null after reporting an issue.
 */
function readRecord(value, label, issues) {
    const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
    if (isRecord) {
        return value;
    }
    issues.push(`${label} must be an object.`);
    return null;
}
|
|
340
|
+
/**
 * Accepts a value only when it is a string with non-whitespace content.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} label - Name used in the issue message.
 * @param {string[]} issues - Accumulator that receives the issue message.
 * @returns {string|null} The original (untrimmed) string, or null after
 *   reporting an issue.
 */
function readNonEmptyString(value, label, issues) {
    const hasText = typeof value === "string" && value.trim().length > 0;
    if (hasText) {
        return value;
    }
    issues.push(`${label} must be a non-empty string.`);
    return null;
}
|
|
347
|
+
/**
 * Reads a value as a safe relative path.
 *
 * @param {unknown} value - Candidate path.
 * @param {string} label - Name used in issue messages.
 * @param {string[]} issues - Accumulator that receives issue messages.
 * @returns {string|null} The normalized path, or null when the value is not a
 *   non-empty string or fails the `isSafeRelativePath` check.
 */
function readSafePath(value, label, issues) {
    const rawPath = readNonEmptyString(value, label, issues);
    if (!rawPath) {
        return null;
    }
    const candidate = normalizeRelativePath(rawPath);
    if (isSafeRelativePath(candidate)) {
        return candidate;
    }
    issues.push(`${label} must be a safe relative path.`);
    return null;
}
|
|
359
|
+
/**
 * Accepts a value only when it is a finite number.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} label - Name used in the issue message.
 * @param {string[]} issues - Accumulator that receives the issue message.
 * @returns {number|null} The number, or null after reporting an issue.
 */
function readFiniteNumber(value, label, issues) {
    // Number.isFinite does not coerce, so it is already false for non-numbers.
    if (Number.isFinite(value)) {
        return value;
    }
    issues.push(`${label} must be a finite number.`);
    return null;
}
|
|
366
|
+
/**
 * Tells whether a path equals, or lies beneath, one of the permitted edit paths.
 *
 * @param {string} filePath - Path to check.
 * @param {string[]} edits - Permitted files or directories.
 * @returns {boolean} True when some edit entry (normalized, trailing slashes
 *   removed) equals the normalized path or is a directory prefix of it.
 */
function isAllowedEditPath(filePath, edits) {
    const candidate = normalizeRelativePath(filePath);
    for (const entry of edits) {
        const allowedRoot = normalizeRelativePath(entry).replace(/\/+$/u, "");
        if (candidate === allowedRoot || candidate.startsWith(`${allowedRoot}/`)) {
            return true;
        }
    }
    return false;
}
|
|
373
|
+
/**
 * Tells whether a path is a non-empty relative path with no `..` segment.
 *
 * @param {string} filePath - Path to check; normalized before testing.
 * @returns {boolean} False for an empty path, a path starting with "/", or a
 *   path containing a ".." segment.
 */
function isSafeRelativePath(filePath) {
    const candidate = normalizeRelativePath(filePath);
    if (candidate.length === 0 || candidate.startsWith("/")) {
        return false;
    }
    return candidate.split("/").every((segment) => segment !== "..");
}
|
|
379
|
+
/**
 * Puts a path into a canonical textual form so that equivalent spellings compare equal.
 *
 * Backslashes become forward slashes, runs of slashes collapse to one, and
 * every leading "./" is removed (so "././a" and "./a" both become "a").
 * Interior "." segments, ".." segments and a leading "/" are left untouched.
 *
 * @param {string} filePath - Path to normalize.
 * @returns {string} The normalized path; may be empty (e.g. for "./").
 */
function normalizeRelativePath(filePath) {
    return filePath
        .replace(/\\/gu, "/")
        // Collapse first so repeated "./" prefixes are adjacent and removed in one pass.
        .replace(/\/+/gu, "/")
        .replace(/^(?:\.\/)+/u, "");
}
|
|
382
|
+
/**
 * Tells whether a value consists solely of JSON-representable data.
 *
 * Accepted: null, booleans, strings, finite numbers, and arrays or objects
 * whose members are all accepted. Rejected: undefined, functions, symbols,
 * bigints, NaN and infinities (JSON has no representation for them), and any
 * structure that contains itself (a cycle cannot be serialized).
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True when the whole value is JSON-representable.
 */
function isJson(value) {
    // Objects on the current descent path; meeting one again means a cycle.
    const ancestors = new Set();
    const visit = (node) => {
        if (node === null || typeof node === "boolean" || typeof node === "string") {
            return true;
        }
        if (typeof node === "number") {
            return Number.isFinite(node);
        }
        if (typeof node !== "object") {
            return false;
        }
        if (ancestors.has(node)) {
            return false;
        }
        ancestors.add(node);
        const children = Array.isArray(node) ? node : Object.values(node);
        const allValid = children.every((child) => visit(child));
        // Remove on the way out so shared, non-cyclic references stay valid.
        ancestors.delete(node);
        return allValid;
    };
    return visit(value);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { SubjectCaseReview, HostedWorkbenchJob, HostedWorkbenchJobStatus, WorkbenchExecutionEventRole, WorkbenchExecutionSpec, WorkbenchExecutionTrace, WorkbenchTracePhase } from "@workbench-ai/workbench-contract";
|
|
2
|
+
/**
 * Builds the phase references shown for one subject/case review.
 *
 * Only jobs whose execution purpose is "attempt" are considered. A job is
 * kept when its subject id equals `subjectId`, when its case id equals
 * `caseId` or `caseId` starts with `<jobCaseId>__`, and, if `sampleIndex` is a
 * number, when its sample index equals it. Kept jobs are grouped by run,
 * purpose, case id and sample index, and each group becomes one phase.
 * When several phases result, only those of a single run are returned: the run
 * of the first queued/running phase under the module's recency ordering, or
 * otherwise the run of the first phase under that ordering.
 */
export declare function buildSubjectCasePhaseRefs(args: {
|
|
3
|
+
jobs: readonly HostedWorkbenchJob[];
|
|
4
|
+
subjectId: string;
|
|
5
|
+
caseId: string;
|
|
6
|
+
sampleIndex?: number;
|
|
7
|
+
}): SubjectCaseReview["phases"];
|
|
8
|
+
/**
 * Builds trace phases from jobs that carry a recognised execution purpose
 * ("improve" or "attempt"); all other jobs are skipped.
 *
 * Jobs are grouped by run, purpose, subject id, case id, sample index and
 * attempt index. Each group becomes one phase whose trace merges the
 * per-job traces obtained from `traceForJob`. `executionId` is set only for
 * groups that contain exactly one job; otherwise it is null.
 */
export declare function buildWorkbenchTracePhases(args: {
|
|
9
|
+
jobs: readonly HostedWorkbenchJob[];
|
|
10
|
+
traceIdPrefix: string;
|
|
11
|
+
traceForJob: (job: HostedWorkbenchJob, role: WorkbenchExecutionEventRole) => WorkbenchExecutionTrace;
|
|
12
|
+
}): WorkbenchTracePhase[];
|
|
13
|
+
/**
 * Reads the execution purpose from `job.input.execution.purpose`.
 * Returns null unless `job.kind` is "execute" and the purpose is "improve" or "attempt".
 */
export declare function readWorkbenchExecutionPurpose(job: HostedWorkbenchJob): WorkbenchExecutionSpec["purpose"] | null;
|
|
14
|
+
/** Reads `job.input.execution.id`; returns null unless it is a non-empty string. */
export declare function readWorkbenchExecutionId(job: HostedWorkbenchJob): string | null;
|
|
15
|
+
/**
 * Looks up `key` in `job.input.execution.metadata`, falling back to `job.input[key]`.
 * Returns the value only when it is a non-empty string, otherwise null.
 */
export declare function readWorkbenchExecutionMetadataString(job: HostedWorkbenchJob, key: string): string | null;
|
|
16
|
+
/**
 * Looks up `key` in `job.input.execution.metadata`, falling back to `job.input[key]`.
 * Returns the value only when it is a finite number, otherwise null.
 */
export declare function readWorkbenchExecutionMetadataNumber(job: HostedWorkbenchJob, key: string): number | null;
|
|
17
|
+
/** True when the phase status is "queued" or "running". */
export declare function isWorkbenchPhaseActive(phase: SubjectCaseReview["phases"][number]): boolean;
|
|
18
|
+
/**
 * Collapses the statuses of a job group into one status.
 * The first status present wins, in this order: "running", "queued",
 * "failed", "cancelled". If none is present (including for an empty list)
 * the result is "succeeded".
 */
export declare function resolveWorkbenchJobGroupStatus(jobs: readonly {
|
|
19
|
+
status: HostedWorkbenchJobStatus;
|
|
20
|
+
}[]): HostedWorkbenchJobStatus;
|
|
21
|
+
//# sourceMappingURL=execution-phases.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"execution-phases.d.ts","sourceRoot":"","sources":["../src/execution-phases.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EAExB,2BAA2B,EAC3B,sBAAsB,EACtB,uBAAuB,EACvB,mBAAmB,EACpB,MAAM,kCAAkC,CAAC;AAI1C,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,IAAI,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CA0D9B;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,IAAI,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACpC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,CACX,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,KAC9B,uBAAuB,CAAC;CAC9B,GAAG,mBAAmB,EAAE,CAkDxB;AAED,wBAAgB,6BAA6B,CAC3C,GAAG,EAAE,kBAAkB,GACtB,sBAAsB,CAAC,SAAS,CAAC,GAAG,IAAI,CAQ1C;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,kBAAkB,GAAG,MAAM,GAAG,IAAI,CAG/E;AAED,wBAAgB,oCAAoC,CAClD,GAAG,EAAE,kBAAkB,EACvB,GAAG,EAAE,MAAM,GACV,MAAM,GAAG,IAAI,CAGf;AAED,wBAAgB,oCAAoC,CAClD,GAAG,EAAE,kBAAkB,EACvB,GAAG,EAAE,MAAM,GACV,MAAM,GAAG,IAAI,CAGf;AAED,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,iBAAiB,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GACzC,OAAO,CAET;AAED,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,SAAS;IAAE,MAAM,EAAE,wBAAwB,CAAA;CAAE,EAAE,GACpD,wBAAwB,CAc1B"}
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import { mergeWorkbenchExecutionTracesByJob } from "./execution-traces.js";
|
|
2
|
+
/**
 * Builds the phase references for one subject/case review.
 *
 * Only "attempt" jobs matching the requested subject, case and (when given)
 * sample index are kept. They are grouped by run, purpose, case id and sample
 * index; each group becomes one phase entry. The result is then narrowed to
 * the phases of a single run by `selectCurrentPhaseRun`.
 *
 * @param {object} args
 * @param {Iterable<object>} args.jobs - Jobs to scan.
 * @param {string} args.subjectId - Subject the review is about.
 * @param {string} args.caseId - Review case id; a job matches when its case id
 *   is equal or when this id starts with `<jobCaseId>__`.
 * @param {number} [args.sampleIndex] - When a number, only jobs with this
 *   sample index are kept.
 * @returns {object[]} Phase entries (`runId`, `phase`, `role`, `status`,
 *   `jobIds`, `createdAt` and, when known, `startedAt`, `finishedAt`,
 *   `durationMs`, `sampleIndex`).
 */
export function buildSubjectCasePhaseRefs(args) {
    const groups = new Map();
    for (const job of args.jobs) {
        const phase = readWorkbenchExecutionPurpose(job);
        const jobSubjectId = job.subjectId ?? readWorkbenchExecutionMetadataString(job, "subjectId");
        const jobCaseId = readWorkbenchExecutionMetadataString(job, "caseId");
        // Read once; used both for matching and for the group key.
        const jobSampleIndex = readWorkbenchExecutionMetadataNumber(job, "sampleIndex");
        const matchesReview = jobSubjectId === args.subjectId &&
            phase === "attempt" &&
            caseReviewCaseIdsMatch(jobCaseId, args.caseId) &&
            caseReviewSampleIndicesMatch(jobSampleIndex, args.sampleIndex);
        if (!matchesReview) {
            continue;
        }
        const key = [job.runId, phase, jobCaseId ?? "", jobSampleIndex ?? ""].join("\0");
        // Append in place: re-copying the group on every insert is quadratic in group size.
        const group = groups.get(key);
        if (group) {
            group.push(job);
        }
        else {
            groups.set(key, [job]);
        }
    }
    const phases = [...groups.values()]
        // The group arrays are local to this call, so sorting them in place is safe.
        .map((group) => group.sort(compareWorkbenchPhaseJobs))
        .flatMap((group) => {
            const first = group[0];
            if (!first) {
                return [];
            }
            const phase = readWorkbenchExecutionPurpose(first);
            if (phase !== "attempt") {
                return [];
            }
            const startedAt = minTimestamp(group.map((job) => job.startedAt));
            const finishedAt = maxTimestamp(group.map((job) => job.finishedAt));
            // Date.parse yields NaN for unparseable input; in that case the
            // duration is omitted instead of being emitted as NaN.
            const elapsedMs = startedAt && finishedAt
                ? Date.parse(finishedAt) - Date.parse(startedAt)
                : Number.NaN;
            const durationMs = Number.isFinite(elapsedMs) ? Math.max(0, elapsedMs) : null;
            return [{
                runId: first.runId,
                phase,
                role: "engine",
                status: resolveWorkbenchJobGroupStatus(group),
                jobIds: group.map((job) => job.id),
                createdAt: minTimestamp(group.map((job) => job.createdAt)) ?? first.createdAt,
                ...(startedAt ? { startedAt } : {}),
                ...(finishedAt ? { finishedAt } : {}),
                ...(durationMs !== null ? { durationMs } : {}),
                ...optionalNumber("sampleIndex", readWorkbenchExecutionMetadataNumber(first, "sampleIndex")),
            }];
        })
        .sort(compareSubjectCasePhases);
    return selectCurrentPhaseRun(phases);
}
|
|
53
|
+
/**
 * Builds trace phases from jobs that carry a recognised execution purpose.
 *
 * Jobs are grouped by run, purpose, subject id, case id, sample index and
 * attempt index. Each group becomes one phase whose trace merges the per-job
 * traces returned by `args.traceForJob`.
 *
 * @param {object} args
 * @param {Iterable<object>} args.jobs - Jobs to scan; jobs without a
 *   recognised purpose are skipped.
 * @param {string} args.traceIdPrefix - Forwarded to the trace merger.
 * @param {Function} args.traceForJob - Called as `(job, role)` for each job;
 *   returns that job's trace.
 * @returns {object[]} Phase entries sorted with `compareWorkbenchTracePhases`.
 *   `executionId` is non-null only for single-job groups.
 */
export function buildWorkbenchTracePhases(args) {
    // Same subject resolution for the group key and the emitted phase, so a
    // subject id that lives only in execution metadata is not lost.
    const resolveSubjectId = (job) => job.subjectId ?? readWorkbenchExecutionMetadataString(job, "subjectId");
    const groups = new Map();
    for (const job of args.jobs) {
        const purpose = readWorkbenchExecutionPurpose(job);
        if (!purpose) {
            continue;
        }
        const key = [
            job.runId,
            purpose,
            resolveSubjectId(job) ?? "",
            readWorkbenchExecutionMetadataString(job, "caseId") ?? "",
            readWorkbenchExecutionMetadataNumber(job, "sampleIndex") ?? "",
            readWorkbenchExecutionMetadataNumber(job, "attemptIndex") ?? "",
        ].join("\0");
        // Append in place: re-copying the group on every insert is quadratic in group size.
        const group = groups.get(key);
        if (group) {
            group.push(job);
        }
        else {
            groups.set(key, [job]);
        }
    }
    return [...groups.values()]
        // The group arrays are local to this call, so sorting them in place is safe.
        .map((group) => group.sort(compareWorkbenchTraceJobs))
        .flatMap((group) => {
            const first = group[0];
            if (!first) {
                return [];
            }
            const purpose = readWorkbenchExecutionPurpose(first);
            if (!purpose) {
                return [];
            }
            const role = traceRoleForPurpose(purpose);
            const subjectId = resolveSubjectId(first);
            return [{
                phase: purpose,
                executionId: group.length === 1 ? readWorkbenchExecutionId(first) : null,
                role,
                status: resolveWorkbenchJobGroupStatus(group),
                jobIds: group.map((job) => job.id),
                ...(subjectId ? { subjectId } : {}),
                ...optionalString("caseId", readWorkbenchExecutionMetadataString(first, "caseId")),
                ...optionalNumber("sampleIndex", readWorkbenchExecutionMetadataNumber(first, "sampleIndex")),
                ...optionalNumber("attemptIndex", readWorkbenchExecutionMetadataNumber(first, "attemptIndex")),
                trace: mergeWorkbenchExecutionTracesByJob({
                    traceIdPrefix: args.traceIdPrefix,
                    stageId: purpose,
                    jobs: group.map((job) => ({
                        id: job.id,
                        trace: args.traceForJob(job, role),
                    })),
                }),
            }];
        })
        .sort(compareWorkbenchTracePhases);
}
|
|
104
|
+
/**
 * Reads the execution purpose of a job.
 *
 * @param {object} job - Job; reads `kind` and, via `readExecutionRecord`, the
 *   execution record's `purpose`.
 * @returns {"improve"|"attempt"|null} The purpose, or null when the job kind
 *   is not "execute" or the purpose is not one of the two recognised values.
 */
export function readWorkbenchExecutionPurpose(job) {
    if (job.kind !== "execute") {
        return null;
    }
    const purpose = readExecutionRecord(job)?.purpose;
    switch (purpose) {
        case "improve":
        case "attempt":
            return purpose;
        default:
            return null;
    }
}
|
|
113
|
+
/**
 * Reads the execution id of a job.
 *
 * @param {object} job - Job whose execution record is inspected.
 * @returns {string|null} The id when it is a non-empty string, else null.
 */
export function readWorkbenchExecutionId(job) {
    const id = readExecutionRecord(job)?.id;
    if (typeof id !== "string" || id.length === 0) {
        return null;
    }
    return id;
}
|
|
117
|
+
/**
 * Reads a metadata value of a job as a string.
 *
 * @param {object} job - Job to read from.
 * @param {string} key - Metadata key.
 * @returns {string|null} The value when it is a non-empty string, else null.
 */
export function readWorkbenchExecutionMetadataString(job, key) {
    const candidate = readWorkbenchExecutionMetadataValue(job, key);
    if (typeof candidate !== "string" || candidate.length === 0) {
        return null;
    }
    return candidate;
}
|
|
121
|
+
/**
 * Reads a metadata value of a job as a number.
 *
 * @param {object} job - Job to read from.
 * @param {string} key - Metadata key.
 * @returns {number|null} The value when it is a finite number, else null.
 */
export function readWorkbenchExecutionMetadataNumber(job, key) {
    const candidate = readWorkbenchExecutionMetadataValue(job, key);
    // Number.isFinite does not coerce, so it is already false for non-numbers.
    if (Number.isFinite(candidate)) {
        return candidate;
    }
    return null;
}
|
|
125
|
+
/**
 * Tells whether a phase is still in progress.
 *
 * @param {object} phase - Phase entry; only `status` is read.
 * @returns {boolean} True for the statuses "queued" and "running".
 */
export function isWorkbenchPhaseActive(phase) {
    return ["queued", "running"].includes(phase.status);
}
|
|
128
|
+
/**
 * Collapses the statuses of a group of jobs into a single status.
 *
 * @param {Array<{status: string}>} jobs - Jobs of one group.
 * @returns {string} The first status present among "running", "queued",
 *   "failed", "cancelled" (in that order); "succeeded" when none is present,
 *   which includes the empty list.
 */
export function resolveWorkbenchJobGroupStatus(jobs) {
    const precedence = ["running", "queued", "failed", "cancelled"];
    for (const candidate of precedence) {
        if (jobs.some((job) => job.status === candidate)) {
            return candidate;
        }
    }
    return "succeeded";
}
|
|
143
|
+
/**
 * Looks up a raw metadata value for a job.
 *
 * The key is read from `job.input.execution.metadata` first; when that yields
 * null or undefined, `job.input[key]` is used instead.
 *
 * @param {object} job - Job to read from.
 * @param {string} key - Metadata key.
 * @returns {*} The value found, or null when neither location provides one.
 */
function readWorkbenchExecutionMetadataValue(job, key) {
    const input = asRecord(job.input);
    const execution = asRecord(input?.execution);
    const metadata = asRecord(execution?.metadata);
    const fromMetadata = metadata?.[key];
    if (fromMetadata !== undefined && fromMetadata !== null) {
        return fromMetadata;
    }
    return input?.[key] ?? null;
}
|
|
149
|
+
/**
 * Fetches the `execution` entry of a job's input.
 *
 * @param {object} job - Job; `job.input` and its `execution` property are each
 *   passed through `asRecord`.
 * @returns {*} Whatever `asRecord` yields for `job.input.execution`.
 */
function readExecutionRecord(job) {
    const execution = asRecord(job.input)?.execution;
    return asRecord(execution);
}
|
|
153
|
+
/**
 * Tells whether a job's case id refers to the case under review.
 *
 * @param {string|null} jobCaseId - Case id read from the job.
 * @param {string} reviewCaseId - Case id of the review.
 * @returns {boolean} False when the job has no case id; otherwise true when
 *   the ids are equal or the review id starts with `<jobCaseId>__`.
 */
function caseReviewCaseIdsMatch(jobCaseId, reviewCaseId) {
    if (!jobCaseId) {
        return false;
    }
    if (jobCaseId === reviewCaseId) {
        return true;
    }
    return reviewCaseId.startsWith(`${jobCaseId}__`);
}
|
|
157
|
+
/**
 * Tells whether a job's sample index satisfies the review's sample filter.
 *
 * @param {number|null} jobSampleIndex - Sample index read from the job.
 * @param {number} [reviewSampleIndex] - Requested sample index.
 * @returns {boolean} True when no numeric sample index was requested, or when
 *   the job's index is strictly equal to the requested one.
 */
function caseReviewSampleIndicesMatch(jobSampleIndex, reviewSampleIndex) {
    if (typeof reviewSampleIndex !== "number") {
        return true;
    }
    return jobSampleIndex === reviewSampleIndex;
}
|
|
160
|
+
/**
 * Narrows a list of phases to those belonging to the "current" run.
 *
 * The current run is the `runId` of the most recent active phase (see
 * `isWorkbenchPhaseActive`); when that yields no run id, the `runId` of the
 * most recent phase overall is used. Lists with at most one phase, and lists
 * for which no truthy run id can be determined, are returned unchanged.
 *
 * @param {Array<object>} phases - Phase records; not mutated.
 * @returns {Array<object>} The phases of the selected run, or `phases`
 *   itself when no selection applies.
 */
function selectCurrentPhaseRun(phases) {
    if (phases.length < 2) {
        return phases;
    }
    const newestActive = phases.filter(isWorkbenchPhaseActive).sort(comparePhaseRecency)[0];
    let runId = newestActive?.runId;
    if (runId == null) {
        // Sort a copy so the caller's array keeps its order.
        const newestOverall = phases.slice().sort(comparePhaseRecency)[0];
        runId = newestOverall?.runId;
    }
    if (!runId) {
        return phases;
    }
    return phases.filter((phase) => phase.runId === runId);
}
|
|
172
|
+
/**
 * Sort comparator for phases of a subject case.
 *
 * Orders by purpose rank (`phasePurposeOrder`), then by ascending
 * `sampleIndex` (a missing index counts as -1), then most recent first.
 *
 * @param {object} left - First phase.
 * @param {object} right - Second phase.
 * @returns {number} Negative, zero, or positive per the comparator contract.
 */
function compareSubjectCasePhases(left, right) {
    const purposeDelta = phasePurposeOrder(left.phase) - phasePurposeOrder(right.phase);
    if (purposeDelta) {
        return purposeDelta;
    }
    const sampleDelta = (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1);
    if (sampleDelta) {
        return sampleDelta;
    }
    return readPhaseRecencyMs(right) - readPhaseRecencyMs(left);
}
|
|
177
|
+
/**
 * Sort comparator that places the most recent phase first, using
 * `readPhaseRecencyMs` as the recency measure.
 *
 * @param {object} left - First phase.
 * @param {object} right - Second phase.
 * @returns {number} Positive when `right` is more recent than `left`.
 */
function comparePhaseRecency(left, right) {
    const rightMs = readPhaseRecencyMs(right);
    const leftMs = readPhaseRecencyMs(left);
    return rightMs - leftMs;
}
|
|
180
|
+
/**
 * Sort comparator for jobs within a phase listing.
 *
 * Orders by purpose rank, then ascending "sampleIndex" metadata (missing
 * counts as -1), then most recent first, and finally by `id` so the order is
 * deterministic.
 *
 * @param {object} left - First job.
 * @param {object} right - Second job.
 * @returns {number} Negative, zero, or positive per the comparator contract.
 */
function compareWorkbenchPhaseJobs(left, right) {
    const purposeDelta = phasePurposeOrder(readWorkbenchExecutionPurpose(left)) -
        phasePurposeOrder(readWorkbenchExecutionPurpose(right));
    if (purposeDelta) {
        return purposeDelta;
    }
    const sampleDelta = (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
        (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1);
    if (sampleDelta) {
        return sampleDelta;
    }
    const recencyDelta = readJobRecencyMs(right) - readJobRecencyMs(left);
    if (recencyDelta) {
        return recencyDelta;
    }
    return left.id.localeCompare(right.id);
}
|
|
188
|
+
/**
 * Sort comparator for jobs in a trace listing.
 *
 * Orders by purpose rank, then "caseId" metadata (locale compare, missing
 * counts as ""), then ascending "sampleIndex" and "attemptIndex" metadata
 * (missing counts as -1), and finally by `id`.
 *
 * @param {object} left - First job.
 * @param {object} right - Second job.
 * @returns {number} Negative, zero, or positive per the comparator contract.
 */
function compareWorkbenchTraceJobs(left, right) {
    const purposeDelta = phasePurposeOrder(readWorkbenchExecutionPurpose(left)) -
        phasePurposeOrder(readWorkbenchExecutionPurpose(right));
    if (purposeDelta) {
        return purposeDelta;
    }
    const leftCaseId = String(readWorkbenchExecutionMetadataString(left, "caseId") ?? "");
    const rightCaseId = String(readWorkbenchExecutionMetadataString(right, "caseId") ?? "");
    const caseDelta = leftCaseId.localeCompare(rightCaseId);
    if (caseDelta) {
        return caseDelta;
    }
    const sampleDelta = (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
        (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1);
    if (sampleDelta) {
        return sampleDelta;
    }
    const attemptDelta = (readWorkbenchExecutionMetadataNumber(left, "attemptIndex") ?? -1) -
        (readWorkbenchExecutionMetadataNumber(right, "attemptIndex") ?? -1);
    if (attemptDelta) {
        return attemptDelta;
    }
    return left.id.localeCompare(right.id);
}
|
|
198
|
+
/**
 * Sort comparator for phases in a trace listing.
 *
 * Orders by purpose rank, then `caseId` (locale compare, missing counts as
 * ""), then ascending `sampleIndex` and `attemptIndex` (missing counts as
 * -1), and finally by the first entry of `jobIds`.
 *
 * @param {object} left - First phase; must have a `jobIds` array.
 * @param {object} right - Second phase; must have a `jobIds` array.
 * @returns {number} Negative, zero, or positive per the comparator contract.
 */
function compareWorkbenchTracePhases(left, right) {
    const purposeDelta = phasePurposeOrder(left.phase) - phasePurposeOrder(right.phase);
    if (purposeDelta) {
        return purposeDelta;
    }
    const caseDelta = String(left.caseId ?? "").localeCompare(String(right.caseId ?? ""));
    if (caseDelta) {
        return caseDelta;
    }
    const sampleDelta = (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1);
    if (sampleDelta) {
        return sampleDelta;
    }
    const attemptDelta = (left.attemptIndex ?? -1) - (right.attemptIndex ?? -1);
    if (attemptDelta) {
        return attemptDelta;
    }
    const leftFirstJobId = String(left.jobIds[0] ?? "");
    const rightFirstJobId = String(right.jobIds[0] ?? "");
    return leftFirstJobId.localeCompare(rightFirstJobId);
}
|
|
205
|
+
/**
 * Maps an execution purpose to the trace role name.
 *
 * @param {*} purpose - Execution purpose.
 * @returns {"optimizer" | "engine"} "optimizer" for the "improve" purpose,
 *   "engine" for everything else.
 */
function traceRoleForPurpose(purpose) {
    return purpose === "improve" ? "optimizer" : "engine";
}
|
|
211
|
+
/**
 * Returns the sort rank of an execution purpose.
 *
 * @param {*} purpose - Execution purpose.
 * @returns {number} 0 for "improve", 1 for "attempt", 3 for anything else.
 */
function phasePurposeOrder(purpose) {
    switch (purpose) {
        case "improve":
            return 0;
        case "attempt":
            return 1;
        default:
            return 3;
    }
}
|
|
220
|
+
/**
 * Returns a phase's recency as epoch milliseconds.
 *
 * Uses the first of `finishedAt`, `startedAt`, `createdAt` that
 * `parseTimestampMs` can parse; 0 when none can.
 *
 * @param {object} phase - Phase record carrying timestamp fields.
 * @returns {number} Epoch milliseconds, or 0 as the fallback.
 */
function readPhaseRecencyMs(phase) {
    const fieldsByPriority = ["finishedAt", "startedAt", "createdAt"];
    for (const field of fieldsByPriority) {
        const ms = parseTimestampMs(phase[field]);
        if (ms != null) {
            return ms;
        }
    }
    return 0;
}
|
|
226
|
+
/**
 * Returns a job's recency as epoch milliseconds.
 *
 * Uses the first of `finishedAt`, `startedAt`, `updatedAt`, `createdAt` that
 * `parseTimestampMs` can parse; 0 when none can.
 *
 * @param {object} job - Job record carrying timestamp fields.
 * @returns {number} Epoch milliseconds, or 0 as the fallback.
 */
function readJobRecencyMs(job) {
    const fieldsByPriority = ["finishedAt", "startedAt", "updatedAt", "createdAt"];
    for (const field of fieldsByPriority) {
        const ms = parseTimestampMs(job[field]);
        if (ms != null) {
            return ms;
        }
    }
    return 0;
}
|
|
233
|
+
/**
 * Returns the earliest timestamp string in `values`.
 *
 * Entries that are not non-empty strings are ignored. Candidates are
 * compared by the instant they denote (`Date.parse`), not by their text:
 * plain string ordering misorders timestamps that differ in fractional-second
 * precision or UTC offset (for example "…:00.500Z" sorts before "…:00Z").
 * Strings that cannot be parsed are only chosen when no parseable timestamp
 * is present, and are then ordered by code unit, as are parseable strings
 * that denote the same instant.
 *
 * @param {Array<*>} values - Candidate timestamps; not mutated.
 * @returns {string | null} The earliest input string, unchanged, or `null`
 *   when there is no candidate.
 */
function minTimestamp(values) {
    let earliest = null;
    let earliestMs = Number.NaN;
    for (const value of values) {
        if (typeof value !== "string" || value.length === 0) {
            continue;
        }
        const ms = Date.parse(value);
        const parsed = Number.isFinite(ms);
        const earliestParsed = Number.isFinite(earliestMs);
        let isEarlier;
        if (earliest === null) {
            isEarlier = true;
        }
        else if (parsed && earliestParsed) {
            isEarlier = ms < earliestMs || (ms === earliestMs && value < earliest);
        }
        else if (parsed !== earliestParsed) {
            // A parseable timestamp always beats an unparseable string.
            isEarlier = parsed;
        }
        else {
            isEarlier = value < earliest;
        }
        if (isEarlier) {
            earliest = value;
            earliestMs = ms;
        }
    }
    return earliest;
}
|
|
239
|
+
/**
 * Returns the latest timestamp string in `values`.
 *
 * Entries that are not non-empty strings are ignored. Candidates are
 * compared by the instant they denote (`Date.parse`), not by their text:
 * plain string ordering misorders timestamps that differ in fractional-second
 * precision or UTC offset (for example "…:00Z" sorts after "…:00.500Z").
 * Strings that cannot be parsed are only chosen when no parseable timestamp
 * is present, and are then ordered by code unit, as are parseable strings
 * that denote the same instant.
 *
 * @param {Array<*>} values - Candidate timestamps; not mutated.
 * @returns {string | null} The latest input string, unchanged, or `null`
 *   when there is no candidate.
 */
function maxTimestamp(values) {
    let latest = null;
    let latestMs = Number.NaN;
    for (const value of values) {
        if (typeof value !== "string" || value.length === 0) {
            continue;
        }
        const ms = Date.parse(value);
        const parsed = Number.isFinite(ms);
        const latestParsed = Number.isFinite(latestMs);
        let isLater;
        if (latest === null) {
            isLater = true;
        }
        else if (parsed && latestParsed) {
            isLater = ms > latestMs || (ms === latestMs && value > latest);
        }
        else if (parsed !== latestParsed) {
            // A parseable timestamp always beats an unparseable string.
            isLater = parsed;
        }
        else {
            isLater = value > latest;
        }
        if (isLater) {
            latest = value;
            latestMs = ms;
        }
    }
    return latest;
}
|
|
245
|
+
/**
 * Parses a timestamp into epoch milliseconds.
 *
 * @param {*} value - Timestamp to parse; falsy values are treated as absent.
 * @returns {number | null} The `Date.parse` result when it is finite,
 *   otherwise `null`.
 */
function parseTimestampMs(value) {
    const ms = value ? Date.parse(value) : Number.NaN;
    return Number.isFinite(ms) ? ms : null;
}
|
|
252
|
+
/**
 * Builds an object fragment holding `value` under `key`, meant to be spread
 * into a larger object literal.
 *
 * @param {string} key - Property name for the fragment.
 * @param {*} value - Value to include; any falsy value is omitted.
 * @returns {object} `{ [key]: value }` when `value` is truthy, else `{}`.
 */
function optionalString(key, value) {
    if (!value) {
        return {};
    }
    return { [key]: value };
}
|
|
255
|
+
/**
 * Builds an object fragment holding `value` under `key`, meant to be spread
 * into a larger object literal.
 *
 * Only `null` and `undefined` are omitted, so 0 is kept.
 *
 * @param {string} key - Property name for the fragment.
 * @param {*} value - Value to include.
 * @returns {object} `{}` when `value` is nullish, else `{ [key]: value }`.
 */
function optionalNumber(key, value) {
    if (value === null || value === undefined) {
        return {};
    }
    return { [key]: value };
}
|
|
258
|
+
/**
 * Narrows a value to a non-array object.
 *
 * @param {*} value - Anything.
 * @returns {object | null} `value` itself when it is a non-null object that
 *   is not an array, otherwise `null`.
 */
function asRecord(value) {
    const isNonArrayObject = value !== null && typeof value === "object" && !Array.isArray(value);
    return isNonArrayObject ? value : null;
}
|