@workbench-ai/workbench 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,36 +1,37 @@
1
1
  import { promises as fs } from "node:fs";
2
2
  import path from "node:path";
3
- import { buildWorkbenchTraceSessionsFromFiles, selectExecutionOutputFilesForInspection, } from "@workbench-ai/workbench-core";
3
+ import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, sanitizeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeJobForExchange, selectExecutionOutputFilesForInspection, workbenchRuntimeBundleStats, workbenchSurfaceFilesEqualForExchange, } from "@workbench-ai/workbench-core";
4
4
  const RUNTIME_DIR = ".workbench/runtime";
5
+ const CANDIDATE_RECORDS_DIR = "candidates";
5
6
  export function localRuntimeDir(workspace) {
6
7
  return path.join(workspace, RUNTIME_DIR);
7
8
  }
8
9
  export async function loadLocalArchive(workspace) {
9
10
  const index = await loadLocalArchiveIndex(workspace);
10
11
  const root = localRuntimeDir(workspace);
11
- const subjectFiles = {};
12
- await Promise.all(index.subjects.map(async (subject) => {
13
- subjectFiles[subject.id] = await readSurfaceFiles(path.join(root, "subjects", localRecordName(subject.id), "files"));
12
+ const candidateFiles = {};
13
+ await Promise.all(index.candidates.map(async (candidate) => {
14
+ candidateFiles[candidate.id] = await readSurfaceFiles(path.join(root, CANDIDATE_RECORDS_DIR, localRecordName(candidate.id), "files"));
14
15
  }));
15
16
  const snapshot = {
16
17
  ...index,
17
- subjectFiles,
18
+ candidateFiles,
18
19
  };
19
20
  validateLocalArchiveSnapshot(snapshot);
20
21
  return snapshot;
21
22
  }
22
23
  export async function loadLocalArchiveIndex(workspace) {
23
24
  const root = localRuntimeDir(workspace);
24
- const [state, subjects, evaluations, runs, events] = await Promise.all([
25
+ const [state, candidates, evaluations, runs, events] = await Promise.all([
25
26
  readJson(path.join(root, "state.json"), {}),
26
- readRecords(path.join(root, "subjects"), "record.json"),
27
+ readRecords(path.join(root, CANDIDATE_RECORDS_DIR), "record.json"),
27
28
  readFlatRecords(path.join(root, "evaluations")),
28
29
  readFlatRecords(path.join(root, "runs")),
29
30
  readJson(path.join(root, "events.json"), []),
30
31
  ]);
31
32
  const index = {
32
33
  activeId: typeof state.activeId === "string" ? state.activeId : null,
33
- subjects: subjects.sort((left, right) => left.ordinal - right.ordinal || left.id.localeCompare(right.id)),
34
+ candidates: candidates.sort(compareLocalCandidateRecords),
34
35
  evaluations: evaluations.sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id)),
35
36
  runs: runs.sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id)),
36
37
  events: events.sort((left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id)),
@@ -42,19 +43,19 @@ export async function saveLocalArchive(workspace, snapshot) {
42
43
  const root = localRuntimeDir(workspace);
43
44
  await fs.mkdir(root, { recursive: true });
44
45
  await writeJson(path.join(root, "state.json"), { activeId: snapshot.activeId });
45
- await fs.rm(path.join(root, "subjects"), { force: true, recursive: true });
46
+ await fs.rm(path.join(root, CANDIDATE_RECORDS_DIR), { force: true, recursive: true });
46
47
  await fs.rm(path.join(root, "evaluations"), { force: true, recursive: true });
47
48
  await fs.rm(path.join(root, "runs"), { force: true, recursive: true });
48
49
  await Promise.all([
49
- fs.mkdir(path.join(root, "subjects"), { recursive: true }),
50
+ fs.mkdir(path.join(root, CANDIDATE_RECORDS_DIR), { recursive: true }),
50
51
  fs.mkdir(path.join(root, "evaluations"), { recursive: true }),
51
52
  fs.mkdir(path.join(root, "runs"), { recursive: true }),
52
53
  ]);
53
- for (const subject of snapshot.subjects) {
54
- const subjectRoot = path.join(root, "subjects", subject.id);
55
- await fs.mkdir(subjectRoot, { recursive: true });
56
- await writeJson(path.join(subjectRoot, "record.json"), subject);
57
- await writeSurfaceFiles(path.join(subjectRoot, "files"), snapshot.subjectFiles[subject.id] ?? []);
54
+ for (const candidate of snapshot.candidates) {
55
+ const candidateRoot = path.join(root, CANDIDATE_RECORDS_DIR, candidate.id);
56
+ await fs.mkdir(candidateRoot, { recursive: true });
57
+ await writeJson(path.join(candidateRoot, "record.json"), candidateRecordWithoutDerivedFields(candidate));
58
+ await writeSurfaceFiles(path.join(candidateRoot, "files"), snapshot.candidateFiles[candidate.id] ?? []);
58
59
  }
59
60
  for (const evaluation of snapshot.evaluations) {
60
61
  await writeJson(path.join(root, "evaluations", `${evaluation.id}.json`), evaluation);
@@ -65,6 +66,142 @@ export async function saveLocalArchive(workspace, snapshot) {
65
66
  await writeJson(path.join(root, "events.json"), snapshot.events);
66
67
  }
67
68
  export async function saveLocalJobs(workspace, jobs) {
69
+ if (jobs.length === 0) {
70
+ return;
71
+ }
72
+ await writeArchivedLocalJobs(workspace, jobs, new Map());
73
+ }
74
+ export async function exportLocalRuntimeBundle(workspace) {
75
+ const snapshot = await loadLocalArchive(workspace);
76
+ const jobs = (await readLocalJobs(workspace)).map(sanitizeRuntimeJobForExchange);
77
+ const executionFiles = await Promise.all(jobs.map(async (job) => ({
78
+ jobId: job.id,
79
+ files: await readLocalExecutionFiles(workspace, job.id),
80
+ })));
81
+ return {
82
+ schema: "workbench.runtime.bundle.v1",
83
+ activeId: snapshot.activeId,
84
+ candidates: snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange),
85
+ candidateFiles: Object.entries(snapshot.candidateFiles).map(([candidateId, files]) => ({
86
+ candidateId,
87
+ files: copySurfaceFiles(files),
88
+ })),
89
+ evaluations: snapshot.evaluations.map((evaluation) => ({ ...evaluation })),
90
+ runs: snapshot.runs.map((run) => ({ ...run })),
91
+ jobs,
92
+ executionFiles,
93
+ events: snapshot.events.map((event) => ({ ...event })),
94
+ };
95
+ }
96
+ export async function importLocalRuntimeBundle(workspace, bundle) {
97
+ validateRuntimeBundleSchema(bundle);
98
+ const snapshot = await loadLocalArchive(workspace);
99
+ const existingJobs = (await readLocalJobs(workspace)).map(sanitizeRuntimeJobForExchange);
100
+ let changed = false;
101
+ const existingCandidates = snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
102
+ if (JSON.stringify(existingCandidates) !== JSON.stringify(snapshot.candidates)) {
103
+ changed = true;
104
+ }
105
+ const incomingCandidates = bundle.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
106
+ const candidates = mergeRecordsById(existingCandidates, incomingCandidates, (candidate) => candidate.id, (didChange) => {
107
+ changed ||= didChange;
108
+ }).sort(compareLocalCandidateRecords);
109
+ const candidateFiles = { ...snapshot.candidateFiles };
110
+ for (const group of bundle.candidateFiles) {
111
+ const candidateId = localRecordName(group.candidateId);
112
+ const files = copySurfaceFiles(group.files);
113
+ const existing = candidateFiles[candidateId];
114
+ if (existing) {
115
+ if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
116
+ throw new Error(`Runtime history conflict for candidate files ${candidateId}.`);
117
+ }
118
+ }
119
+ else {
120
+ changed = true;
121
+ }
122
+ candidateFiles[candidateId] = files;
123
+ }
124
+ const candidateIds = new Set(candidates.map((candidate) => candidate.id));
125
+ const activeId = bundle.activeId && candidateIds.has(bundle.activeId)
126
+ ? bundle.activeId
127
+ : snapshot.activeId && candidateIds.has(snapshot.activeId)
128
+ ? snapshot.activeId
129
+ : null;
130
+ if (activeId !== snapshot.activeId) {
131
+ changed = true;
132
+ }
133
+ const evaluations = mergeRecordsById(snapshot.evaluations, bundle.evaluations, (evaluation) => evaluation.id, (didChange) => {
134
+ changed ||= didChange;
135
+ }).sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id));
136
+ const runs = mergeRecordsById(snapshot.runs, bundle.runs, (run) => run.id, (didChange) => {
137
+ changed ||= didChange;
138
+ }).sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id));
139
+ const events = mergeRecordsById(snapshot.events, bundle.events, runtimeEventKey, (didChange) => {
140
+ changed ||= didChange;
141
+ }).sort((left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id));
142
+ const executionFilesByJobId = new Map();
143
+ await Promise.all(existingJobs.map(async (job) => {
144
+ executionFilesByJobId.set(job.id, await readLocalExecutionFiles(workspace, job.id));
145
+ }));
146
+ for (const group of bundle.executionFiles) {
147
+ const jobId = localRecordName(group.jobId);
148
+ const files = copySurfaceFiles(group.files);
149
+ const existing = executionFilesByJobId.get(jobId);
150
+ if (existing) {
151
+ if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
152
+ throw new Error(`Runtime history conflict for execution files ${jobId}.`);
153
+ }
154
+ }
155
+ else {
156
+ changed = true;
157
+ }
158
+ executionFilesByJobId.set(jobId, files);
159
+ }
160
+ const jobs = mergeRecordsById(existingJobs, bundle.jobs.map(sanitizeRuntimeJobForExchange), (job) => job.id, (didChange) => {
161
+ changed ||= didChange;
162
+ }, runtimeJobsEqualForExchange).sort((left, right) => (left.startedAt ?? left.createdAt).localeCompare(right.startedAt ?? right.createdAt) ||
163
+ left.id.localeCompare(right.id));
164
+ await saveLocalArchive(workspace, {
165
+ activeId,
166
+ candidates,
167
+ candidateFiles,
168
+ evaluations,
169
+ runs,
170
+ events,
171
+ });
172
+ await writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId);
173
+ return {
174
+ changed,
175
+ stats: runtimeBundleStats({
176
+ schema: "workbench.runtime.bundle.v1",
177
+ activeId,
178
+ candidates,
179
+ candidateFiles: Object.entries(candidateFiles).map(([candidateId, files]) => ({
180
+ candidateId,
181
+ files,
182
+ })),
183
+ evaluations,
184
+ runs,
185
+ jobs,
186
+ executionFiles: [...executionFilesByJobId.entries()].map(([jobId, files]) => ({
187
+ jobId,
188
+ files,
189
+ })),
190
+ events,
191
+ }),
192
+ };
193
+ }
194
+ export function runtimeBundleStats(bundle) {
195
+ return workbenchRuntimeBundleStats(bundle);
196
+ }
197
+ export function sanitizeRuntimeJobForExchange(job) {
198
+ return sanitizeWorkbenchRuntimeJobForExchange(job);
199
+ }
200
+ function sanitizeRuntimeJobForArchive(job) {
201
+ const { leaseUntil: _leaseUntil, wakeupLeaseUntil: _wakeupLeaseUntil, hostId: _hostId, workerId: _workerId, claimTokenHash: _claimTokenHash, ...portable } = job;
202
+ return { ...portable };
203
+ }
204
+ async function writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId) {
68
205
  if (jobs.length === 0) {
69
206
  return;
70
207
  }
@@ -76,14 +213,18 @@ export async function saveLocalJobs(workspace, jobs) {
76
213
  fs.mkdir(executionFilesDir, { recursive: true }),
77
214
  ]);
78
215
  for (const job of jobs) {
216
+ const sanitizedJob = sanitizeRuntimeJobForArchive(job);
79
217
  const safeJobId = localRecordName(job.id);
80
- const traceSourceFiles = filterArchivedExecutionFiles(completedJobOutputFiles(job));
81
- const outputFiles = selectExecutionOutputFilesForInspection({
82
- purpose: readExecutionPurpose(job),
83
- files: traceSourceFiles,
84
- output: jsonRecord(job.output),
85
- });
86
- await writeJson(path.join(jobsDir, `${safeJobId}.json`), archivedLocalJob(job, outputFiles, traceSourceFiles));
218
+ const explicitOutputFiles = executionFilesByJobId.get(job.id);
219
+ const traceSourceFiles = filterArchivedExecutionFiles(completedJobOutputFiles(sanitizedJob));
220
+ const outputFiles = explicitOutputFiles
221
+ ? copySurfaceFiles(explicitOutputFiles)
222
+ : selectExecutionOutputFilesForInspection({
223
+ purpose: readExecutionPurpose(sanitizedJob),
224
+ files: traceSourceFiles,
225
+ output: jsonRecord(sanitizedJob.output),
226
+ });
227
+ await writeJson(path.join(jobsDir, `${safeJobId}.json`), archivedLocalJob(sanitizedJob, outputFiles, traceSourceFiles.length > 0 ? traceSourceFiles : outputFiles));
87
228
  const filesRoot = path.join(executionFilesDir, safeJobId);
88
229
  await fs.rm(filesRoot, { force: true, recursive: true });
89
230
  await writeSurfaceFiles(filesRoot, outputFiles);
@@ -92,17 +233,17 @@ export async function saveLocalJobs(workspace, jobs) {
92
233
  export async function readLocalExecutionFiles(workspace, jobId) {
93
234
  return await readSurfaceFiles(path.join(localRuntimeDir(workspace), "execution-files", localRecordName(jobId)));
94
235
  }
95
- export async function readLocalSubjectRecord(workspace, subjectId) {
96
- const subject = await readJson(path.join(localRuntimeDir(workspace), "subjects", localRecordName(subjectId), "record.json"), null);
97
- if (!subject) {
98
- throw new Error(`Subject not found: ${subjectId}`);
236
+ export async function readLocalCandidateRecord(workspace, candidateId) {
237
+ const candidate = await readJson(path.join(localRuntimeDir(workspace), CANDIDATE_RECORDS_DIR, localRecordName(candidateId), "record.json"), null);
238
+ if (!candidate) {
239
+ throw new Error(`Candidate not found: ${candidateId}`);
99
240
  }
100
- validateSubjectRecord(subject);
101
- return subject;
241
+ validateCandidateRecord(candidate);
242
+ return candidate;
102
243
  }
103
- export async function readLocalSubjectFilesForId(workspace, subjectId) {
104
- await readLocalSubjectRecord(workspace, subjectId);
105
- return await readSurfaceFiles(path.join(localRuntimeDir(workspace), "subjects", localRecordName(subjectId), "files"));
244
+ export async function readLocalCandidateFilesForId(workspace, candidateId) {
245
+ await readLocalCandidateRecord(workspace, candidateId);
246
+ return await readSurfaceFiles(path.join(localRuntimeDir(workspace), CANDIDATE_RECORDS_DIR, localRecordName(candidateId), "files"));
106
247
  }
107
248
  export async function readLocalEvaluationRecord(workspace, evaluationId) {
108
249
  const evaluation = await readJson(path.join(localRuntimeDir(workspace), "evaluations", `${localRecordName(evaluationId)}.json`), null);
@@ -131,16 +272,16 @@ export async function readLocalRunJobs(workspace, runId) {
131
272
  export async function readLocalJobInRun(workspace, runId, jobId) {
132
273
  return (await readLocalRunJobs(workspace, runId)).find((job) => job.id === jobId) ?? null;
133
274
  }
134
- export function upsertLocalSubject(snapshot, subject, files) {
275
+ export function upsertLocalCandidate(snapshot, candidate, files) {
135
276
  return {
136
277
  ...snapshot,
137
- subjects: [
138
- ...snapshot.subjects.filter((entry) => entry.id !== subject.id),
139
- subject,
140
- ].sort((left, right) => left.ordinal - right.ordinal || left.id.localeCompare(right.id)),
141
- subjectFiles: {
142
- ...snapshot.subjectFiles,
143
- [subject.id]: files.map((file) => ({ ...file })),
278
+ candidates: [
279
+ ...snapshot.candidates.filter((entry) => entry.id !== candidate.id),
280
+ candidate,
281
+ ].sort(compareLocalCandidateRecords),
282
+ candidateFiles: {
283
+ ...snapshot.candidateFiles,
284
+ [candidate.id]: files.map((file) => ({ ...file })),
144
285
  },
145
286
  };
146
287
  }
@@ -153,7 +294,7 @@ export function upsertLocalEvaluation(snapshot, evaluation) {
153
294
  ].sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id)),
154
295
  };
155
296
  }
156
- export function appendLocalRun(snapshot, run, events) {
297
+ export function upsertLocalRun(snapshot, run, events) {
157
298
  return {
158
299
  ...snapshot,
159
300
  runs: [
@@ -172,66 +313,132 @@ export function setLocalActive(snapshot, activeId) {
172
313
  activeId,
173
314
  };
174
315
  }
175
- export function readLocalSubject(snapshot, subjectId) {
176
- const subject = snapshot.subjects.find((entry) => entry.id === subjectId);
177
- if (!subject) {
178
- throw new Error(`Subject not found: ${subjectId}`);
316
+ export function readLocalCandidate(snapshot, candidateId) {
317
+ const candidate = snapshot.candidates.find((entry) => entry.id === candidateId);
318
+ if (!candidate) {
319
+ throw new Error(`Candidate not found: ${candidateId}`);
179
320
  }
180
- return subject;
321
+ return candidate;
181
322
  }
182
- export function readLocalSubjectFiles(snapshot, subjectId) {
183
- readLocalSubject(snapshot, subjectId);
184
- return (snapshot.subjectFiles[subjectId] ?? []).map((file) => ({ ...file }));
323
+ export function readLocalCandidateFiles(snapshot, candidateId) {
324
+ readLocalCandidate(snapshot, candidateId);
325
+ return (snapshot.candidateFiles[candidateId] ?? []).map((file) => ({ ...file }));
185
326
  }
186
327
  function validateLocalArchiveSnapshot(snapshot) {
187
328
  validateLocalArchiveIndex(snapshot);
188
329
  }
330
+ function validateRuntimeBundleSchema(bundle) {
331
+ if (!bundle || bundle.schema !== "workbench.runtime.bundle.v1") {
332
+ throw new Error("Unsupported Workbench runtime bundle.");
333
+ }
334
+ }
335
+ function mergeRecordsById(existing, incoming, idFor, markChanged, equal = runtimeRecordsEqual) {
336
+ const records = new Map();
337
+ for (const record of existing) {
338
+ records.set(localRecordName(idFor(record)), record);
339
+ }
340
+ for (const record of incoming) {
341
+ const id = localRecordName(idFor(record));
342
+ const previous = records.get(id);
343
+ if (!previous || !equal(previous, record)) {
344
+ if (previous) {
345
+ throw new Error(`Runtime history conflict for id ${id}.`);
346
+ }
347
+ markChanged(true);
348
+ }
349
+ records.set(id, record);
350
+ }
351
+ return [...records.values()];
352
+ }
353
+ function runtimeRecordsEqual(left, right) {
354
+ return JSON.stringify(canonicalRuntimeJson(left)) ===
355
+ JSON.stringify(canonicalRuntimeJson(right));
356
+ }
357
+ function runtimeJobsEqualForExchange(left, right) {
358
+ return runtimeRecordsEqual(runtimeComparableJob(left), runtimeComparableJob(right));
359
+ }
360
+ function runtimeComparableJob(job) {
361
+ const comparable = sanitizeRuntimeJobForExchange(job);
362
+ const output = comparable.output;
363
+ if (!output || typeof output !== "object" || Array.isArray(output)) {
364
+ return comparable;
365
+ }
366
+ const { files: _files, fileSet: _fileSet, ...portableOutput } = output;
367
+ return {
368
+ ...comparable,
369
+ output: portableOutput,
370
+ };
371
+ }
372
+ function canonicalRuntimeJson(value) {
373
+ if (Array.isArray(value)) {
374
+ return value.map(canonicalRuntimeJson);
375
+ }
376
+ if (value && typeof value === "object") {
377
+ return Object.fromEntries(Object.keys(value)
378
+ .sort()
379
+ .map((key) => [key, canonicalRuntimeJson(value[key])]));
380
+ }
381
+ return value;
382
+ }
383
+ function runtimeEventKey(event) {
384
+ return [
385
+ event.runId ?? "_",
386
+ event.jobId ?? "_",
387
+ event.at,
388
+ event.id,
389
+ ].join("#");
390
+ }
391
+ function copySurfaceFiles(files) {
392
+ return files.map((file) => ({ ...file }));
393
+ }
189
394
  function validateLocalArchiveIndex(snapshot) {
190
- const subjectIds = new Set(snapshot.subjects.map((subject) => subject.id));
191
- if (snapshot.activeId && !subjectIds.has(snapshot.activeId)) {
192
- throw new Error(`Active subject not found: ${snapshot.activeId}`);
193
- }
194
- for (const subject of snapshot.subjects) {
195
- validateSubjectRecord(subject);
196
- if (!Array.isArray(subject.referenceIds)) {
197
- throw new Error(`subject ${subject.id}.referenceIds must be an array.`);
395
+ const candidateIds = new Set(snapshot.candidates.map((candidate) => candidate.id));
396
+ if (snapshot.activeId && !candidateIds.has(snapshot.activeId)) {
397
+ throw new Error(`Active candidate not found: ${snapshot.activeId}`);
398
+ }
399
+ for (const candidate of snapshot.candidates) {
400
+ validateCandidateRecord(candidate);
401
+ if (!Array.isArray(candidate.referenceIds)) {
402
+ throw new Error(`candidate ${candidate.id}.referenceIds must be an array.`);
198
403
  }
199
- if (!Array.isArray(subject.fileChanges)) {
200
- throw new Error(`subject ${subject.id}.fileChanges must be an array.`);
404
+ if (!Array.isArray(candidate.fileChanges)) {
405
+ throw new Error(`candidate ${candidate.id}.fileChanges must be an array.`);
201
406
  }
202
- if (subject.baseId && !subjectIds.has(subject.baseId)) {
203
- throw new Error(`subject ${subject.id}.baseId not found: ${subject.baseId}`);
407
+ if (candidate.baseId && !candidateIds.has(candidate.baseId)) {
408
+ throw new Error(`candidate ${candidate.id}.baseId not found: ${candidate.baseId}`);
204
409
  }
205
410
  }
206
411
  for (const evaluation of snapshot.evaluations) {
207
412
  validateEvaluationRecord(evaluation);
208
- const subject = snapshot.subjects.find((entry) => entry.id === evaluation.subjectId);
209
- if (!subject) {
210
- throw new Error(`evaluation ${evaluation.id}.subjectId not found: ${evaluation.subjectId}`);
413
+ const candidate = snapshot.candidates.find((entry) => entry.id === evaluation.candidateId);
414
+ if (!candidate) {
415
+ throw new Error(`evaluation ${evaluation.id}.candidateId not found: ${evaluation.candidateId}`);
211
416
  }
212
- if (subject.benchmarkFingerprint !== evaluation.benchmarkFingerprint) {
213
- throw new Error(`evaluation ${evaluation.id}.benchmarkFingerprint does not match subject ${subject.id}.`);
417
+ if (candidate.benchmarkFingerprint !== evaluation.benchmarkFingerprint) {
418
+ throw new Error(`evaluation ${evaluation.id}.benchmarkFingerprint does not match candidate ${candidate.id}.`);
214
419
  }
215
- if (subject.subjectFingerprint !== evaluation.subjectFingerprint) {
216
- throw new Error(`evaluation ${evaluation.id}.subjectFingerprint does not match subject ${subject.id}.`);
420
+ if (candidate.candidateFingerprint !== evaluation.candidateFingerprint) {
421
+ throw new Error(`evaluation ${evaluation.id}.candidateFingerprint does not match candidate ${candidate.id}.`);
217
422
  }
218
423
  }
219
424
  for (const run of snapshot.runs) {
220
425
  validateRunRecord(run);
221
426
  }
222
427
  }
223
- function validateSubjectRecord(subject) {
224
- requireArchiveString(subject.id, "subject.id");
225
- requireArchiveString(subject.benchmarkFingerprint, `subject ${subject.id}.benchmarkFingerprint`);
226
- requireArchiveString(subject.subjectFingerprint, `subject ${subject.id}.subjectFingerprint`);
227
- requireArchiveString(subject.createdAt, `subject ${subject.id}.createdAt`);
428
+ function validateCandidateRecord(candidate) {
429
+ requireArchiveString(candidate.id, "candidate.id");
430
+ requireArchivePositiveInteger(candidate.version, `candidate ${candidate.id}.version`);
431
+ requireArchivePositiveInteger(candidate.ordinal, `candidate ${candidate.id}.ordinal`);
432
+ requireArchiveString(candidate.benchmarkFingerprint, `candidate ${candidate.id}.benchmarkFingerprint`);
433
+ requireArchiveString(candidate.candidateFingerprint, `candidate ${candidate.id}.candidateFingerprint`);
434
+ requireArchiveString(candidate.createdAt, `candidate ${candidate.id}.createdAt`);
228
435
  }
229
436
  function validateEvaluationRecord(evaluation) {
230
437
  requireArchiveString(evaluation.id, "evaluation.id");
231
438
  requireArchiveString(evaluation.runId, `evaluation ${evaluation.id}.runId`);
232
439
  requireArchiveString(evaluation.benchmarkFingerprint, `evaluation ${evaluation.id}.benchmarkFingerprint`);
233
- requireArchiveString(evaluation.subjectFingerprint, `evaluation ${evaluation.id}.subjectFingerprint`);
234
- requireArchiveString(evaluation.subjectId, `evaluation ${evaluation.id}.subjectId`);
440
+ requireArchiveString(evaluation.candidateFingerprint, `evaluation ${evaluation.id}.candidateFingerprint`);
441
+ requireArchiveString(evaluation.candidateId, `evaluation ${evaluation.id}.candidateId`);
235
442
  }
236
443
  function validateRunRecord(run) {
237
444
  requireArchiveString(run.id, "run.id");
@@ -245,18 +452,53 @@ function requireArchiveString(value, label) {
245
452
  throw new Error(`${label} must be a non-empty string.`);
246
453
  }
247
454
  }
455
+ function requireArchivePositiveInteger(value, label) {
456
+ if (typeof value !== "number" || !Number.isSafeInteger(value) || value <= 0) {
457
+ throw new Error(`${label} must be a positive integer.`);
458
+ }
459
+ }
460
+ function compareLocalCandidateRecords(left, right) {
461
+ return left.version - right.version ||
462
+ left.createdAt.localeCompare(right.createdAt) ||
463
+ left.id.localeCompare(right.id);
464
+ }
248
465
  function archivedLocalJob(job, outputFiles, traceSourceFiles) {
249
466
  const output = jsonRecord(job.output);
250
- const traceSessions = buildLocalJobTraceSessions(job, traceSourceFiles);
467
+ const existingTrace = readExistingTrace(job);
468
+ const existingTraceSessions = readExistingTraceSessions(job);
469
+ const traceSessions = existingTraceSessions.length > 0
470
+ ? existingTraceSessions
471
+ : buildLocalJobTraceSessions(job, traceSourceFiles);
251
472
  return {
252
473
  ...job,
253
474
  ...(Object.keys(output).length > 0
254
- ? { output: { ...output, files: outputFiles } }
475
+ ? { output: { ...output, files: traceSourceFiles } }
255
476
  : {}),
256
- trace: buildLocalJobTrace(job),
477
+ trace: existingTrace ?? buildLocalJobTrace(job),
257
478
  traceSessions,
258
479
  };
259
480
  }
481
+ function readExistingTrace(job) {
482
+ const trace = job.trace;
483
+ if (!trace || typeof trace !== "object" || Array.isArray(trace)) {
484
+ return null;
485
+ }
486
+ return {
487
+ trace_id: typeof trace.trace_id === "string" && trace.trace_id.length > 0
488
+ ? trace.trace_id
489
+ : job.id,
490
+ spans: Array.isArray(trace.spans) ? trace.spans : [],
491
+ events: Array.isArray(trace.events) ? trace.events : [],
492
+ summaries: Array.isArray(trace.summaries) ? trace.summaries : [],
493
+ };
494
+ }
495
+ function readExistingTraceSessions(job) {
496
+ const sessions = job.traceSessions;
497
+ if (!Array.isArray(sessions)) {
498
+ return [];
499
+ }
500
+ return sessions.map((session) => ({ ...session }));
501
+ }
260
502
  function filterArchivedExecutionFiles(files) {
261
503
  return files.filter((file) => file.path.startsWith(".workbench/traces/") ||
262
504
  !isWorkbenchReservedArchivePath(file.path));
@@ -266,7 +508,7 @@ function isWorkbenchReservedArchivePath(filePath) {
266
508
  }
267
509
  function buildLocalJobTrace(job) {
268
510
  const purpose = readExecutionPurpose(job);
269
- const role = purpose === "improve" ? "optimizer" : "engine";
511
+ const role = purpose === "improve" ? "improver" : "engine";
270
512
  const stageId = purpose ?? "execution";
271
513
  const status = traceStatusForJob(job.status);
272
514
  const startedAt = job.startedAt ?? job.createdAt;
@@ -358,7 +600,7 @@ function buildLocalJobTraceSessions(job, outputFiles) {
358
600
  job,
359
601
  files: outputFiles,
360
602
  purpose,
361
- fallbackRole: purpose === "improve" ? "optimizer" : "engine",
603
+ fallbackRole: purpose === "improve" ? "improver" : "engine",
362
604
  });
363
605
  }
364
606
  function completedJobOutputFiles(job) {
@@ -439,8 +681,8 @@ function traceUsageSummary(value) {
439
681
  const record = jsonRecord(value);
440
682
  const usage = Object.keys(jsonRecord(record.total)).length > 0
441
683
  ? jsonRecord(record.total)
442
- : Object.keys(jsonRecord(record.optimizer)).length > 0
443
- ? jsonRecord(record.optimizer)
684
+ : Object.keys(jsonRecord(record.improver)).length > 0
685
+ ? jsonRecord(record.improver)
444
686
  : Object.keys(jsonRecord(record.runner)).length > 0
445
687
  ? jsonRecord(record.runner)
446
688
  : Object.keys(jsonRecord(record.engine)).length > 0
@@ -488,8 +730,8 @@ function localRecordName(value) {
488
730
  }
489
731
  return value;
490
732
  }
491
- export async function materializeSubjectRoot(workspace, subjectRoot, files) {
492
- const root = path.join(workspace, normalizeRelativePath(subjectRoot));
733
+ export async function materializeCandidateRoot(workspace, candidateRoot, files) {
734
+ const root = path.join(workspace, normalizeRelativePath(candidateRoot));
493
735
  const before = new Set((await readSurfaceFiles(root)).map((file) => file.path));
494
736
  await fs.rm(root, { force: true, recursive: true });
495
737
  await writeSurfaceFiles(root, files);
@@ -3,9 +3,8 @@ import { type WorkbenchEngineCase, type WorkbenchEngineResolveResult } from "@wo
3
3
  import { type WorkspaceSnapshotFile } from "./workspace-snapshot.js";
4
4
  import { type ResolvedWorkbenchAdapter } from "./adapter-project.js";
5
5
  export declare const WORKBENCH_BENCHMARK_FILE = "benchmark.yaml";
6
- export declare const WORKBENCH_SUBJECTS_DIR = "subjects";
7
- export declare const WORKBENCH_OPTIMIZERS_DIR = "optimizers";
8
- export declare const WORKBENCH_SUBJECT_FILE = "subject.yaml";
6
+ export declare const WORKBENCH_CANDIDATES_DIR = "candidates";
7
+ export declare const WORKBENCH_CANDIDATE_FILE = "candidate.yaml";
9
8
  export type HostedFile = WorkspaceSnapshotFile;
10
9
  export interface LocalProjectSource {
11
10
  dir: string;
@@ -14,20 +13,20 @@ export interface LocalProjectSource {
14
13
  spec: ReturnType<typeof resolveWorkbenchResolvedSourceYaml>;
15
14
  benchmarkPath: string;
16
15
  benchmarkSource: string;
17
- subjectName: string;
18
- subjectDir: string;
19
- subjectFilesPath: string;
20
- subjectSpecPath: string;
21
- subjectSource: string;
22
- optimizerPath?: string;
23
- optimizerSource?: string;
16
+ candidateName: string;
17
+ candidateDir: string;
18
+ candidateFilesPath: string;
19
+ candidateSpecPath: string;
20
+ candidateSource: string;
21
+ candidateRunId: string;
22
+ candidateRunIds: string[];
24
23
  benchmarkAdapterSources: string[];
25
24
  benchmarkAdapterIds: string[];
26
25
  dockerfilePath: string;
27
26
  dockerfile: string;
28
27
  runtimeDockerfile: string;
29
28
  dockerfileFiles: HostedFile[];
30
- subjectFiles: HostedFile[];
29
+ candidateFiles: HostedFile[];
31
30
  engineResolveFiles: HostedFile[];
32
31
  adapters: ResolvedWorkbenchAdapter[];
33
32
  adapterFiles: HostedFile[];
@@ -44,11 +43,9 @@ export interface LocalAuthoredProjectSource {
44
43
  specSource: string;
45
44
  benchmarkPath: string;
46
45
  benchmarkSource: string;
47
- subjectDir: string;
48
- subjectSpecPath: string;
49
- subjectSource: string;
50
- optimizerPath?: string;
51
- optimizerSource?: string;
46
+ candidateDir: string;
47
+ candidateSpecPath: string;
48
+ candidateSource: string;
52
49
  sourceFiles: SurfaceSnapshotFile[];
53
50
  }
54
51
  export interface LocalEngineResolveInvocation {
@@ -57,7 +54,7 @@ export interface LocalEngineResolveInvocation {
57
54
  auth?: Json;
58
55
  }
59
56
  interface LocalProjectSourceOptions {
60
- optimizerPath?: string;
57
+ runId?: string;
61
58
  }
62
59
  export declare function readLocalProjectSource(source: string, options?: LocalProjectSourceOptions): Promise<LocalProjectSource>;
63
60
  export declare function readLocalAuthoredProjectSource(source: string, options?: LocalProjectSourceOptions): Promise<LocalAuthoredProjectSource>;
@@ -1 +1 @@
1
- {"version":3,"file":"project-source.d.ts","sourceRoot":"","sources":["../src/project-source.ts"],"names":[],"mappings":"AAKA,OAAO,EAML,kCAAkC,EAGlC,KAAK,IAAI,EACT,KAAK,mBAAmB,EACzB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAQL,KAAK,mBAAmB,EACxB,KAAK,4BAA4B,EAClC,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAGL,KAAK,qBAAqB,EAC3B,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAML,KAAK,wBAAwB,EAC9B,MAAM,sBAAsB,CAAC;AAI9B,eAAO,MAAM,wBAAwB,mBAAsB,CAAC;AAC5D,eAAO,MAAM,sBAAsB,aAAa,CAAC;AACjD,eAAO,MAAM,wBAAwB,eAAe,CAAC;AACrD,eAAO,MAAM,sBAAsB,iBAAiB,CAAC;AAErD,MAAM,MAAM,UAAU,GAAG,qBAAqB,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,UAAU,CAAC,OAAO,kCAAkC,CAAC,CAAC;IAC5D,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,UAAU,EAAE,CAAC;IAC9B,YAAY,EAAE,UAAU,EAAE,CAAC;IAC3B,kBAAkB,EAAE,UAAU,EAAE,CAAC;IACjC,QAAQ,EAAE,wBAAwB,EAAE,CAAC;IACrC,YAAY,EAAE,UAAU,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,aAAa,EAAE,4BAA4B,CAAC;IAC5C,4BAA4B,EAAE,MAAM,CAAC;IACrC,wBAAwB,CAAC,EAAE,4BAA4B,CAAC,aAAa,CAAC,CAAC;IACvE,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,0BAA0B;IACzC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,UAAU,yBAAyB;IACjC,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAaD,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,kBAAkB,CAAC,CAsH7B;AAED,wBAAsB,8BAA8B,CAClD,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,0BAA0B,CAAC,CAqCrC"}
1
+ {"version":3,"file":"project-source.d.ts","sourceRoot":"","sources":["../src/project-source.ts"],"names":[],"mappings":"AAKA,OAAO,EAOL,kCAAkC,EAGlC,KAAK,IAAI,EACT,KAAK,mBAAmB,EACzB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAQL,KAAK,mBAAmB,EACxB,KAAK,4BAA4B,EAClC,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAGL,KAAK,qBAAqB,EAC3B,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAML,KAAK,wBAAwB,EAC9B,MAAM,sBAAsB,CAAC;AAI9B,eAAO,MAAM,wBAAwB,mBAAsB,CAAC;AAC5D,eAAO,MAAM,wBAAwB,eAAe,CAAC;AACrD,eAAO,MAAM,wBAAwB,mBAAsB,CAAC;AAE5D,MAAM,MAAM,UAAU,GAAG,qBAAqB,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,UAAU,CAAC,OAAO,kCAAkC,CAAC,CAAC;IAC5D,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,UAAU,EAAE,CAAC;IAC9B,cAAc,EAAE,UAAU,EAAE,CAAC;IAC7B,kBAAkB,EAAE,UAAU,EAAE,CAAC;IACjC,QAAQ,EAAE,wBAAwB,EAAE,CAAC;IACrC,YAAY,EAAE,UAAU,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,aAAa,EAAE,4BAA4B,CAAC;IAC5C,4BAA4B,EAAE,MAAM,CAAC;IACrC,wBAAwB,CAAC,EAAE,4BAA4B,CAAC,aAAa,CAAC,CAAC;IACvE,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,0BAA0B;IACzC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,UAAU,yBAAyB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAaD,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,kBAAkB,CAAC,CA8G7B;AAED,wBAAsB,8BAA8B,CAClD,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,0BAA0B,CAAC,CA6BrC"}