@workbench-ai/workbench 0.0.67 → 0.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dist/dev-open/client.css +387 -287
  2. package/dist/dev-open/client.js +202 -202
  3. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff +0 -0
  4. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff2 +0 -0
  5. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff +0 -0
  6. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff2 +0 -0
  7. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff +0 -0
  8. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff2 +0 -0
  9. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff +0 -0
  10. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff2 +0 -0
  11. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff +0 -0
  12. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff2 +0 -0
  13. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff +0 -0
  14. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff2 +0 -0
  15. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff +0 -0
  16. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff2 +0 -0
  17. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff +0 -0
  18. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff2 +0 -0
  19. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff +0 -0
  20. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff2 +0 -0
  21. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff +0 -0
  22. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff2 +0 -0
  23. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff +0 -0
  24. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff2 +0 -0
  25. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff +0 -0
  26. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff2 +0 -0
  27. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff +0 -0
  28. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff2 +0 -0
  29. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff +0 -0
  30. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff2 +0 -0
  31. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff +0 -0
  32. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff2 +0 -0
  33. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff +0 -0
  34. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff2 +0 -0
  35. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff +0 -0
  36. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff2 +0 -0
  37. package/dist/index.d.ts +2 -6
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +2035 -5100
  40. package/dist/install-targets.d.ts +35 -0
  41. package/dist/install-targets.d.ts.map +1 -0
  42. package/dist/install-targets.js +188 -0
  43. package/dist/open-server.d.ts +12 -0
  44. package/dist/open-server.d.ts.map +1 -0
  45. package/dist/open-server.js +248 -0
  46. package/dist/output.d.ts +22 -0
  47. package/dist/output.d.ts.map +1 -0
  48. package/dist/output.js +38 -0
  49. package/package.json +5 -5
  50. package/dist/adapter-command-env.d.ts +0 -8
  51. package/dist/adapter-command-env.d.ts.map +0 -1
  52. package/dist/adapter-command-env.js +0 -80
  53. package/dist/adapter-project.d.ts +0 -29
  54. package/dist/adapter-project.d.ts.map +0 -1
  55. package/dist/adapter-project.js +0 -332
  56. package/dist/benchmark-fingerprint.d.ts +0 -6
  57. package/dist/benchmark-fingerprint.d.ts.map +0 -1
  58. package/dist/benchmark-fingerprint.js +0 -42
  59. package/dist/command-model.d.ts +0 -5
  60. package/dist/command-model.d.ts.map +0 -1
  61. package/dist/command-model.js +0 -537
  62. package/dist/dev-open-server.d.ts +0 -18
  63. package/dist/dev-open-server.d.ts.map +0 -1
  64. package/dist/dev-open-server.js +0 -297
  65. package/dist/init-scaffold.d.ts +0 -22
  66. package/dist/init-scaffold.d.ts.map +0 -1
  67. package/dist/init-scaffold.js +0 -30
  68. package/dist/init-template-pack.d.ts +0 -19
  69. package/dist/init-template-pack.d.ts.map +0 -1
  70. package/dist/init-template-pack.js +0 -262
  71. package/dist/local-archive.d.ts +0 -48
  72. package/dist/local-archive.d.ts.map +0 -1
  73. package/dist/local-archive.js +0 -838
  74. package/dist/local-inspection.d.ts +0 -9
  75. package/dist/local-inspection.d.ts.map +0 -1
  76. package/dist/local-inspection.js +0 -354
  77. package/dist/project-source.d.ts +0 -63
  78. package/dist/project-source.d.ts.map +0 -1
  79. package/dist/project-source.js +0 -682
  80. package/dist/workspace-snapshot.d.ts +0 -10
  81. package/dist/workspace-snapshot.d.ts.map +0 -1
  82. package/dist/workspace-snapshot.js +0 -81
@@ -1,838 +0,0 @@
1
- import { promises as fs } from "node:fs";
2
- import path from "node:path";
3
- import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, compactWorkbenchRuntimeJobForExchange, mergeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeJobForExchange, selectExecutionOutputFilesForInspection, isSurfaceSnapshotFile, jsonRecord, normalizeRelativePath, readSurfaceFiles, workbenchRuntimeBundleStats, workbenchRuntimeCandidateIdentityForExchange, workbenchRuntimeProjectedActiveId, workbenchSurfaceFilesEqualForExchange, writeSurfaceFiles, } from "@workbench-ai/workbench-core";
4
- const RUNTIME_DIR = ".workbench/runtime";
5
- const CANDIDATE_RECORDS_DIR = "candidates";
6
/**
 * Resolve the directory that holds a workspace's local runtime archive.
 *
 * @param {string} workspace - Workspace root directory.
 * @returns {string} `workspace` joined with the `RUNTIME_DIR` constant.
 */
export function localRuntimeDir(workspace) {
    const runtimeRoot = path.join(workspace, RUNTIME_DIR);
    return runtimeRoot;
}
9
/**
 * Load the complete local archive for a workspace: the index plus the stored
 * surface files of every candidate listed in it.
 *
 * Candidate files are still read concurrently, but `candidateFiles` is now
 * assembled in the index's candidate order. Previously each key was inserted
 * when its read finished, so the key order (and anything derived from
 * `Object.entries(candidateFiles)`) depended on I/O timing.
 *
 * @param {string} workspace - Workspace root directory.
 * @returns {Promise<object>} The index fields plus `candidateFiles`, keyed by candidate id.
 * @throws {Error} If the index or the assembled snapshot fails validation.
 */
export async function loadLocalArchive(workspace) {
    const index = await loadLocalArchiveIndex(workspace);
    const candidatesRoot = path.join(localRuntimeDir(workspace), CANDIDATE_RECORDS_DIR);
    // Keep the callbacks async so a synchronous failure while resolving a name
    // still surfaces as a rejection handled by Promise.all.
    const filesPerCandidate = await Promise.all(index.candidates.map(async (candidate) => {
        return await readSurfaceFiles(path.join(candidatesRoot, localRecordName(candidate.id), "files"));
    }));
    const candidateFiles = {};
    index.candidates.forEach((candidate, position) => {
        candidateFiles[candidate.id] = filesPerCandidate[position];
    });
    const snapshot = {
        ...index,
        candidateFiles,
    };
    validateLocalArchiveSnapshot(snapshot);
    return snapshot;
}
23
/**
 * Load the archive index (everything except candidate file contents) from the
 * workspace's runtime directory, sort each collection, and validate it.
 *
 * Reads `state.json`, the candidate `record.json` files, the `evaluations` and
 * `runs` directories, and `events.json` concurrently.
 *
 * @param {string} workspace - Workspace root directory.
 * @returns {Promise<object>} `{ activeId, candidates, evaluations, runs, events }`.
 * @throws {Error} If `validateLocalArchiveIndex` rejects the loaded data.
 */
export async function loadLocalArchiveIndex(workspace) {
    const root = localRuntimeDir(workspace);
    const pending = [
        readJson(path.join(root, "state.json"), {}),
        readRecords(path.join(root, CANDIDATE_RECORDS_DIR), "record.json"),
        readFlatRecords(path.join(root, "evaluations")),
        readFlatRecords(path.join(root, "runs")),
        readJson(path.join(root, "events.json"), []),
    ];
    const [state, candidates, evaluations, runs, events] = await Promise.all(pending);
    // A non-string activeId in state.json is treated as "no active candidate".
    const activeId = typeof state.activeId === "string" ? state.activeId : null;
    const byCreatedAtThenId = (left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id);
    const byStartedAtThenId = (left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id);
    const byAtThenId = (left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id);
    candidates.sort(compareLocalCandidateRecords);
    evaluations.sort(byCreatedAtThenId);
    runs.sort(byStartedAtThenId);
    events.sort(byAtThenId);
    const index = { activeId, candidates, evaluations, runs, events };
    validateLocalArchiveIndex(index);
    return index;
}
42
/**
 * Replace the on-disk archive of a workspace with `snapshot`.
 *
 * Writes `state.json`, removes and recreates the candidates, `evaluations`
 * and `runs` directories, writes one record (plus surface files) per
 * candidate, one JSON file per evaluation and run, and finally `events.json`.
 *
 * On-disk names now go through `localRecordName`, matching every reader in
 * this module and `writeArchivedLocalJobs`; previously raw ids were used.
 * NOTE(review): `localRecordName` is defined outside this excerpt. The loaders
 * read from `localRecordName(id)` paths, so it is assumed to return valid ids
 * unchanged; confirm.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {object} snapshot - `{ activeId, candidates, candidateFiles, evaluations, runs, events }`.
 * @returns {Promise<void>}
 */
export async function saveLocalArchive(workspace, snapshot) {
    const root = localRuntimeDir(workspace);
    const candidatesDir = path.join(root, CANDIDATE_RECORDS_DIR);
    const evaluationsDir = path.join(root, "evaluations");
    const runsDir = path.join(root, "runs");
    // Resolve every on-disk name before touching the filesystem, so that if
    // localRecordName rejects an id the save aborts before anything is deleted.
    const candidateEntries = snapshot.candidates.map((candidate) => [localRecordName(candidate.id), candidate]);
    const evaluationEntries = snapshot.evaluations.map((evaluation) => [localRecordName(evaluation.id), evaluation]);
    const runEntries = snapshot.runs.map((run) => [localRecordName(run.id), run]);
    await fs.mkdir(root, { recursive: true });
    await writeJson(path.join(root, "state.json"), { activeId: snapshot.activeId });
    await fs.rm(candidatesDir, { force: true, recursive: true });
    await fs.rm(evaluationsDir, { force: true, recursive: true });
    await fs.rm(runsDir, { force: true, recursive: true });
    await Promise.all([
        fs.mkdir(candidatesDir, { recursive: true }),
        fs.mkdir(evaluationsDir, { recursive: true }),
        fs.mkdir(runsDir, { recursive: true }),
    ]);
    for (const [recordName, candidate] of candidateEntries) {
        const candidateRoot = path.join(candidatesDir, recordName);
        await fs.mkdir(candidateRoot, { recursive: true });
        await writeJson(path.join(candidateRoot, "record.json"), candidateRecordWithoutDerivedFields(candidate));
        // A candidate without stored files is written with an empty file set.
        await writeSurfaceFiles(path.join(candidateRoot, "files"), snapshot.candidateFiles[candidate.id] ?? []);
    }
    for (const [recordName, evaluation] of evaluationEntries) {
        await writeJson(path.join(evaluationsDir, `${recordName}.json`), evaluation);
    }
    for (const [recordName, run] of runEntries) {
        await writeJson(path.join(runsDir, `${recordName}.json`), run);
    }
    await writeJson(path.join(root, "events.json"), snapshot.events);
}
68
/**
 * Archive the given jobs for a workspace. Does nothing for an empty list.
 *
 * No explicit execution files are supplied (an empty Map is passed), so
 * `writeArchivedLocalJobs` derives each job's output files itself.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {Array<object>} jobs - Job records to archive.
 * @returns {Promise<void>}
 */
export async function saveLocalJobs(workspace, jobs) {
    if (jobs.length > 0) {
        await writeArchivedLocalJobs(workspace, jobs, new Map());
    }
}
74
/**
 * Build a `workbench.runtime.bundle.v1` object from the workspace's local
 * archive and archived jobs.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {object} [options]
 * @param {string} [options.currentBenchmarkFingerprint] - When truthy, the
 *   bundle's `activeId` is computed with `workbenchRuntimeProjectedActiveId`
 *   instead of being taken from the stored snapshot.
 * @returns {Promise<object>} The bundle; jobs with no execution files are
 *   left out of `executionFiles`.
 */
export async function exportLocalRuntimeBundle(workspace, options = {}) {
    const snapshot = await loadLocalArchive(workspace);
    const archivedJobs = await readLocalJobs(workspace);
    const jobs = archivedJobs.map(compactWorkbenchRuntimeJobForExchange);
    const executionGroups = await Promise.all(archivedJobs.map(async (job) => ({
        jobId: job.id,
        files: await readLocalExecutionFiles(workspace, job.id),
    })));
    const executionFiles = executionGroups.filter((group) => group.files.length > 0);
    let activeId = snapshot.activeId;
    if (options.currentBenchmarkFingerprint) {
        activeId = workbenchRuntimeProjectedActiveId({
            candidates: snapshot.candidates,
            evaluations: snapshot.evaluations,
            runs: snapshot.runs,
            benchmarkFingerprint: options.currentBenchmarkFingerprint,
        });
    }
    const candidateFiles = Object.entries(snapshot.candidateFiles).map(([candidateId, files]) => ({
        candidateId,
        files: copySurfaceFiles(files),
    }));
    // Records are shallow-copied so the bundle does not alias the snapshot's objects.
    const shallowCopy = (record) => ({ ...record });
    return {
        schema: "workbench.runtime.bundle.v1",
        activeId,
        candidates: snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange),
        candidateFiles,
        evaluations: snapshot.evaluations.map(shallowCopy),
        runs: snapshot.runs.map(shallowCopy),
        jobs,
        executionFiles,
        events: snapshot.events.map(shallowCopy),
    };
}
105
/**
 * Merge a runtime bundle into the workspace's local archive and persist the
 * result.
 *
 * Candidates, evaluations, runs, events and jobs are merged by id through
 * `mergeRecordsById`; candidate files and execution files are merged per id.
 * Incompatible duplicates raise a "Runtime history conflict" error. Differing
 * execution files are tolerated only when both sides have the job and
 * `runtimeJobsEqualForExchange` accepts the pair, in which case the incoming
 * files win.
 *
 * Compared with the previous version, the incoming jobs are sanitized once
 * (they were mapped through `sanitizeRuntimeJobForExchange` twice) and one
 * shared callback records changes.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {object} bundle - A `workbench.runtime.bundle.v1` bundle.
 * @param {string} currentBenchmarkFingerprint - Passed to
 *   `workbenchRuntimeProjectedActiveId` to compute the resulting `activeId`.
 * @returns {Promise<{changed: boolean, stats: object}>} Whether anything was
 *   added or altered, plus `runtimeBundleStats` of the merged state.
 * @throws {Error} On an unsupported bundle schema or a history conflict.
 */
export async function importLocalRuntimeBundle(workspace, bundle, currentBenchmarkFingerprint) {
    validateRuntimeBundleSchema(bundle);
    const snapshot = await loadLocalArchive(workspace);
    const existingJobs = (await readLocalJobs(workspace)).map(sanitizeRuntimeJobForExchange);
    let changed = false;
    const markChanged = (didChange) => {
        changed ||= didChange;
    };
    // If sanitizing the stored candidates alters them, the rewrite is itself a change.
    const existingCandidates = snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
    if (JSON.stringify(existingCandidates) !== JSON.stringify(snapshot.candidates)) {
        changed = true;
    }
    const incomingCandidates = bundle.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
    const candidates = mergeRecordsById(existingCandidates, incomingCandidates, (candidate) => candidate.id, markChanged, runtimeCandidatesCompatibleForExchange, mergeWorkbenchRuntimeCandidateForExchange).sort(compareLocalCandidateRecords);
    const candidateFiles = { ...snapshot.candidateFiles };
    for (const group of bundle.candidateFiles) {
        const candidateId = localRecordName(group.candidateId);
        const files = copySurfaceFiles(group.files);
        const existing = candidateFiles[candidateId];
        if (existing) {
            if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
                throw new Error(`Runtime history conflict for candidate files ${candidateId}.`);
            }
        }
        else {
            changed = true;
        }
        candidateFiles[candidateId] = files;
    }
    const evaluations = mergeRecordsById(snapshot.evaluations, bundle.evaluations, (evaluation) => evaluation.id, markChanged, runtimeEvaluationsCompatibleForExchange).sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id));
    const runs = mergeRecordsById(snapshot.runs, bundle.runs, (run) => run.id, markChanged, runtimeRunsCompatibleForExchange).sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id));
    const events = mergeRecordsById(snapshot.events, bundle.events, runtimeEventKey, markChanged).sort((left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id));
    const executionFilesByJobId = new Map();
    await Promise.all(existingJobs.map(async (job) => {
        executionFilesByJobId.set(job.id, await readLocalExecutionFiles(workspace, job.id));
    }));
    const existingJobById = new Map(existingJobs.map((job) => [job.id, job]));
    // Sanitize the incoming jobs once; both the lookup map and the merge use this list.
    const incomingJobs = bundle.jobs.map(sanitizeRuntimeJobForExchange);
    const incomingJobById = new Map(incomingJobs.map((job) => [job.id, job]));
    for (const group of bundle.executionFiles) {
        const jobId = localRecordName(group.jobId);
        const files = copySurfaceFiles(group.files);
        const existing = executionFilesByJobId.get(jobId);
        if (existing) {
            if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
                const existingJob = existingJobById.get(jobId) ?? null;
                const incomingJob = incomingJobById.get(jobId) ?? null;
                if (!existingJob || !incomingJob || !runtimeJobsEqualForExchange(existingJob, incomingJob)) {
                    throw new Error(`Runtime history conflict for execution files ${jobId}.`);
                }
                changed = true;
            }
        }
        else {
            changed = true;
        }
        executionFilesByJobId.set(jobId, files);
    }
    const jobs = mergeRecordsById(existingJobs, incomingJobs, (job) => job.id, markChanged, runtimeJobsEqualForExchange).sort((left, right) => (left.startedAt ?? left.createdAt).localeCompare(right.startedAt ?? right.createdAt) ||
        left.id.localeCompare(right.id));
    const activeId = workbenchRuntimeProjectedActiveId({
        candidates,
        evaluations,
        runs,
        benchmarkFingerprint: currentBenchmarkFingerprint,
    });
    if (activeId !== snapshot.activeId) {
        changed = true;
    }
    await saveLocalArchive(workspace, {
        activeId,
        candidates,
        candidateFiles,
        evaluations,
        runs,
        events,
    });
    await writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId);
    return {
        changed,
        stats: runtimeBundleStats({
            schema: "workbench.runtime.bundle.v1",
            activeId,
            candidates,
            candidateFiles: Object.entries(candidateFiles).map(([candidateId, files]) => ({
                candidateId,
                files,
            })),
            evaluations,
            runs,
            jobs,
            executionFiles: [...executionFilesByJobId.entries()].map(([jobId, files]) => ({
                jobId,
                files,
            })),
            events,
        }),
    };
}
210
/**
 * Compute statistics for a runtime bundle.
 *
 * Delegates to `workbenchRuntimeBundleStats` from `@workbench-ai/workbench-core`.
 *
 * @param {object} bundle - Runtime bundle.
 * @returns {*} Whatever `workbenchRuntimeBundleStats` returns for `bundle`.
 */
export function runtimeBundleStats(bundle) {
    const stats = workbenchRuntimeBundleStats(bundle);
    return stats;
}
213
/**
 * Sanitize a job record for exchange.
 *
 * Delegates to `sanitizeWorkbenchRuntimeJobForExchange`; only `job` is
 * forwarded, so the function is safe to pass directly to `Array.prototype.map`.
 *
 * @param {object} job - Job record.
 * @returns {object} The sanitized job.
 */
export function sanitizeRuntimeJobForExchange(job) {
    const sanitized = sanitizeWorkbenchRuntimeJobForExchange(job);
    return sanitized;
}
216
/**
 * Copy a job record without `leaseUntil`, `wakeupLeaseUntil`, `hostId`,
 * `workerId` and `claimTokenHash`.
 *
 * @param {object} job - Job record; not modified.
 * @returns {object} A new object holding the remaining properties.
 */
function sanitizeRuntimeJobForArchive(job) {
    const {
        leaseUntil: _droppedLeaseUntil,
        wakeupLeaseUntil: _droppedWakeupLeaseUntil,
        hostId: _droppedHostId,
        workerId: _droppedWorkerId,
        claimTokenHash: _droppedClaimTokenHash,
        ...archivable
    } = job;
    // The rest object is already a fresh copy, so it can be returned directly.
    return archivable;
}
220
/**
 * Write each job's archived JSON record and its execution files under the
 * workspace runtime directory (`jobs/<name>.json`, `execution-files/<name>/`).
 *
 * For every job, files supplied in `executionFilesByJobId` are used when
 * present; otherwise the output files are selected from the job's own
 * filtered output via `selectExecutionOutputFilesForInspection`. A job's
 * existing execution-files directory is removed before being rewritten.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {Array<object>} jobs - Jobs to archive; an empty list is a no-op.
 * @param {Map<string, Array<object>>} executionFilesByJobId - Explicit output files keyed by job id.
 * @returns {Promise<void>}
 */
async function writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId) {
    if (jobs.length === 0) {
        return;
    }
    const runtimeRoot = localRuntimeDir(workspace);
    const jobsDir = path.join(runtimeRoot, "jobs");
    const executionFilesDir = path.join(runtimeRoot, "execution-files");
    await Promise.all([
        fs.mkdir(jobsDir, { recursive: true }),
        fs.mkdir(executionFilesDir, { recursive: true }),
    ]);
    for (const job of jobs) {
        const archivable = sanitizeRuntimeJobForArchive(job);
        const recordName = localRecordName(job.id);
        const suppliedFiles = executionFilesByJobId.get(job.id);
        const traceSourceFiles = filterArchivedExecutionFiles(completedJobOutputFiles(archivable));
        let outputFiles;
        if (suppliedFiles) {
            outputFiles = copySurfaceFiles(suppliedFiles);
        }
        else {
            outputFiles = selectExecutionOutputFilesForInspection({
                purpose: readExecutionPurpose(archivable),
                files: traceSourceFiles,
                output: jsonRecord(archivable.output),
            });
        }
        // Trace data is built from the job's own files when it has any, else from the output files.
        const traceFiles = traceSourceFiles.length > 0 ? traceSourceFiles : outputFiles;
        await writeJson(path.join(jobsDir, `${recordName}.json`), archivedLocalJob(archivable, outputFiles, traceFiles));
        const filesRoot = path.join(executionFilesDir, recordName);
        await fs.rm(filesRoot, { force: true, recursive: true });
        await writeSurfaceFiles(filesRoot, outputFiles);
    }
}
249
/**
 * Read the archived execution files of a job and pass them through
 * `selectExecutionOutputFilesForInspection` with a `null` purpose.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} jobId - Job id; converted to an on-disk name with `localRecordName`.
 * @returns {Promise<Array<object>>} The selected files.
 */
export async function readLocalExecutionFiles(workspace, jobId) {
    const filesRoot = path.join(localRuntimeDir(workspace), "execution-files", localRecordName(jobId));
    const files = await readSurfaceFiles(filesRoot);
    return selectExecutionOutputFilesForInspection({ purpose: null, files });
}
255
/**
 * Read and validate one candidate's `record.json`.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} candidateId - Candidate id.
 * @returns {Promise<object>} The candidate record.
 * @throws {Error} "Candidate not found" when `readJson` yields the `null`
 *   fallback (or any falsy value), or a validation error from `validateCandidateRecord`.
 */
export async function readLocalCandidateRecord(workspace, candidateId) {
    const recordPath = path.join(localRuntimeDir(workspace), CANDIDATE_RECORDS_DIR, localRecordName(candidateId), "record.json");
    const candidate = await readJson(recordPath, null);
    if (candidate) {
        validateCandidateRecord(candidate);
        return candidate;
    }
    throw new Error(`Candidate not found: ${candidateId}`);
}
263
/**
 * Read the stored surface files of one candidate.
 *
 * The candidate record is read first, so an unknown or invalid candidate
 * raises before any files are read.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} candidateId - Candidate id.
 * @returns {Promise<Array<object>>} The candidate's surface files.
 */
export async function readLocalCandidateFilesForId(workspace, candidateId) {
    await readLocalCandidateRecord(workspace, candidateId);
    const filesRoot = path.join(localRuntimeDir(workspace), CANDIDATE_RECORDS_DIR, localRecordName(candidateId), "files");
    const files = await readSurfaceFiles(filesRoot);
    return files;
}
267
/**
 * Read and validate one evaluation record from the `evaluations` directory.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} evaluationId - Evaluation id.
 * @returns {Promise<object>} The evaluation record.
 * @throws {Error} "Evaluation not found" when `readJson` yields the `null`
 *   fallback (or any falsy value), or a validation error from `validateEvaluationRecord`.
 */
export async function readLocalEvaluationRecord(workspace, evaluationId) {
    const recordPath = path.join(localRuntimeDir(workspace), "evaluations", `${localRecordName(evaluationId)}.json`);
    const evaluation = await readJson(recordPath, null);
    if (evaluation) {
        validateEvaluationRecord(evaluation);
        return evaluation;
    }
    throw new Error(`Evaluation not found: ${evaluationId}`);
}
275
/**
 * Read and validate one run record from the `runs` directory.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} runId - Run id.
 * @returns {Promise<object>} The run record.
 * @throws {Error} "Run not found" when `readJson` yields the `null` fallback
 *   (or any falsy value), or a validation error from `validateRunRecord`.
 */
export async function readLocalRunRecord(workspace, runId) {
    const recordPath = path.join(localRuntimeDir(workspace), "runs", `${localRecordName(runId)}.json`);
    const run = await readJson(recordPath, null);
    if (run) {
        validateRunRecord(run);
        return run;
    }
    throw new Error(`Run not found: ${runId}`);
}
283
/**
 * Read every archived job of a workspace, ordered by `startedAt` (falling
 * back to `createdAt` when `startedAt` is null or undefined) and then by id.
 *
 * @param {string} workspace - Workspace root directory.
 * @returns {Promise<Array<object>>} The sorted job records.
 */
export async function readLocalJobs(workspace) {
    const jobs = await readFlatRecords(path.join(localRuntimeDir(workspace), "jobs"));
    const orderingTime = (job) => job.startedAt ?? job.createdAt;
    jobs.sort((left, right) => orderingTime(left).localeCompare(orderingTime(right)) || left.id.localeCompare(right.id));
    return jobs;
}
288
/**
 * Read the archived jobs whose `runId` equals `runId`.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} runId - Run id to match.
 * @returns {Promise<Array<object>>} Matching jobs, in `readLocalJobs` order.
 */
export async function readLocalRunJobs(workspace, runId) {
    const allJobs = await readLocalJobs(workspace);
    return allJobs.filter((job) => job.runId === runId);
}
291
/**
 * Find one archived job by id within a run.
 *
 * @param {string} workspace - Workspace root directory.
 * @param {string} runId - Run the job must belong to.
 * @param {string} jobId - Job id.
 * @returns {Promise<object|null>} The job, or `null` when the run has no such job.
 */
export async function readLocalJobInRun(workspace, runId, jobId) {
    const runJobs = await readLocalRunJobs(workspace, runId);
    const match = runJobs.find((job) => job.id === jobId);
    return match ?? null;
}
294
/**
 * Return a copy of `snapshot` in which `candidate` replaces any candidate
 * with the same id and `files` become that candidate's stored files.
 *
 * The input snapshot is not modified; each file object is shallow-copied.
 *
 * @param {object} snapshot - Archive snapshot.
 * @param {object} candidate - Candidate record to insert or replace.
 * @param {Array<object>} files - Surface files for the candidate.
 * @returns {object} The updated snapshot, with candidates re-sorted.
 */
export function upsertLocalCandidate(snapshot, candidate, files) {
    const candidates = snapshot.candidates.filter((entry) => entry.id !== candidate.id);
    candidates.push(candidate);
    candidates.sort(compareLocalCandidateRecords);
    const candidateFiles = {
        ...snapshot.candidateFiles,
        [candidate.id]: files.map((file) => ({ ...file })),
    };
    return { ...snapshot, candidates, candidateFiles };
}
307
/**
 * Return a copy of `snapshot` in which `evaluation` replaces any evaluation
 * with the same id. Evaluations are ordered by `createdAt`, then id.
 *
 * @param {object} snapshot - Archive snapshot; not modified.
 * @param {object} evaluation - Evaluation record to insert or replace.
 * @returns {object} The updated snapshot.
 */
export function upsertLocalEvaluation(snapshot, evaluation) {
    const evaluations = snapshot.evaluations.filter((entry) => entry.id !== evaluation.id);
    evaluations.push(evaluation);
    evaluations.sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id));
    return { ...snapshot, evaluations };
}
316
/**
 * Return a copy of `snapshot` in which `run` replaces any run with the same
 * id and `events` are appended to the existing events.
 *
 * Runs are ordered by `startedAt`, then id; events by `at`, then id. Events
 * are appended as given; nothing is de-duplicated.
 *
 * @param {object} snapshot - Archive snapshot; not modified.
 * @param {object} run - Run record to insert or replace.
 * @param {Array<object>} events - Events to append.
 * @returns {object} The updated snapshot.
 */
export function upsertLocalRun(snapshot, run, events) {
    const runs = snapshot.runs.filter((entry) => entry.id !== run.id);
    runs.push(run);
    runs.sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id));
    const mergedEvents = [...snapshot.events, ...events];
    mergedEvents.sort((left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id));
    return { ...snapshot, runs, events: mergedEvents };
}
329
/**
 * Return a shallow copy of `snapshot` with `activeId` replaced.
 *
 * @param {object} snapshot - Archive snapshot; not modified.
 * @param {string|null} activeId - New active candidate id.
 * @returns {object} The updated snapshot.
 */
export function setLocalActive(snapshot, activeId) {
    const updated = { ...snapshot };
    updated.activeId = activeId;
    return updated;
}
335
/**
 * Look up a candidate record in an in-memory snapshot.
 *
 * @param {object} snapshot - Archive snapshot.
 * @param {string} candidateId - Candidate id.
 * @returns {object} The first candidate whose `id` equals `candidateId`.
 * @throws {Error} If no candidate has that id.
 */
export function readLocalCandidate(snapshot, candidateId) {
    for (const entry of snapshot.candidates) {
        if (entry.id === candidateId) {
            return entry;
        }
    }
    throw new Error(`Candidate not found: ${candidateId}`);
}
342
/**
 * Return shallow copies of a candidate's files from an in-memory snapshot.
 *
 * @param {object} snapshot - Archive snapshot.
 * @param {string} candidateId - Candidate id.
 * @returns {Array<object>} Copies of the stored files; empty when the
 *   candidate has no `candidateFiles` entry.
 * @throws {Error} If the candidate is not in the snapshot.
 */
export function readLocalCandidateFiles(snapshot, candidateId) {
    // Called only for its "not found" check; the record itself is not needed.
    readLocalCandidate(snapshot, candidateId);
    const stored = snapshot.candidateFiles[candidateId] ?? [];
    return stored.map((file) => ({ ...file }));
}
346
/**
 * Validate a full archive snapshot.
 *
 * Currently applies only the index-level checks of
 * `validateLocalArchiveIndex`; `candidateFiles` is not inspected.
 *
 * @param {object} snapshot - Archive snapshot.
 * @returns {void}
 * @throws {Error} If the index-level validation fails.
 */
function validateLocalArchiveSnapshot(snapshot) {
    return void validateLocalArchiveIndex(snapshot);
}
349
/**
 * Ensure `bundle` declares the supported schema identifier.
 *
 * Only the `schema` field is checked; the bundle's contents are not.
 *
 * @param {object} bundle - Candidate runtime bundle.
 * @returns {void}
 * @throws {Error} If `bundle` is falsy or its `schema` is not
 *   `"workbench.runtime.bundle.v1"`.
 */
function validateRuntimeBundleSchema(bundle) {
    const isSupported = bundle?.schema === "workbench.runtime.bundle.v1";
    if (!isSupported) {
        throw new Error("Unsupported Workbench runtime bundle.");
    }
}
354
/**
 * Merge `incoming` records into `existing` ones, keyed by
 * `localRecordName(idFor(record))`.
 *
 * - A record with a new key is added and reported via `markChanged(true)`.
 * - A record whose key already exists must satisfy `equal(previous, record)`,
 *   otherwise a conflict error is thrown. It is then combined with
 *   `merge(previous, record)`; `markChanged(true)` is called when the merged
 *   result differs from `previous` according to `runtimeRecordsEqual`.
 *
 * @param {Array<object>} existing - Records already stored.
 * @param {Array<object>} incoming - Records being imported.
 * @param {(record: object) => string} idFor - Extracts a record's id.
 * @param {(didChange: boolean) => void} markChanged - Change notification callback.
 * @param {(left: object, right: object) => boolean} [equal] - Compatibility check.
 * @param {(left: object, right: object) => object} [merge] - Combines two
 *   compatible records; by default the incoming record wins.
 * @returns {Array<object>} Merged records in insertion order of their keys.
 * @throws {Error} "Runtime history conflict" for incompatible duplicates.
 */
function mergeRecordsById(existing, incoming, idFor, markChanged, equal = runtimeRecordsEqual, merge = (_left, right) => right) {
    const byName = new Map();
    existing.forEach((record) => {
        byName.set(localRecordName(idFor(record)), record);
    });
    for (const candidateRecord of incoming) {
        const name = localRecordName(idFor(candidateRecord));
        const stored = byName.get(name);
        if (!stored) {
            markChanged(true);
            byName.set(name, candidateRecord);
        }
        else {
            if (!equal(stored, candidateRecord)) {
                throw new Error(`Runtime history conflict for id ${name}.`);
            }
            const combined = merge(stored, candidateRecord);
            if (!runtimeRecordsEqual(stored, combined)) {
                markChanged(true);
            }
            byName.set(name, combined);
        }
    }
    return Array.from(byName.values());
}
378
/**
 * Compare two JSON-like values by their canonical (key-sorted) JSON text, so
 * object key order does not affect the result.
 *
 * @param {*} left
 * @param {*} right
 * @returns {boolean} True when both serialize to the same canonical JSON.
 */
function runtimeRecordsEqual(left, right) {
    const leftText = JSON.stringify(canonicalRuntimeJson(left));
    const rightText = JSON.stringify(canonicalRuntimeJson(right));
    return leftText === rightText;
}
382
/**
 * Decide whether two job records may be treated as the same job on import.
 *
 * They match when their comparable forms are equal, or failing that when
 * their identity fields (`runtimeJobIdentityForExchange`) are equal.
 *
 * @param {object} left
 * @param {object} right
 * @returns {boolean}
 */
function runtimeJobsEqualForExchange(left, right) {
    return runtimeRecordsEqual(runtimeComparableJob(left), runtimeComparableJob(right))
        || runtimeRecordsEqual(runtimeJobIdentityForExchange(left), runtimeJobIdentityForExchange(right));
}
388
/**
 * Produce the form of a job used for equality checks: the exchange-sanitized
 * job with `files` and `fileSet` removed from its `output`.
 *
 * When `output` is missing, not an object, or an array, the sanitized job is
 * returned as is.
 *
 * @param {object} job - Job record.
 * @returns {object} The comparable job.
 */
function runtimeComparableJob(job) {
    const comparable = sanitizeRuntimeJobForExchange(job);
    const { output } = comparable;
    const isRecord = Boolean(output) && typeof output === "object" && !Array.isArray(output);
    if (!isRecord) {
        return comparable;
    }
    const { files: _ignoredFiles, fileSet: _ignoredFileSet, ...portableOutput } = output;
    return { ...comparable, output: portableOutput };
}
400
/**
 * Two candidates are compatible when their exchange identities, as computed
 * by `workbenchRuntimeCandidateIdentityForExchange`, are equal.
 *
 * @param {object} left
 * @param {object} right
 * @returns {boolean}
 */
function runtimeCandidatesCompatibleForExchange(left, right) {
    const leftIdentity = workbenchRuntimeCandidateIdentityForExchange(left);
    const rightIdentity = workbenchRuntimeCandidateIdentityForExchange(right);
    return runtimeRecordsEqual(leftIdentity, rightIdentity);
}
403
/**
 * Two evaluations are compatible when they are fully equal, or failing that
 * when their identity fields (`runtimeEvaluationIdentityForExchange`) are equal.
 *
 * @param {object} left
 * @param {object} right
 * @returns {boolean}
 */
function runtimeEvaluationsCompatibleForExchange(left, right) {
    return runtimeRecordsEqual(left, right)
        || runtimeRecordsEqual(runtimeEvaluationIdentityForExchange(left), runtimeEvaluationIdentityForExchange(right));
}
409
/**
 * Extract the fields that identify an evaluation across archives.
 *
 * @param {object} evaluation - Evaluation record.
 * @returns {object} `{ id, runId, candidateId, candidateVersion,
 *   benchmarkFingerprint, candidateFingerprint }` copied from the record.
 */
function runtimeEvaluationIdentityForExchange(evaluation) {
    const { id, runId, candidateId, candidateVersion, benchmarkFingerprint, candidateFingerprint } = evaluation;
    return { id, runId, candidateId, candidateVersion, benchmarkFingerprint, candidateFingerprint };
}
419
/**
 * Two runs are compatible when they are fully equal, or failing that when
 * their identity fields (`runtimeRunIdentityForExchange`) are equal.
 *
 * @param {object} left
 * @param {object} right
 * @returns {boolean}
 */
function runtimeRunsCompatibleForExchange(left, right) {
    return runtimeRecordsEqual(left, right)
        || runtimeRecordsEqual(runtimeRunIdentityForExchange(left), runtimeRunIdentityForExchange(right));
}
425
/**
 * Extract the fields that identify a run across archives.
 *
 * `candidateId` and `outputCandidateId` are normalized to `null` when null
 * or undefined; the other fields are copied as they are.
 *
 * @param {object} run - Run record.
 * @returns {object} The identity fields of the run.
 */
function runtimeRunIdentityForExchange(run) {
    const { id, workflow, benchmarkFingerprint, engineRun, improver, strategy, budget, samples, attemptsRequested } = run;
    const candidateId = run.candidateId ?? null;
    const outputCandidateId = run.outputCandidateId ?? null;
    return {
        id,
        workflow,
        benchmarkFingerprint,
        candidateId,
        outputCandidateId,
        engineRun,
        improver,
        strategy,
        budget,
        samples,
        attemptsRequested,
    };
}
440
/**
 * Extract the fields that identify a job across archives.
 *
 * @param {object} job - Job record.
 * @returns {object} `{ id, runId, candidateId, kind, attempt }` copied from the record.
 */
function runtimeJobIdentityForExchange(job) {
    const { id, runId, candidateId, kind, attempt } = job;
    return { id, runId, candidateId, kind, attempt };
}
449
/**
 * Recursively rebuild a JSON-like value with object keys in sorted order.
 *
 * Arrays keep their element order; primitives, `null` and functions are
 * returned unchanged.
 *
 * @param {*} value - Value to canonicalize.
 * @returns {*} The canonical copy (or the value itself for non-objects).
 */
function canonicalRuntimeJson(value) {
    if (value === null || typeof value !== "object") {
        return value;
    }
    if (Array.isArray(value)) {
        return value.map(canonicalRuntimeJson);
    }
    const orderedKeys = Object.keys(value).sort();
    const entries = orderedKeys.map((key) => [key, canonicalRuntimeJson(value[key])]);
    return Object.fromEntries(entries);
}
460
/**
 * Build the merge key of an event: run id, job id, timestamp and event id
 * joined with `#`. A null or undefined run or job id is written as `_`.
 *
 * @param {object} event - Event record.
 * @returns {string} The composite key.
 */
function runtimeEventKey(event) {
    const runPart = event.runId ?? "_";
    const jobPart = event.jobId ?? "_";
    // Array.prototype.join renders a null or undefined `at`/`id` as an empty segment.
    return [runPart, jobPart, event.at, event.id].join("#");
}
468
/**
 * Shallow-copy a list of file objects.
 *
 * @param {Array<object>} files - File objects.
 * @returns {Array<object>} A new array holding a shallow copy of each file.
 */
function copySurfaceFiles(files) {
    const cloneFile = (file) => ({ ...file });
    return files.map(cloneFile);
}
471
/**
 * Validate the cross-record consistency of an archive index.
 *
 * Checks, in order: the active candidate exists; every candidate passes
 * `validateCandidateRecord`, has array `referenceIds` and `fileChanges`, and
 * a `baseId` (when set) that refers to a known candidate; every evaluation
 * passes `validateEvaluationRecord`, refers to a known candidate and matches
 * that candidate's fingerprint; every run passes `validateRunRecord`.
 *
 * Candidates are indexed by id once, so the evaluation check no longer scans
 * the candidate list for each evaluation. As with the previous `find`, the
 * first candidate with a given id is the one used.
 *
 * @param {object} snapshot - `{ activeId, candidates, evaluations, runs }`.
 * @returns {void}
 * @throws {Error} Describing the first failed check.
 */
function validateLocalArchiveIndex(snapshot) {
    const candidatesById = new Map();
    for (const candidate of snapshot.candidates) {
        // Keep the first record per id to match first-match lookup semantics.
        if (!candidatesById.has(candidate.id)) {
            candidatesById.set(candidate.id, candidate);
        }
    }
    if (snapshot.activeId && !candidatesById.has(snapshot.activeId)) {
        throw new Error(`Active candidate not found: ${snapshot.activeId}`);
    }
    for (const candidate of snapshot.candidates) {
        validateCandidateRecord(candidate);
        if (!Array.isArray(candidate.referenceIds)) {
            throw new Error(`candidate ${candidate.id}.referenceIds must be an array.`);
        }
        if (!Array.isArray(candidate.fileChanges)) {
            throw new Error(`candidate ${candidate.id}.fileChanges must be an array.`);
        }
        if (candidate.baseId && !candidatesById.has(candidate.baseId)) {
            throw new Error(`candidate ${candidate.id}.baseId not found: ${candidate.baseId}`);
        }
    }
    for (const evaluation of snapshot.evaluations) {
        validateEvaluationRecord(evaluation);
        const candidate = candidatesById.get(evaluation.candidateId);
        if (!candidate) {
            throw new Error(`evaluation ${evaluation.id}.candidateId not found: ${evaluation.candidateId}`);
        }
        if (candidate.candidateFingerprint !== evaluation.candidateFingerprint) {
            throw new Error(`evaluation ${evaluation.id}.candidateFingerprint does not match candidate ${candidate.id}.`);
        }
    }
    for (const run of snapshot.runs) {
        validateRunRecord(run);
    }
}
502
/**
 * Validate the required fields of a candidate record: non-empty string `id`,
 * positive-integer `version` and `ordinal`, non-empty string fingerprints,
 * `visibility` of `"private"` or `"public"`, and non-empty string `createdAt`.
 *
 * @param {object} candidate - Candidate record.
 * @returns {void}
 * @throws {Error} Describing the first invalid field.
 */
function validateCandidateRecord(candidate) {
    requireArchiveString(candidate.id, "candidate.id");
    const label = (field) => `candidate ${candidate.id}.${field}`;
    requireArchivePositiveInteger(candidate.version, label("version"));
    requireArchivePositiveInteger(candidate.ordinal, label("ordinal"));
    requireArchiveString(candidate.benchmarkFingerprint, label("benchmarkFingerprint"));
    requireArchiveString(candidate.candidateFingerprint, label("candidateFingerprint"));
    const allowedVisibility = ["private", "public"];
    if (!allowedVisibility.includes(candidate.visibility)) {
        throw new Error(`candidate ${candidate.id}.visibility must be private or public.`);
    }
    requireArchiveString(candidate.createdAt, label("createdAt"));
}
513
/**
 * Validate that an evaluation record has non-empty string `id`, `runId`,
 * `benchmarkFingerprint`, `candidateFingerprint` and `candidateId`.
 *
 * @param {object} evaluation - Evaluation record.
 * @returns {void}
 * @throws {Error} Describing the first invalid field.
 */
function validateEvaluationRecord(evaluation) {
    requireArchiveString(evaluation.id, "evaluation.id");
    // Checked in this order so the first reported failure stays the same.
    const requiredFields = ["runId", "benchmarkFingerprint", "candidateFingerprint", "candidateId"];
    for (const field of requiredFields) {
        requireArchiveString(evaluation[field], `evaluation ${evaluation.id}.${field}`);
    }
}
520
/**
 * Validate that a run record has non-empty string `id`, `workflow`,
 * `benchmarkFingerprint`, `status` and `startedAt`.
 *
 * @param {object} run - Run record.
 * @returns {void}
 * @throws {Error} Describing the first invalid field.
 */
function validateRunRecord(run) {
    requireArchiveString(run.id, "run.id");
    // Checked in this order so the first reported failure stays the same.
    const requiredFields = ["workflow", "benchmarkFingerprint", "status", "startedAt"];
    for (const field of requiredFields) {
        requireArchiveString(run[field], `run ${run.id}.${field}`);
    }
}
527
/**
 * Assert that `value` is a non-empty string.
 *
 * @param {*} value - Value to check.
 * @param {string} label - Field description used in the error message.
 * @returns {void}
 * @throws {Error} `"<label> must be a non-empty string."` otherwise.
 */
function requireArchiveString(value, label) {
    const isNonEmptyString = typeof value === "string" && value.length > 0;
    if (!isNonEmptyString) {
        throw new Error(`${label} must be a non-empty string.`);
    }
}
532
/**
 * Assert that `value` is a safe integer greater than zero.
 *
 * @param {*} value - Value to check.
 * @param {string} label - Field description used in the error message.
 * @returns {void}
 * @throws {Error} `"<label> must be a positive integer."` otherwise.
 */
function requireArchivePositiveInteger(value, label) {
    // Number.isSafeInteger is false for every non-number, so no separate typeof check is needed.
    const isPositiveInteger = Number.isSafeInteger(value) && value > 0;
    if (!isPositiveInteger) {
        throw new Error(`${label} must be a positive integer.`);
    }
}
537
/**
 * Sort comparator for candidate records: ascending `version`, then
 * `createdAt`, then `id` (the latter two via `localeCompare`).
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive, as `Array.prototype.sort` expects.
 */
function compareLocalCandidateRecords(left, right) {
    const byVersion = left.version - right.version;
    if (byVersion) {
        return byVersion;
    }
    const byCreatedAt = left.createdAt.localeCompare(right.createdAt);
    if (byCreatedAt) {
        return byCreatedAt;
    }
    return left.id.localeCompare(right.id);
}
542
/**
 * Build the JSON record that is archived for a job.
 *
 * The result is the job plus `trace` and `traceSessions`. When the job's
 * output record is non-empty, `output.files` is replaced with
 * `traceSourceFiles`. An existing trace or non-empty trace-session list on
 * the job is reused; otherwise they are built by `buildLocalJobTrace` and
 * `buildLocalJobTraceSessions`.
 *
 * @param {object} job - Sanitized job record.
 * @param {Array<object>} outputFiles - Not read by this function.
 * @param {Array<object>} traceSourceFiles - Files stored in `output.files`
 *   and handed to the trace-session builder.
 * @returns {object} The record to archive.
 */
function archivedLocalJob(job, outputFiles, traceSourceFiles) {
    const output = jsonRecord(job.output);
    const existingTrace = readExistingTrace(job);
    let traceSessions = readExistingTraceSessions(job);
    if (traceSessions.length === 0) {
        traceSessions = buildLocalJobTraceSessions(job, traceSourceFiles);
    }
    const archived = { ...job };
    if (Object.keys(output).length > 0) {
        archived.output = { ...output, files: traceSourceFiles };
    }
    archived.trace = existingTrace ?? buildLocalJobTrace(job);
    archived.traceSessions = traceSessions;
    return archived;
}
558
/**
 * Read the trace already stored on a job, normalizing its shape.
 *
 * @param {object} job - Job record.
 * @returns {object|null} `null` when `job.trace` is missing, not an object,
 *   or an array. Otherwise `{ trace_id, spans, events, summaries }`, where
 *   `trace_id` falls back to `job.id` when absent or empty and each list
 *   falls back to `[]` when it is not an array.
 */
function readExistingTrace(job) {
    const { trace } = job;
    const isRecord = Boolean(trace) && typeof trace === "object" && !Array.isArray(trace);
    if (!isRecord) {
        return null;
    }
    const listOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);
    const hasTraceId = typeof trace.trace_id === "string" && trace.trace_id.length > 0;
    return {
        trace_id: hasTraceId ? trace.trace_id : job.id,
        spans: listOrEmpty(trace.spans),
        events: listOrEmpty(trace.events),
        summaries: listOrEmpty(trace.summaries),
    };
}
572
/**
 * Read the trace sessions already stored on a job.
 *
 * @param {object} job - Job record.
 * @returns {Array<object>} Shallow copies of `job.traceSessions`, or `[]`
 *   when that property is not an array.
 */
function readExistingTraceSessions(job) {
    const { traceSessions } = job;
    return Array.isArray(traceSessions)
        ? traceSessions.map((session) => ({ ...session }))
        : [];
}
579
/**
 * Keep the files that may be archived with a job: everything outside the
 * reserved `.workbench` tree, plus files under `.workbench/traces/`.
 *
 * @param {Array<object>} files - File objects with a string `path`.
 * @returns {Array<object>} The retained files, in their original order.
 */
function filterArchivedExecutionFiles(files) {
    return files.filter(({ path: filePath }) => {
        if (filePath.startsWith(".workbench/traces/")) {
            return true;
        }
        return !isWorkbenchReservedArchivePath(filePath);
    });
}
583
/**
 * Tell whether a path is `.workbench` itself or lies beneath it.
 *
 * @param {string} filePath - Path using `/` separators.
 * @returns {boolean} True for `.workbench` and anything starting with `.workbench/`.
 */
function isWorkbenchReservedArchivePath(filePath) {
    if (filePath === ".workbench") {
        return true;
    }
    return filePath.startsWith(".workbench/");
}
586
/**
 * Synthesizes a single-span trace for a local job that has no stored trace.
 *
 * The trace holds one span with id `"job"`, a `status` event, and optional
 * `output`, `usage` and `error` events (in that order) depending on what the
 * job record contains, plus one summary entry.
 *
 * @param {object} job - Local job record.
 * @returns {{trace_id: string, spans: Array<object>, events: Array<object>, summaries: Array<object>}}
 *   Trace whose id is `local-<job.id>`.
 */
function buildLocalJobTrace(job) {
    const purpose = readExecutionPurpose(job);
    const role = purpose === "improve" ? "improver" : "engine";
    const stageId = purpose ?? "execution";
    const status = traceStatusForJob(job.status);
    const startedAt = job.startedAt ?? job.createdAt;
    const endedAt = job.finishedAt ?? null;
    // Follow-up events are stamped with the end time, or the start time while unfinished.
    const settledAt = endedAt ?? startedAt;
    const spanId = "job";
    const output = jsonRecord(job.output);
    const usage = traceUsageSummary(output.usage);
    const roleLabel = capitalize(role);
    const purposeLabel = purpose ?? "unknown";

    const events = [];
    // Events are numbered by their 1-based position in the list.
    const appendEvent = (kind, at, message, attributes) => {
        events.push(traceEvent({
            index: events.length + 1,
            spanId,
            stageId,
            kind,
            at,
            message,
            attributes,
        }));
    };

    appendEvent("status", startedAt, `${roleLabel} job ${status}.`, {
        job_id: job.id,
        purpose: purposeLabel,
    });

    const outputMessage = localJobOutputMessage(job, output);
    if (outputMessage) {
        appendEvent("output", settledAt, outputMessage, { job_id: job.id });
    }

    if (usage) {
        const usageMessage = usage.total_tokens !== null
            ? `Usage recorded: ${usage.total_tokens} token(s).`
            : "Usage recorded.";
        appendEvent("usage", settledAt, usageMessage, { job_id: job.id, usage });
    }

    if (job.error) {
        appendEvent("error", settledAt, job.error, { job_id: job.id });
    }

    const isTurn = purpose === "attempt" || purpose === "improve";
    const span = {
        id: spanId,
        parent_id: null,
        attempt_number: Math.max(1, job.attempt || 1),
        stage_id: stageId,
        stage_run_index: null,
        kind: isTurn ? "turn" : "stage",
        title: `${roleLabel} job ${job.id}`,
        status,
        started_at: startedAt,
        ended_at: endedAt,
        attributes: {
            job_id: job.id,
            purpose: purposeLabel,
        },
    };

    const summary = traceSummary(job, stageId, status, startedAt, endedAt, usage, outputMessage, null);
    return {
        trace_id: `local-${job.id}`,
        spans: [span],
        events,
        summaries: [summary],
    };
}
674
/**
 * Derives trace sessions for a local job from its output files.
 *
 * @param {object} job - Local job record.
 * @param {Array<object>} outputFiles - Files handed to the session builder.
 * @returns {*} Whatever `buildWorkbenchTraceSessionsFromFiles` returns; the
 *   fallback role is `"improver"` for improve jobs and `"engine"` otherwise.
 */
function buildLocalJobTraceSessions(job, outputFiles) {
    const purpose = readExecutionPurpose(job);
    let fallbackRole = "engine";
    if (purpose === "improve") {
        fallbackRole = "improver";
    }
    return buildWorkbenchTraceSessionsFromFiles({
        job,
        files: outputFiles,
        purpose,
        fallbackRole,
    });
}
683
/**
 * Extracts the surface snapshot files recorded in a job's output.
 *
 * @param {object} job - Job record; `job.output.files` is inspected.
 * @returns {Array<object>} Shallow copies of the entries accepted by
 *   `isSurfaceSnapshotFile`, or an empty array when no file list is present.
 */
function completedJobOutputFiles(job) {
    const { files } = jsonRecord(job.output);
    if (!Array.isArray(files)) {
        return [];
    }
    const snapshotFiles = files.filter(isSurfaceSnapshotFile);
    return snapshotFiles.map((file) => ({ ...file }));
}
690
/**
 * Reads `job.input.execution.purpose`.
 *
 * @param {object} job - Job record.
 * @returns {string | null} The purpose when it is a non-empty string, else `null`.
 */
function readExecutionPurpose(job) {
    const { execution } = jsonRecord(job.input);
    const { purpose } = jsonRecord(execution);
    return stringValue(purpose);
}
694
/**
 * Translates a job status into the trace status vocabulary.
 *
 * @param {*} status - Job status.
 * @returns {string} `"completed"`, `"failed"`, `"canceled"` or `"running"` for
 *   the matching job status; `"warning"` for anything else.
 */
function traceStatusForJob(status) {
    switch (status) {
        case "succeeded":
            return "completed";
        case "failed":
            return "failed";
        case "cancelled":
            // Job records spell it with two l's, traces with one.
            return "canceled";
        case "running":
            return "running";
        default:
            return "warning";
    }
}
705
/**
 * Produces the human-readable output line for a job's trace.
 *
 * For `attempt` jobs with a numeric result score the message reports the
 * score, followed by the result summary (or the feedback summary) when one is
 * available. In every other case the output summary is used, truncated via
 * `truncateTraceMessage`.
 *
 * @param {object} job - Job record, used to read the execution purpose.
 * @param {object} output - The job's output record.
 * @returns {string | null} The message, or `null` when there is nothing to report.
 */
function localJobOutputMessage(job, output) {
    const purpose = readExecutionPurpose(job);
    const result = jsonRecord(output.result);
    const score = numberValue(result.score);

    if (purpose !== "attempt" || score === null) {
        const outputSummary = stringValue(output.summary);
        return outputSummary ? truncateTraceMessage(outputSummary) : null;
    }

    const resultSummary = stringValue(result.summary) ?? stringValue(jsonRecord(result.feedback).summary);
    const suffix = resultSummary ? ` ${resultSummary}` : "";
    return `Attempt produced score ${score}.${suffix}`.trim();
}
716
/**
 * Builds the summary entry for a synthesized job trace.
 *
 * @param {object} job - Job record; `attempt` and `error` are read.
 * @param {*} stageId - Stage identifier stored as `stage_id`.
 * @param {*} status - Trace status stored as `status`.
 * @param {string} startedAt - Start timestamp.
 * @param {string | null} endedAt - End timestamp, or a falsy value while unfinished.
 * @param {object | null} usage - Usage record; its token counts are surfaced.
 * @param {*} outputMessage - Output message; only its truthiness is recorded.
 * @param {number | null} eventCount - Stored as `tool_call_count`, 0 when nullish.
 * @returns {object} Summary record. `duration_ms` is the non-negative
 *   difference of the two timestamps, or 0 when either cannot be parsed.
 */
function traceSummary(job, stageId, status, startedAt, endedAt, usage, outputMessage, eventCount) {
    let durationMs = 0;
    if (endedAt) {
        const endedMs = Date.parse(endedAt);
        const startedMs = Date.parse(startedAt);
        if (Number.isFinite(endedMs) && Number.isFinite(startedMs)) {
            // Clamp so an end before the start never yields a negative duration.
            durationMs = Math.max(0, endedMs - startedMs);
        }
    }
    const attemptNumber = Math.max(1, job.attempt || 1);
    const inputTokens = usage?.input_tokens ?? null;
    const outputTokens = usage?.output_tokens ?? null;
    return {
        attempt_number: attemptNumber,
        stage_id: stageId,
        stage_run_index: null,
        status,
        started_at: startedAt,
        ended_at: endedAt,
        duration_ms: durationMs,
        tool_call_count: eventCount ?? 0,
        input_tokens: inputTokens,
        output_tokens: outputTokens,
        usage,
        final_output_present: Boolean(outputMessage),
        error_message: job.error ?? null,
    };
}
736
/**
 * Creates one trace event record.
 *
 * The event id is `event-` followed by the index left-padded with zeros to
 * three digits, and the message is passed through `truncateTraceMessage`.
 *
 * NOTE(review): `attempt_number` is always 1 here, while the span and summary
 * built alongside derive theirs from `job.attempt` — confirm this is intended.
 *
 * @param {{index: number, spanId: string, stageId: *, kind: string, at: *, message: *, attributes: object}} args
 * @returns {object} Event record.
 */
function traceEvent(args) {
    const { index, spanId, stageId, kind, at, message, attributes } = args;
    const sequence = String(index).padStart(3, "0");
    return {
        id: `event-${sequence}`,
        span_id: spanId,
        attempt_number: 1,
        stage_id: stageId,
        stage_run_index: null,
        kind,
        at,
        message: truncateTraceMessage(message),
        attributes,
    };
}
749
/**
 * Normalizes a usage record into the snake_case shape used by traces.
 *
 * The first non-empty nested record among `total`, `improver`, `runner` and
 * `engine` is used; when none is populated the top-level record itself is
 * used. Each field accepts a camelCase key first and then its snake_case
 * equivalent.
 *
 * @param {*} value - Raw usage value from a job output.
 * @returns {object | null} Normalized usage with missing fields set to
 *   `null`, or `null` when no usage data is present at all.
 */
function traceUsageSummary(value) {
    const record = jsonRecord(value);
    const isPopulated = (entry) => Object.keys(entry).length > 0;
    const nestedRecords = ["total", "improver", "runner", "engine"].map((key) => jsonRecord(record[key]));
    const usage = nestedRecords.find(isPopulated) ?? record;
    if (!isPopulated(usage)) {
        return null;
    }

    // Returns the first non-null reading among the given keys, in order.
    const firstOf = (read, keys) => {
        for (const key of keys) {
            const found = read(usage[key]);
            if (found !== null) {
                return found;
            }
        }
        return null;
    };
    const numberAt = (...keys) => firstOf(numberValue, keys);
    const stringAt = (...keys) => firstOf(stringValue, keys);

    return {
        provider: stringAt("provider"),
        model: stringAt("model"),
        input_tokens: numberAt("inputTokens", "input_tokens"),
        uncached_input_tokens: numberAt("uncachedInputTokens", "uncached_input_tokens"),
        cached_input_tokens: numberAt("cachedInputTokens", "cached_input_tokens"),
        cache_creation_input_tokens: numberAt("cacheCreationInputTokens", "cache_creation_input_tokens"),
        cache_read_input_tokens: numberAt("cacheReadInputTokens", "cache_read_input_tokens"),
        output_tokens: numberAt("outputTokens", "output_tokens"),
        reasoning_output_tokens: numberAt("reasoningOutputTokens", "reasoning_output_tokens"),
        total_tokens: numberAt("totalTokens", "total_tokens"),
        total_cost_usd: numberAt("costUsd", "totalCostUsd", "total_cost_usd"),
        cost_source: stringAt("costSource", "cost_source"),
        pricing_source: stringAt("pricingSource", "pricing_source"),
    };
}
773
/**
 * Narrows a value to a non-empty string.
 *
 * @param {*} value - Any value.
 * @returns {string | null} The value when it is a string of length > 0, else `null`.
 */
function stringValue(value) {
    if (typeof value !== "string") {
        return null;
    }
    return value === "" ? null : value;
}
776
/**
 * Narrows a value to a finite number.
 *
 * @param {*} value - Any value.
 * @returns {number | null} The value when it is a finite number, else `null`.
 */
function numberValue(value) {
    // Number.isFinite never coerces, so non-number inputs are rejected as well.
    return Number.isFinite(value) ? value : null;
}
779
/**
 * Upper-cases the first character of a string.
 *
 * @param {string} value - Text to capitalize.
 * @returns {string} The text with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalize(value) {
    if (!(value.length > 0)) {
        return value;
    }
    const head = value[0]?.toUpperCase() ?? "";
    const tail = value.slice(1);
    return `${head}${tail}`;
}
782
/**
 * Limits a trace message to 500 UTF-16 code units.
 *
 * Longer messages are cut to at most 497 code units and suffixed with `...`.
 * The cut never lands between the two halves of a surrogate pair: when the
 * last kept code unit is a high surrogate it is dropped as well, so the
 * result does not end in a dangling half character before the ellipsis.
 *
 * @param {string} value - Message text.
 * @returns {string} The message, unchanged when its length is 500 or less.
 */
function truncateTraceMessage(value) {
    const maxLength = 500;
    const ellipsis = "...";
    if (!(value.length > maxLength)) {
        return value;
    }
    let cut = maxLength - ellipsis.length;
    if (typeof value === "string") {
        const lastKept = value.charCodeAt(cut - 1);
        const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
        if (isHighSurrogate) {
            // Its low-surrogate partner sits past the cut; drop the orphaned half.
            cut -= 1;
        }
    }
    return `${value.slice(0, cut)}${ellipsis}`;
}
785
/**
 * Validates a record id before it is used as a file or directory name.
 *
 * An id is rejected when it is empty, contains a path separator (`/` or `\`)
 * or a NUL character, or is one of the relative directory names `.` and `..`,
 * which would resolve to the containing directory or its parent once joined
 * into a path.
 *
 * @param {string} value - Record id.
 * @returns {string} The id, unchanged.
 * @throws {Error} When the id is not safe to use as a single path segment.
 */
function localRecordName(value) {
    const hasForbiddenCharacter = /[\\/\0]/u.test(value);
    const isRelativeSegment = value === "." || value === "..";
    if (!value || hasForbiddenCharacter || isRelativeSegment) {
        throw new Error(`Unsafe local archive record id: ${value}`);
    }
    return value;
}
791
/**
 * Replaces the contents of a candidate root directory with the given files.
 *
 * The directory is read first, then removed recursively and rewritten from
 * `files`.
 *
 * NOTE(review): the resolved root is deleted recursively; presumably
 * `normalizeRelativePath` never yields an empty or `.` path, which would make
 * the root the workspace itself — confirm against that helper.
 *
 * @param {string} workspace - Workspace directory.
 * @param {string} candidateRoot - Root path relative to the workspace.
 * @param {Array<{path: string}>} files - Files to write under the root.
 * @returns {Promise<Array<string>>} Sorted, de-duplicated paths that existed
 *   before the rewrite or are present after it.
 */
export async function materializeCandidateRoot(workspace, candidateRoot, files) {
    const root = path.join(workspace, normalizeRelativePath(candidateRoot));
    const previousPaths = (await readSurfaceFiles(root)).map(({ path: filePath }) => filePath);
    const touchedPaths = new Set(previousPaths);

    await fs.rm(root, { force: true, recursive: true });
    await writeSurfaceFiles(root, files);

    for (const filePath of files.map((file) => file.path)) {
        touchedPaths.add(filePath);
    }
    return Array.from(touchedPaths).sort();
}
799
/**
 * Looks up an archived file by path.
 *
 * @param {Array<{path: string}>} files - Archived files.
 * @param {string} filePath - Path to find; normalized with
 *   `normalizeRelativePath` before comparison.
 * @returns {object | null} The first file whose `path` equals the normalized
 *   path, or `null` when there is none.
 */
export function findArchivedFile(files, filePath) {
    const wantedPath = normalizeRelativePath(filePath);
    const match = files.find(({ path: candidatePath }) => candidatePath === wantedPath);
    return match ?? null;
}
803
/**
 * Reads one JSON record from every sub-directory of `root`.
 *
 * For each directory entry `<root>/<name>/`, the file `<root>/<name>/<fileName>`
 * is read with `readJson`; sub-directories without that file are skipped.
 *
 * A root that does not exist (or is not a directory) yields an empty list.
 * Any other failure to list the root is rethrown rather than being reported
 * as "no records", so an unreadable archive is not mistaken for an empty one.
 *
 * @param {string} root - Directory holding one sub-directory per record.
 * @param {string} fileName - Name of the JSON file inside each sub-directory.
 * @returns {Promise<Array<*>>} The parsed records, in directory listing order.
 */
async function readRecords(root, fileName) {
    let entries;
    try {
        entries = await fs.readdir(root, { withFileTypes: true });
    }
    catch (error) {
        const isMissingRoot = error?.code === "ENOENT" || error?.code === "ENOTDIR";
        if (!isMissingRoot) {
            throw error;
        }
        return [];
    }
    const records = [];
    for (const entry of entries) {
        if (!entry.isDirectory()) {
            continue;
        }
        const record = await readJson(path.join(root, entry.name, fileName), null);
        if (record != null) {
            records.push(record);
        }
    }
    return records;
}
814
/**
 * Reads every `*.json` file directly inside `root` as a record.
 *
 * A root that does not exist (or is not a directory) yields an empty list.
 * Any other failure to list the root is rethrown rather than being reported
 * as "no records", so an unreadable archive is not mistaken for an empty one.
 *
 * @param {string} root - Directory holding one JSON file per record.
 * @returns {Promise<Array<*>>} The parsed records, in directory listing order.
 */
async function readFlatRecords(root) {
    let entries;
    try {
        entries = await fs.readdir(root, { withFileTypes: true });
    }
    catch (error) {
        const isMissingRoot = error?.code === "ENOENT" || error?.code === "ENOTDIR";
        if (!isMissingRoot) {
            throw error;
        }
        return [];
    }
    const records = [];
    for (const entry of entries) {
        if (!entry.isFile() || !entry.name.endsWith(".json")) {
            continue;
        }
        const record = await readJson(path.join(root, entry.name), null);
        if (record != null) {
            records.push(record);
        }
    }
    return records;
}
824
/**
 * Reads and parses a JSON file.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @param {*} fallback - Value returned when the file does not exist.
 * @returns {Promise<*>} The parsed content, or `fallback` for a missing file.
 * @throws {SyntaxError} When the file content is not valid JSON; the message
 *   names the file and the original parse error is attached as `cause`.
 * @throws {Error} Any read failure other than a missing file, unchanged.
 */
async function readJson(filePath, fallback) {
    let text;
    try {
        text = await fs.readFile(filePath, "utf8");
    }
    catch (error) {
        if (error?.code === "ENOENT") {
            return fallback;
        }
        throw error;
    }
    try {
        return JSON.parse(text);
    }
    catch (error) {
        // A bare parse error does not say which record is corrupt; add the path.
        throw new SyntaxError(`Invalid JSON in ${filePath}: ${error.message}`, { cause: error });
    }
}
835
/**
 * Writes a value as pretty-printed JSON, creating parent directories as needed.
 *
 * @param {string} filePath - Destination file.
 * @param {*} value - Value to serialize with two-space indentation; a
 *   trailing newline is appended.
 * @returns {Promise<void>}
 */
async function writeJson(filePath, value) {
    const parentDir = path.dirname(filePath);
    await fs.mkdir(parentDir, { recursive: true });
    const payload = `${JSON.stringify(value, null, 2)}\n`;
    await fs.writeFile(filePath, payload);
}