@workbench-ai/workbench 0.0.50 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark-fingerprint.d.ts +1 -3
- package/dist/benchmark-fingerprint.d.ts.map +1 -1
- package/dist/benchmark-fingerprint.js +19 -87
- package/dist/command-model.d.ts.map +1 -1
- package/dist/command-model.js +37 -418
- package/dist/dev-open/client.css +21 -50
- package/dist/dev-open/client.js +140 -140
- package/dist/dev-open-server.d.ts +3 -0
- package/dist/dev-open-server.d.ts.map +1 -1
- package/dist/dev-open-server.js +40 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +452 -886
- package/dist/local-archive.d.ts +5 -1
- package/dist/local-archive.d.ts.map +1 -1
- package/dist/local-archive.js +332 -10
- package/dist/project-source.d.ts +1 -0
- package/dist/project-source.d.ts.map +1 -1
- package/dist/project-source.js +23 -1
- package/package.json +4 -4
package/dist/local-archive.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type CandidateRecord, type EvaluationScorecard, type HostedWorkbenchJob, type RunSummary, type RuntimeEvent, type SurfaceSnapshotFile, type WorkbenchExecutionTrace, type WorkbenchTraceSession } from "@workbench-ai/workbench-core";
|
|
1
|
+
import { type CandidateRecord, type EvaluationScorecard, type HostedWorkbenchJob, type RunSummary, type RuntimeEvent, type SurfaceSnapshotFile, type WorkbenchRuntimeBundle, type WorkbenchRuntimeBundleStats, type WorkbenchRuntimeImportResult, type WorkbenchExecutionTrace, type WorkbenchTraceSession } from "@workbench-ai/workbench-core";
|
|
2
2
|
export interface LocalArchiveSnapshot {
|
|
3
3
|
activeId: string | null;
|
|
4
4
|
candidates: CandidateRecord[];
|
|
@@ -23,6 +23,10 @@ export declare function loadLocalArchive(workspace: string): Promise<LocalArchiv
|
|
|
23
23
|
export declare function loadLocalArchiveIndex(workspace: string): Promise<LocalArchiveIndex>;
|
|
24
24
|
export declare function saveLocalArchive(workspace: string, snapshot: LocalArchiveSnapshot): Promise<void>;
|
|
25
25
|
export declare function saveLocalJobs(workspace: string, jobs: readonly HostedWorkbenchJob[]): Promise<void>;
|
|
26
|
+
export declare function exportLocalRuntimeBundle(workspace: string): Promise<WorkbenchRuntimeBundle>;
|
|
27
|
+
export declare function importLocalRuntimeBundle(workspace: string, bundle: WorkbenchRuntimeBundle, currentBenchmarkFingerprint: string): Promise<WorkbenchRuntimeImportResult>;
|
|
28
|
+
export declare function runtimeBundleStats(bundle: WorkbenchRuntimeBundle): WorkbenchRuntimeBundleStats;
|
|
29
|
+
export declare function sanitizeRuntimeJobForExchange(job: HostedWorkbenchJob): HostedWorkbenchJob;
|
|
26
30
|
export declare function readLocalExecutionFiles(workspace: string, jobId: string): Promise<SurfaceSnapshotFile[]>;
|
|
27
31
|
export declare function readLocalCandidateRecord(workspace: string, candidateId: string): Promise<CandidateRecord>;
|
|
28
32
|
export declare function readLocalCandidateFilesForId(workspace: string, candidateId: string): Promise<SurfaceSnapshotFile[]>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local-archive.d.ts","sourceRoot":"","sources":["../src/local-archive.ts"],"names":[],"mappings":"AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"local-archive.d.ts","sourceRoot":"","sources":["../src/local-archive.ts"],"names":[],"mappings":"AAGA,OAAO,EAQL,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,kBAAkB,EACvB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,KAAK,mBAAmB,EACxB,KAAK,sBAAsB,EAC3B,KAAK,2BAA2B,EAChC,KAAK,4BAA4B,EACjC,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,EAC3B,MAAM,8BAA8B,CAAC;AAOtC,MAAM,WAAW,oBAAoB;IACnC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,EAAE,CAAC,CAAC;IACtD,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,IAAI,EAAE,UAAU,EAAE,CAAC;IACnB,MAAM,EAAE,YAAY,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,IAAI,EAAE,UAAU,EAAE,CAAC;IACnB,MAAM,EAAE,YAAY,EAAE,CAAC;CACxB;AAED,MAAM,MAAM,gBAAgB,GAAG,kBAAkB,GAAG;IAClD,KAAK,CAAC,EAAE,uBAAuB,CAAC;IAChC,aAAa,CAAC,EAAE,qBAAqB,EAAE,CAAC;CACzC,CAAC;AASF,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAevF;AAED,wBAAsB,qBAAqB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAkBzF;AAED,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,oBAAoB,GAC7B,OAAO,CAAC,IAAI,CAAC,CAyBf;AAED,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,SAAS,kBAAkB,EAAE,GAClC,OAAO,CAAC,IAAI,CAAC,CAKf;AAED,wBAAsB,wBAAwB,CAC5C,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,sBAAsB,CAAC,CAuBjC;AAED,wBAAsB,wBAAwB,CAC5C,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,EAC9B,2BAA2B,EAAE,MAAM,GAClC,OAAO,CAAC,4BAA4B,CAAC,CAkHvC;AAED,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,sBAAsB,GAC7B,2BAA2B,CAE7B;AAED,wBAAgB,6BAA6B,CAC3C,GAAG,EAAE,kBAAkB,GACtB,kBAAkB,CAEpB;AAyDD,wBAAsB,uBAAuB,CAC3C,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAIhC;AAED,wBAAsB,wBAAwB,CAC5C,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,eAAe,CAAC,CAU1B;AAED,wBAAsB,4BAA4B,CAChD,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAKhC;AAED,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,mBAAmB,CAAC,CAU9B;AAED,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,UAAU,CAAC,CAUrB;AAED,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAM7B;AAED,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAE7B;AAED,wBAAsB,iBAAiB,CACrC,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAElC;AAED,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,oBAAoB,EAC9B,SAAS,EAAE,eAAe,EAC1B,KAAK,EAAE,SAAS,mBAAmB,EAAE,GACpC,oBAAoB,CAYtB;AAED,wBAAgB,qBAAqB,CACnC,QAAQ,EAAE,oBAAoB,EAC9B,UAAU,EAAE,mBAAmB,GAC9B,oBAAoB,CAQtB;AAED,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,oBAAoB,EAC9B,GAAG,EAAE,UAAU,EACf,MAAM,EAAE,SAAS,YAAY,EAAE,GAC9B,oBAAoB,CAYtB;AAED,wBAAgB,cAAc,CAAC,QAAQ,EAAE,oBAAoB,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,GAAG,oBAAoB,CAK5G;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,GAAG,eAAe,CAMvG;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,GAAG,mBAAmB,EAAE,CAGlH;AAkoBD,wBAAsB,wBAAwB,CAC5C,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,MAAM,EACrB,KAAK,EAAE,SAAS,mBAAmB,EAAE,GACpC,OAAO,CAAC,MAAM,EAAE,CAAC,CAOnB;AAED,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,SAAS,mBAAmB,EAAE,EACrC,QAAQ,EAAE,MAAM,GACf,mBAAmB,GAAG,IAAI,CAG5B"}
|
package/dist/local-archive.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { promises as fs } from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, selectExecutionOutputFilesForInspection, } from "@workbench-ai/workbench-core";
|
|
3
|
+
import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, sanitizeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeJobForExchange, selectExecutionOutputFilesForInspection, workbenchRuntimeBundleStats, workbenchSurfaceFilesEqualForExchange, } from "@workbench-ai/workbench-core";
|
|
4
4
|
const RUNTIME_DIR = ".workbench/runtime";
|
|
5
5
|
const CANDIDATE_RECORDS_DIR = "candidates";
|
|
6
6
|
export function localRuntimeDir(workspace) {
|
|
@@ -66,6 +66,146 @@ export async function saveLocalArchive(workspace, snapshot) {
|
|
|
66
66
|
await writeJson(path.join(root, "events.json"), snapshot.events);
|
|
67
67
|
}
|
|
68
68
|
export async function saveLocalJobs(workspace, jobs) {
|
|
69
|
+
if (jobs.length === 0) {
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
await writeArchivedLocalJobs(workspace, jobs, new Map());
|
|
73
|
+
}
|
|
74
|
+
export async function exportLocalRuntimeBundle(workspace) {
|
|
75
|
+
const snapshot = await loadLocalArchive(workspace);
|
|
76
|
+
const jobs = (await readLocalJobs(workspace)).map(sanitizeRuntimeJobForExchange);
|
|
77
|
+
const executionFiles = await Promise.all(jobs.map(async (job) => ({
|
|
78
|
+
jobId: job.id,
|
|
79
|
+
files: await readLocalExecutionFiles(workspace, job.id),
|
|
80
|
+
})));
|
|
81
|
+
return {
|
|
82
|
+
schema: "workbench.runtime.bundle.v1",
|
|
83
|
+
activeId: snapshot.activeId,
|
|
84
|
+
candidates: snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange),
|
|
85
|
+
candidateFiles: Object.entries(snapshot.candidateFiles).map(([candidateId, files]) => ({
|
|
86
|
+
candidateId,
|
|
87
|
+
files: copySurfaceFiles(files),
|
|
88
|
+
})),
|
|
89
|
+
evaluations: snapshot.evaluations.map((evaluation) => ({ ...evaluation })),
|
|
90
|
+
runs: snapshot.runs.map((run) => ({ ...run })),
|
|
91
|
+
jobs,
|
|
92
|
+
executionFiles,
|
|
93
|
+
events: snapshot.events.map((event) => ({ ...event })),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
export async function importLocalRuntimeBundle(workspace, bundle, currentBenchmarkFingerprint) {
|
|
97
|
+
validateRuntimeBundleSchema(bundle);
|
|
98
|
+
const snapshot = await loadLocalArchive(workspace);
|
|
99
|
+
const existingJobs = (await readLocalJobs(workspace)).map(sanitizeRuntimeJobForExchange);
|
|
100
|
+
let changed = false;
|
|
101
|
+
const existingCandidates = snapshot.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
|
|
102
|
+
if (JSON.stringify(existingCandidates) !== JSON.stringify(snapshot.candidates)) {
|
|
103
|
+
changed = true;
|
|
104
|
+
}
|
|
105
|
+
const incomingCandidates = bundle.candidates.map(sanitizeWorkbenchRuntimeCandidateForExchange);
|
|
106
|
+
const candidates = mergeRecordsById(existingCandidates, incomingCandidates, (candidate) => candidate.id, (didChange) => {
|
|
107
|
+
changed ||= didChange;
|
|
108
|
+
}, runtimeCandidatesCompatibleForExchange, mergeRuntimeCandidateForExchange).sort(compareLocalCandidateRecords);
|
|
109
|
+
const candidateFiles = { ...snapshot.candidateFiles };
|
|
110
|
+
for (const group of bundle.candidateFiles) {
|
|
111
|
+
const candidateId = localRecordName(group.candidateId);
|
|
112
|
+
const files = copySurfaceFiles(group.files);
|
|
113
|
+
const existing = candidateFiles[candidateId];
|
|
114
|
+
if (existing) {
|
|
115
|
+
if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
|
|
116
|
+
throw new Error(`Runtime history conflict for candidate files ${candidateId}.`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
changed = true;
|
|
121
|
+
}
|
|
122
|
+
candidateFiles[candidateId] = files;
|
|
123
|
+
}
|
|
124
|
+
const activeId = compatibleRuntimeActiveCandidateId(candidates, bundle.activeId ?? null, currentBenchmarkFingerprint) ??
|
|
125
|
+
compatibleRuntimeActiveCandidateId(candidates, snapshot.activeId, currentBenchmarkFingerprint) ??
|
|
126
|
+
latestCompatibleRuntimeCandidateId(candidates, currentBenchmarkFingerprint);
|
|
127
|
+
if (activeId !== snapshot.activeId) {
|
|
128
|
+
changed = true;
|
|
129
|
+
}
|
|
130
|
+
const evaluations = mergeRecordsById(snapshot.evaluations, bundle.evaluations, (evaluation) => evaluation.id, (didChange) => {
|
|
131
|
+
changed ||= didChange;
|
|
132
|
+
}, runtimeEvaluationsCompatibleForExchange).sort((left, right) => left.createdAt.localeCompare(right.createdAt) || left.id.localeCompare(right.id));
|
|
133
|
+
const runs = mergeRecordsById(snapshot.runs, bundle.runs, (run) => run.id, (didChange) => {
|
|
134
|
+
changed ||= didChange;
|
|
135
|
+
}, runtimeRunsCompatibleForExchange).sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.id.localeCompare(right.id));
|
|
136
|
+
const events = mergeRecordsById(snapshot.events, bundle.events, runtimeEventKey, (didChange) => {
|
|
137
|
+
changed ||= didChange;
|
|
138
|
+
}).sort((left, right) => left.at.localeCompare(right.at) || left.id.localeCompare(right.id));
|
|
139
|
+
const executionFilesByJobId = new Map();
|
|
140
|
+
await Promise.all(existingJobs.map(async (job) => {
|
|
141
|
+
executionFilesByJobId.set(job.id, await readLocalExecutionFiles(workspace, job.id));
|
|
142
|
+
}));
|
|
143
|
+
const existingJobById = new Map(existingJobs.map((job) => [job.id, job]));
|
|
144
|
+
const incomingJobById = new Map(bundle.jobs.map(sanitizeRuntimeJobForExchange).map((job) => [job.id, job]));
|
|
145
|
+
for (const group of bundle.executionFiles) {
|
|
146
|
+
const jobId = localRecordName(group.jobId);
|
|
147
|
+
const files = copySurfaceFiles(group.files);
|
|
148
|
+
const existing = executionFilesByJobId.get(jobId);
|
|
149
|
+
if (existing) {
|
|
150
|
+
if (!workbenchSurfaceFilesEqualForExchange(existing, files)) {
|
|
151
|
+
const existingJob = existingJobById.get(jobId) ?? null;
|
|
152
|
+
const incomingJob = incomingJobById.get(jobId) ?? null;
|
|
153
|
+
if (!existingJob || !incomingJob || !runtimeJobsEqualForExchange(existingJob, incomingJob)) {
|
|
154
|
+
throw new Error(`Runtime history conflict for execution files ${jobId}.`);
|
|
155
|
+
}
|
|
156
|
+
changed = true;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
else {
|
|
160
|
+
changed = true;
|
|
161
|
+
}
|
|
162
|
+
executionFilesByJobId.set(jobId, files);
|
|
163
|
+
}
|
|
164
|
+
const jobs = mergeRecordsById(existingJobs, bundle.jobs.map(sanitizeRuntimeJobForExchange), (job) => job.id, (didChange) => {
|
|
165
|
+
changed ||= didChange;
|
|
166
|
+
}, runtimeJobsEqualForExchange).sort((left, right) => (left.startedAt ?? left.createdAt).localeCompare(right.startedAt ?? right.createdAt) ||
|
|
167
|
+
left.id.localeCompare(right.id));
|
|
168
|
+
await saveLocalArchive(workspace, {
|
|
169
|
+
activeId,
|
|
170
|
+
candidates,
|
|
171
|
+
candidateFiles,
|
|
172
|
+
evaluations,
|
|
173
|
+
runs,
|
|
174
|
+
events,
|
|
175
|
+
});
|
|
176
|
+
await writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId);
|
|
177
|
+
return {
|
|
178
|
+
changed,
|
|
179
|
+
stats: runtimeBundleStats({
|
|
180
|
+
schema: "workbench.runtime.bundle.v1",
|
|
181
|
+
activeId,
|
|
182
|
+
candidates,
|
|
183
|
+
candidateFiles: Object.entries(candidateFiles).map(([candidateId, files]) => ({
|
|
184
|
+
candidateId,
|
|
185
|
+
files,
|
|
186
|
+
})),
|
|
187
|
+
evaluations,
|
|
188
|
+
runs,
|
|
189
|
+
jobs,
|
|
190
|
+
executionFiles: [...executionFilesByJobId.entries()].map(([jobId, files]) => ({
|
|
191
|
+
jobId,
|
|
192
|
+
files,
|
|
193
|
+
})),
|
|
194
|
+
events,
|
|
195
|
+
}),
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
export function runtimeBundleStats(bundle) {
|
|
199
|
+
return workbenchRuntimeBundleStats(bundle);
|
|
200
|
+
}
|
|
201
|
+
export function sanitizeRuntimeJobForExchange(job) {
|
|
202
|
+
return sanitizeWorkbenchRuntimeJobForExchange(job);
|
|
203
|
+
}
|
|
204
|
+
function sanitizeRuntimeJobForArchive(job) {
|
|
205
|
+
const { leaseUntil: _leaseUntil, wakeupLeaseUntil: _wakeupLeaseUntil, hostId: _hostId, workerId: _workerId, claimTokenHash: _claimTokenHash, ...portable } = job;
|
|
206
|
+
return { ...portable };
|
|
207
|
+
}
|
|
208
|
+
async function writeArchivedLocalJobs(workspace, jobs, executionFilesByJobId) {
|
|
69
209
|
if (jobs.length === 0) {
|
|
70
210
|
return;
|
|
71
211
|
}
|
|
@@ -77,14 +217,18 @@ export async function saveLocalJobs(workspace, jobs) {
|
|
|
77
217
|
fs.mkdir(executionFilesDir, { recursive: true }),
|
|
78
218
|
]);
|
|
79
219
|
for (const job of jobs) {
|
|
220
|
+
const sanitizedJob = sanitizeRuntimeJobForArchive(job);
|
|
80
221
|
const safeJobId = localRecordName(job.id);
|
|
81
|
-
const
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
222
|
+
const explicitOutputFiles = executionFilesByJobId.get(job.id);
|
|
223
|
+
const traceSourceFiles = filterArchivedExecutionFiles(completedJobOutputFiles(sanitizedJob));
|
|
224
|
+
const outputFiles = explicitOutputFiles
|
|
225
|
+
? copySurfaceFiles(explicitOutputFiles)
|
|
226
|
+
: selectExecutionOutputFilesForInspection({
|
|
227
|
+
purpose: readExecutionPurpose(sanitizedJob),
|
|
228
|
+
files: traceSourceFiles,
|
|
229
|
+
output: jsonRecord(sanitizedJob.output),
|
|
230
|
+
});
|
|
231
|
+
await writeJson(path.join(jobsDir, `${safeJobId}.json`), archivedLocalJob(sanitizedJob, outputFiles, traceSourceFiles.length > 0 ? traceSourceFiles : outputFiles));
|
|
88
232
|
const filesRoot = path.join(executionFilesDir, safeJobId);
|
|
89
233
|
await fs.rm(filesRoot, { force: true, recursive: true });
|
|
90
234
|
await writeSurfaceFiles(filesRoot, outputFiles);
|
|
@@ -187,6 +331,156 @@ export function readLocalCandidateFiles(snapshot, candidateId) {
|
|
|
187
331
|
function validateLocalArchiveSnapshot(snapshot) {
|
|
188
332
|
validateLocalArchiveIndex(snapshot);
|
|
189
333
|
}
|
|
334
|
+
function validateRuntimeBundleSchema(bundle) {
|
|
335
|
+
if (!bundle || bundle.schema !== "workbench.runtime.bundle.v1") {
|
|
336
|
+
throw new Error("Unsupported Workbench runtime bundle.");
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
function mergeRecordsById(existing, incoming, idFor, markChanged, equal = runtimeRecordsEqual, merge = (_left, right) => right) {
|
|
340
|
+
const records = new Map();
|
|
341
|
+
for (const record of existing) {
|
|
342
|
+
records.set(localRecordName(idFor(record)), record);
|
|
343
|
+
}
|
|
344
|
+
for (const record of incoming) {
|
|
345
|
+
const id = localRecordName(idFor(record));
|
|
346
|
+
const previous = records.get(id);
|
|
347
|
+
if (!previous) {
|
|
348
|
+
markChanged(true);
|
|
349
|
+
records.set(id, record);
|
|
350
|
+
continue;
|
|
351
|
+
}
|
|
352
|
+
if (!equal(previous, record)) {
|
|
353
|
+
throw new Error(`Runtime history conflict for id ${id}.`);
|
|
354
|
+
}
|
|
355
|
+
const merged = merge(previous, record);
|
|
356
|
+
if (!runtimeRecordsEqual(previous, merged)) {
|
|
357
|
+
markChanged(true);
|
|
358
|
+
}
|
|
359
|
+
records.set(id, merged);
|
|
360
|
+
}
|
|
361
|
+
return [...records.values()];
|
|
362
|
+
}
|
|
363
|
+
function runtimeRecordsEqual(left, right) {
|
|
364
|
+
return JSON.stringify(canonicalRuntimeJson(left)) ===
|
|
365
|
+
JSON.stringify(canonicalRuntimeJson(right));
|
|
366
|
+
}
|
|
367
|
+
function runtimeJobsEqualForExchange(left, right) {
|
|
368
|
+
if (runtimeRecordsEqual(runtimeComparableJob(left), runtimeComparableJob(right))) {
|
|
369
|
+
return true;
|
|
370
|
+
}
|
|
371
|
+
return runtimeRecordsEqual(runtimeJobIdentityForExchange(left), runtimeJobIdentityForExchange(right));
|
|
372
|
+
}
|
|
373
|
+
function runtimeComparableJob(job) {
|
|
374
|
+
const comparable = sanitizeRuntimeJobForExchange(job);
|
|
375
|
+
const output = comparable.output;
|
|
376
|
+
if (!output || typeof output !== "object" || Array.isArray(output)) {
|
|
377
|
+
return comparable;
|
|
378
|
+
}
|
|
379
|
+
const { files: _files, fileSet: _fileSet, ...portableOutput } = output;
|
|
380
|
+
return {
|
|
381
|
+
...comparable,
|
|
382
|
+
output: portableOutput,
|
|
383
|
+
};
|
|
384
|
+
}
|
|
385
|
+
function runtimeCandidatesCompatibleForExchange(left, right) {
|
|
386
|
+
return runtimeRecordsEqual(runtimeCandidateIdentityForExchange(left), runtimeCandidateIdentityForExchange(right));
|
|
387
|
+
}
|
|
388
|
+
function runtimeCandidateIdentityForExchange(candidate) {
|
|
389
|
+
const { eval: _eval, prompt: _prompt, meta: _meta, status: _status, usage: _usage, visibility: _visibility, ownerUserId: _ownerUserId, ownerUsername: _ownerUsername, metrics: _metrics, candidateRunId: _candidateRunId, candidateRunName: _candidateRunName, ...identity } = candidate;
|
|
390
|
+
return identity;
|
|
391
|
+
}
|
|
392
|
+
function mergeRuntimeCandidateForExchange(left, right) {
|
|
393
|
+
return {
|
|
394
|
+
...left,
|
|
395
|
+
...right,
|
|
396
|
+
...(right.eval ? { eval: right.eval } : left.eval ? { eval: left.eval } : {}),
|
|
397
|
+
...(right.prompt ? { prompt: right.prompt } : left.prompt ? { prompt: left.prompt } : {}),
|
|
398
|
+
...(right.meta !== undefined ? { meta: right.meta } : left.meta !== undefined ? { meta: left.meta } : {}),
|
|
399
|
+
...(right.usage ? { usage: right.usage } : left.usage ? { usage: left.usage } : {}),
|
|
400
|
+
visibility: right.visibility ?? left.visibility,
|
|
401
|
+
};
|
|
402
|
+
}
|
|
403
|
+
function runtimeEvaluationsCompatibleForExchange(left, right) {
|
|
404
|
+
if (runtimeRecordsEqual(left, right)) {
|
|
405
|
+
return true;
|
|
406
|
+
}
|
|
407
|
+
return runtimeRecordsEqual(runtimeEvaluationIdentityForExchange(left), runtimeEvaluationIdentityForExchange(right));
|
|
408
|
+
}
|
|
409
|
+
function runtimeEvaluationIdentityForExchange(evaluation) {
|
|
410
|
+
return {
|
|
411
|
+
id: evaluation.id,
|
|
412
|
+
runId: evaluation.runId,
|
|
413
|
+
candidateId: evaluation.candidateId,
|
|
414
|
+
candidateVersion: evaluation.candidateVersion,
|
|
415
|
+
benchmarkFingerprint: evaluation.benchmarkFingerprint,
|
|
416
|
+
candidateFingerprint: evaluation.candidateFingerprint,
|
|
417
|
+
};
|
|
418
|
+
}
|
|
419
|
+
function runtimeRunsCompatibleForExchange(left, right) {
|
|
420
|
+
if (runtimeRecordsEqual(left, right)) {
|
|
421
|
+
return true;
|
|
422
|
+
}
|
|
423
|
+
return runtimeRecordsEqual(runtimeRunIdentityForExchange(left), runtimeRunIdentityForExchange(right));
|
|
424
|
+
}
|
|
425
|
+
function runtimeRunIdentityForExchange(run) {
|
|
426
|
+
return {
|
|
427
|
+
id: run.id,
|
|
428
|
+
workflow: run.workflow,
|
|
429
|
+
benchmarkFingerprint: run.benchmarkFingerprint,
|
|
430
|
+
candidateId: run.candidateId ?? null,
|
|
431
|
+
outputCandidateId: run.outputCandidateId ?? null,
|
|
432
|
+
engineRun: run.engineRun,
|
|
433
|
+
improver: run.improver,
|
|
434
|
+
strategy: run.strategy,
|
|
435
|
+
budget: run.budget,
|
|
436
|
+
samples: run.samples,
|
|
437
|
+
attemptsRequested: run.attemptsRequested,
|
|
438
|
+
};
|
|
439
|
+
}
|
|
440
|
+
function runtimeJobIdentityForExchange(job) {
|
|
441
|
+
return {
|
|
442
|
+
id: job.id,
|
|
443
|
+
runId: job.runId,
|
|
444
|
+
candidateId: job.candidateId,
|
|
445
|
+
kind: job.kind,
|
|
446
|
+
attempt: job.attempt,
|
|
447
|
+
};
|
|
448
|
+
}
|
|
449
|
+
function compatibleRuntimeActiveCandidateId(candidates, candidateId, benchmarkFingerprint) {
|
|
450
|
+
if (!candidateId) {
|
|
451
|
+
return null;
|
|
452
|
+
}
|
|
453
|
+
const candidate = candidates.find((entry) => entry.id === candidateId) ?? null;
|
|
454
|
+
return candidate?.benchmarkFingerprint === benchmarkFingerprint ? candidate.id : null;
|
|
455
|
+
}
|
|
456
|
+
function latestCompatibleRuntimeCandidateId(candidates, benchmarkFingerprint) {
|
|
457
|
+
return candidates
|
|
458
|
+
.filter((candidate) => candidate.benchmarkFingerprint === benchmarkFingerprint &&
|
|
459
|
+
candidate.status === "evaluated")
|
|
460
|
+
.at(-1)?.id ?? null;
|
|
461
|
+
}
|
|
462
|
+
function canonicalRuntimeJson(value) {
|
|
463
|
+
if (Array.isArray(value)) {
|
|
464
|
+
return value.map(canonicalRuntimeJson);
|
|
465
|
+
}
|
|
466
|
+
if (value && typeof value === "object") {
|
|
467
|
+
return Object.fromEntries(Object.keys(value)
|
|
468
|
+
.sort()
|
|
469
|
+
.map((key) => [key, canonicalRuntimeJson(value[key])]));
|
|
470
|
+
}
|
|
471
|
+
return value;
|
|
472
|
+
}
|
|
473
|
+
function runtimeEventKey(event) {
|
|
474
|
+
return [
|
|
475
|
+
event.runId ?? "_",
|
|
476
|
+
event.jobId ?? "_",
|
|
477
|
+
event.at,
|
|
478
|
+
event.id,
|
|
479
|
+
].join("#");
|
|
480
|
+
}
|
|
481
|
+
function copySurfaceFiles(files) {
|
|
482
|
+
return files.map((file) => ({ ...file }));
|
|
483
|
+
}
|
|
190
484
|
function validateLocalArchiveIndex(snapshot) {
|
|
191
485
|
const candidateIds = new Set(snapshot.candidates.map((candidate) => candidate.id));
|
|
192
486
|
if (snapshot.activeId && !candidateIds.has(snapshot.activeId)) {
|
|
@@ -227,6 +521,9 @@ function validateCandidateRecord(candidate) {
|
|
|
227
521
|
requireArchivePositiveInteger(candidate.ordinal, `candidate ${candidate.id}.ordinal`);
|
|
228
522
|
requireArchiveString(candidate.benchmarkFingerprint, `candidate ${candidate.id}.benchmarkFingerprint`);
|
|
229
523
|
requireArchiveString(candidate.candidateFingerprint, `candidate ${candidate.id}.candidateFingerprint`);
|
|
524
|
+
if (candidate.visibility !== "private" && candidate.visibility !== "public") {
|
|
525
|
+
throw new Error(`candidate ${candidate.id}.visibility must be private or public.`);
|
|
526
|
+
}
|
|
230
527
|
requireArchiveString(candidate.createdAt, `candidate ${candidate.id}.createdAt`);
|
|
231
528
|
}
|
|
232
529
|
function validateEvaluationRecord(evaluation) {
|
|
@@ -260,16 +557,41 @@ function compareLocalCandidateRecords(left, right) {
|
|
|
260
557
|
}
|
|
261
558
|
function archivedLocalJob(job, outputFiles, traceSourceFiles) {
|
|
262
559
|
const output = jsonRecord(job.output);
|
|
263
|
-
const
|
|
560
|
+
const existingTrace = readExistingTrace(job);
|
|
561
|
+
const existingTraceSessions = readExistingTraceSessions(job);
|
|
562
|
+
const traceSessions = existingTraceSessions.length > 0
|
|
563
|
+
? existingTraceSessions
|
|
564
|
+
: buildLocalJobTraceSessions(job, traceSourceFiles);
|
|
264
565
|
return {
|
|
265
566
|
...job,
|
|
266
567
|
...(Object.keys(output).length > 0
|
|
267
568
|
? { output: { ...output, files: traceSourceFiles } }
|
|
268
569
|
: {}),
|
|
269
|
-
trace: buildLocalJobTrace(job),
|
|
570
|
+
trace: existingTrace ?? buildLocalJobTrace(job),
|
|
270
571
|
traceSessions,
|
|
271
572
|
};
|
|
272
573
|
}
|
|
574
|
+
function readExistingTrace(job) {
|
|
575
|
+
const trace = job.trace;
|
|
576
|
+
if (!trace || typeof trace !== "object" || Array.isArray(trace)) {
|
|
577
|
+
return null;
|
|
578
|
+
}
|
|
579
|
+
return {
|
|
580
|
+
trace_id: typeof trace.trace_id === "string" && trace.trace_id.length > 0
|
|
581
|
+
? trace.trace_id
|
|
582
|
+
: job.id,
|
|
583
|
+
spans: Array.isArray(trace.spans) ? trace.spans : [],
|
|
584
|
+
events: Array.isArray(trace.events) ? trace.events : [],
|
|
585
|
+
summaries: Array.isArray(trace.summaries) ? trace.summaries : [],
|
|
586
|
+
};
|
|
587
|
+
}
|
|
588
|
+
function readExistingTraceSessions(job) {
|
|
589
|
+
const sessions = job.traceSessions;
|
|
590
|
+
if (!Array.isArray(sessions)) {
|
|
591
|
+
return [];
|
|
592
|
+
}
|
|
593
|
+
return sessions.map((session) => ({ ...session }));
|
|
594
|
+
}
|
|
273
595
|
function filterArchivedExecutionFiles(files) {
|
|
274
596
|
return files.filter((file) => file.path.startsWith(".workbench/traces/") ||
|
|
275
597
|
!isWorkbenchReservedArchivePath(file.path));
|
package/dist/project-source.d.ts
CHANGED
|
@@ -58,5 +58,6 @@ interface LocalProjectSourceOptions {
|
|
|
58
58
|
}
|
|
59
59
|
export declare function readLocalProjectSource(source: string, options?: LocalProjectSourceOptions): Promise<LocalProjectSource>;
|
|
60
60
|
export declare function readLocalAuthoredProjectSource(source: string, options?: LocalProjectSourceOptions): Promise<LocalAuthoredProjectSource>;
|
|
61
|
+
export declare function hostedEngineResolveFiles(source: LocalProjectSource): HostedFile[];
|
|
61
62
|
export {};
|
|
62
63
|
//# sourceMappingURL=project-source.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"project-source.d.ts","sourceRoot":"","sources":["../src/project-source.ts"],"names":[],"mappings":"AAKA,OAAO,EAOL,kCAAkC,EAGlC,KAAK,IAAI,EACT,KAAK,mBAAmB,EACzB,MAAM,8BAA8B,CAAC;AACtC,OAAO,
|
|
1
|
+
{"version":3,"file":"project-source.d.ts","sourceRoot":"","sources":["../src/project-source.ts"],"names":[],"mappings":"AAKA,OAAO,EAOL,kCAAkC,EAGlC,KAAK,IAAI,EACT,KAAK,mBAAmB,EACzB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAUL,KAAK,mBAAmB,EACxB,KAAK,4BAA4B,EAClC,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAGL,KAAK,qBAAqB,EAC3B,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAML,KAAK,wBAAwB,EAC9B,MAAM,sBAAsB,CAAC;AAI9B,eAAO,MAAM,wBAAwB,mBAAsB,CAAC;AAC5D,eAAO,MAAM,wBAAwB,eAAe,CAAC;AACrD,eAAO,MAAM,wBAAwB,mBAAsB,CAAC;AAE5D,MAAM,MAAM,UAAU,GAAG,qBAAqB,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,UAAU,CAAC,OAAO,kCAAkC,CAAC,CAAC;IAC5D,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,UAAU,EAAE,CAAC;IAC9B,cAAc,EAAE,UAAU,EAAE,CAAC;IAC7B,kBAAkB,EAAE,UAAU,EAAE,CAAC;IACjC,QAAQ,EAAE,wBAAwB,EAAE,CAAC;IACrC,YAAY,EAAE,UAAU,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,aAAa,EAAE,4BAA4B,CAAC;IAC5C,4BAA4B,EAAE,MAAM,CAAC;IACrC,wBAAwB,CAAC,EAAE,4BAA4B,CAAC,aAAa,CAAC,CAAC;IACvE,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,0BAA0B;IACzC,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,UAAU,yBAAyB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAaD,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,kBAAkB,CAAC,CA8G7B;AAED,wBAAsB,8BAA8B,CAClD,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,0BAA0B,CAAC,CA6BrC;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,UAAU,EAAE,CAqBjF"}
|
package/dist/project-source.js
CHANGED
|
@@ -3,7 +3,7 @@ import { createHash, randomUUID } from "node:crypto";
|
|
|
3
3
|
import { spawn } from "node:child_process";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { BENCHMARK_SPEC_FILE, CANDIDATE_SPEC_FILE, buildWorkbenchProjectSourceFiles, engineResolveInvocationForSpec, normalizeSurfaceFiles, parseWorkbenchSourceFiles, resolveWorkbenchResolvedSourceYaml, serializeWorkbenchResolvedSourceYaml, validateWorkbenchResolvedSourceYaml, } from "@workbench-ai/workbench-core";
|
|
6
|
-
import { assertWorkbenchAdapterOperationSupport, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterInvocations, readWorkbenchAdapterOperationResult, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
|
|
6
|
+
import { assertWorkbenchAdapterOperationSupport, assertWorkbenchAdapterOperationResultOk, collectWorkbenchAdapterInvocations, readWorkbenchAdapterOperationResult, WORKBENCH_ADAPTER_RESULT_FILE, WORKBENCH_ADAPTER_RESULT_PROTOCOL, workbenchAdapterOperationCommand, workbenchAdapterOperationResultPath, } from "@workbench-ai/workbench-protocol";
|
|
7
7
|
import { readSnapshotFiles, WorkspaceSnapshotError, } from "./workspace-snapshot.js";
|
|
8
8
|
import { defaultAdapterManifests, composeRuntimeDockerfileWithAdapters, resolveDefaultWorkbenchAdapter, resolveProjectAdapterSource, resolveWorkbenchAdaptersForProject, } from "./adapter-project.js";
|
|
9
9
|
import { createAdapterCommandEnv } from "./adapter-command-env.js";
|
|
@@ -136,6 +136,28 @@ export async function readLocalAuthoredProjectSource(source, options = {}) {
|
|
|
136
136
|
],
|
|
137
137
|
};
|
|
138
138
|
}
|
|
139
|
+
export function hostedEngineResolveFiles(source) {
|
|
140
|
+
return [
|
|
141
|
+
...source.engineResolveFiles,
|
|
142
|
+
{
|
|
143
|
+
path: WORKBENCH_ADAPTER_RESULT_FILE,
|
|
144
|
+
content: `${JSON.stringify({
|
|
145
|
+
protocol: WORKBENCH_ADAPTER_RESULT_PROTOCOL,
|
|
146
|
+
operation: "engine.resolve",
|
|
147
|
+
ok: true,
|
|
148
|
+
value: {
|
|
149
|
+
cases: source.engineCases,
|
|
150
|
+
...(source.engineResolveEnvironment
|
|
151
|
+
? { environment: source.engineResolveEnvironment }
|
|
152
|
+
: {}),
|
|
153
|
+
},
|
|
154
|
+
feedback: {
|
|
155
|
+
path: source.engineResolveFingerprintPath,
|
|
156
|
+
},
|
|
157
|
+
}, null, 2)}\n`,
|
|
158
|
+
},
|
|
159
|
+
];
|
|
160
|
+
}
|
|
139
161
|
async function resolveLocalProjectSourcePaths(source, options) {
|
|
140
162
|
const resolved = path.resolve(source);
|
|
141
163
|
const stat = await fs.stat(resolved).catch(() => null);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@workbench-ai/workbench",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.52",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -21,9 +21,9 @@
|
|
|
21
21
|
],
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"yaml": "^2.8.2",
|
|
24
|
-
"@workbench-ai/workbench-built-in-adapters": "0.0.
|
|
25
|
-
"@workbench-ai/workbench-
|
|
26
|
-
"@workbench-ai/workbench-
|
|
24
|
+
"@workbench-ai/workbench-built-in-adapters": "0.0.52",
|
|
25
|
+
"@workbench-ai/workbench-core": "0.0.52",
|
|
26
|
+
"@workbench-ai/workbench-protocol": "0.0.52"
|
|
27
27
|
},
|
|
28
28
|
"devDependencies": {
|
|
29
29
|
"@tailwindcss/postcss": "^4.2.2",
|