@workbench-ai/workbench 0.0.65 → 0.0.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { promises as fs } from "node:fs";
2
2
  import path from "node:path";
3
- import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, compactWorkbenchRuntimeJobForExchange, mergeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeJobForExchange, selectExecutionOutputFilesForInspection, workbenchRuntimeExplicitActiveId, workbenchRuntimeBundleStats, workbenchRuntimeCandidateIdentityForExchange, workbenchSurfaceFilesEqualForExchange, } from "@workbench-ai/workbench-core";
3
+ import { buildWorkbenchTraceSessionsFromFiles, candidateRecordWithoutDerivedFields, compactWorkbenchRuntimeJobForExchange, mergeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeCandidateForExchange, sanitizeWorkbenchRuntimeJobForExchange, selectExecutionOutputFilesForInspection, isSurfaceSnapshotFile, jsonRecord, normalizeRelativePath, readSurfaceFiles, workbenchRuntimeBundleStats, workbenchRuntimeCandidateIdentityForExchange, workbenchRuntimeProjectedActiveId, workbenchSurfaceFilesEqualForExchange, writeSurfaceFiles, } from "@workbench-ai/workbench-core";
4
4
  const RUNTIME_DIR = ".workbench/runtime";
5
5
  const CANDIDATE_RECORDS_DIR = "candidates";
6
6
  export function localRuntimeDir(workspace) {
@@ -73,12 +73,17 @@ export async function saveLocalJobs(workspace, jobs) {
73
73
  }
74
74
  export async function exportLocalRuntimeBundle(workspace, options = {}) {
75
75
  const snapshot = await loadLocalArchive(workspace);
76
- const jobs = (await readLocalJobs(workspace)).map(compactWorkbenchRuntimeJobForExchange);
76
+ const archivedJobs = await readLocalJobs(workspace);
77
+ const jobs = archivedJobs.map(compactWorkbenchRuntimeJobForExchange);
78
+ const executionFiles = (await Promise.all(archivedJobs.map(async (job) => ({
79
+ jobId: job.id,
80
+ files: await readLocalExecutionFiles(workspace, job.id),
81
+ })))).filter((group) => group.files.length > 0);
77
82
  const activeId = options.currentBenchmarkFingerprint
78
- ? workbenchRuntimeExplicitActiveId({
83
+ ? workbenchRuntimeProjectedActiveId({
79
84
  candidates: snapshot.candidates,
85
+ evaluations: snapshot.evaluations,
80
86
  runs: snapshot.runs,
81
- preferredActiveId: snapshot.activeId,
82
87
  benchmarkFingerprint: options.currentBenchmarkFingerprint,
83
88
  })
84
89
  : snapshot.activeId;
@@ -93,7 +98,7 @@ export async function exportLocalRuntimeBundle(workspace, options = {}) {
93
98
  evaluations: snapshot.evaluations.map((evaluation) => ({ ...evaluation })),
94
99
  runs: snapshot.runs.map((run) => ({ ...run })),
95
100
  jobs,
96
- executionFiles: [],
101
+ executionFiles,
97
102
  events: snapshot.events.map((event) => ({ ...event })),
98
103
  };
99
104
  }
@@ -163,10 +168,10 @@ export async function importLocalRuntimeBundle(workspace, bundle, currentBenchma
163
168
  changed ||= didChange;
164
169
  }, runtimeJobsEqualForExchange).sort((left, right) => (left.startedAt ?? left.createdAt).localeCompare(right.startedAt ?? right.createdAt) ||
165
170
  left.id.localeCompare(right.id));
166
- const activeId = workbenchRuntimeExplicitActiveId({
171
+ const activeId = workbenchRuntimeProjectedActiveId({
167
172
  candidates,
173
+ evaluations,
168
174
  runs,
169
- preferredActiveId: bundle.activeId ?? null,
170
175
  benchmarkFingerprint: currentBenchmarkFingerprint,
171
176
  });
172
177
  if (activeId !== snapshot.activeId) {
@@ -682,14 +687,6 @@ function completedJobOutputFiles(job) {
682
687
  }
683
688
  return output.files.filter(isSurfaceSnapshotFile).map((file) => ({ ...file }));
684
689
  }
685
- function isSurfaceSnapshotFile(value) {
686
- const record = jsonRecord(value);
687
- return (typeof record.path === "string" &&
688
- (record.kind === "text" || record.kind === "binary") &&
689
- (record.encoding === "utf8" || record.encoding === "base64") &&
690
- typeof record.content === "string" &&
691
- typeof record.executable === "boolean");
692
- }
693
690
  function readExecutionPurpose(job) {
694
691
  const input = jsonRecord(job.input);
695
692
  return stringValue(jsonRecord(input.execution).purpose);
@@ -751,15 +748,9 @@ function traceEvent(args) {
751
748
  }
752
749
  function traceUsageSummary(value) {
753
750
  const record = jsonRecord(value);
754
- const usage = Object.keys(jsonRecord(record.total)).length > 0
755
- ? jsonRecord(record.total)
756
- : Object.keys(jsonRecord(record.improver)).length > 0
757
- ? jsonRecord(record.improver)
758
- : Object.keys(jsonRecord(record.runner)).length > 0
759
- ? jsonRecord(record.runner)
760
- : Object.keys(jsonRecord(record.engine)).length > 0
761
- ? jsonRecord(record.engine)
762
- : record;
751
+ const usage = ["total", "improver", "runner", "engine"]
752
+ .map((key) => jsonRecord(record[key]))
753
+ .find((entry) => Object.keys(entry).length > 0) ?? record;
763
754
  if (Object.keys(usage).length === 0) {
764
755
  return null;
765
756
  }
@@ -779,11 +770,6 @@ function traceUsageSummary(value) {
779
770
  pricing_source: stringValue(usage.pricingSource) ?? stringValue(usage.pricing_source),
780
771
  };
781
772
  }
782
- function jsonRecord(value) {
783
- return value && typeof value === "object" && !Array.isArray(value)
784
- ? value
785
- : {};
786
- }
787
773
  function stringValue(value) {
788
774
  return typeof value === "string" && value.length > 0 ? value : null;
789
775
  }
@@ -850,70 +836,3 @@ async function writeJson(filePath, value) {
850
836
  await fs.mkdir(path.dirname(filePath), { recursive: true });
851
837
  await fs.writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`);
852
838
  }
853
- async function writeSurfaceFiles(root, files) {
854
- await fs.mkdir(root, { recursive: true });
855
- for (const file of files) {
856
- const target = path.join(root, normalizeRelativePath(file.path));
857
- await fs.mkdir(path.dirname(target), { recursive: true });
858
- const body = file.encoding === "base64" ? Buffer.from(file.content, "base64") : Buffer.from(file.content, "utf8");
859
- await fs.writeFile(target, body);
860
- if (file.executable) {
861
- await fs.chmod(target, 0o755).catch(() => undefined);
862
- }
863
- }
864
- }
865
- async function readSurfaceFiles(root) {
866
- const decoder = new TextDecoder("utf-8", { fatal: true });
867
- const files = [];
868
- async function walk(directory) {
869
- const entries = await fs.readdir(directory, { withFileTypes: true }).catch(() => []);
870
- for (const entry of entries) {
871
- const absolutePath = path.join(directory, entry.name);
872
- if (entry.isDirectory()) {
873
- await walk(absolutePath);
874
- continue;
875
- }
876
- if (!entry.isFile()) {
877
- continue;
878
- }
879
- const body = await fs.readFile(absolutePath);
880
- const relativePath = normalizeRelativePath(path.relative(root, absolutePath).replace(/\\/gu, "/"));
881
- const stats = await fs.stat(absolutePath);
882
- const content = encodeContent(body, decoder);
883
- files.push({
884
- path: relativePath,
885
- kind: content.encoding === "base64" ? "binary" : "text",
886
- encoding: content.encoding,
887
- content: content.content,
888
- executable: (stats.mode & 0o111) !== 0,
889
- });
890
- }
891
- }
892
- await walk(root);
893
- return files.sort((left, right) => left.path.localeCompare(right.path));
894
- }
895
- function encodeContent(body, decoder) {
896
- try {
897
- return {
898
- encoding: "utf8",
899
- content: decoder.decode(body),
900
- };
901
- }
902
- catch {
903
- return {
904
- encoding: "base64",
905
- content: body.toString("base64"),
906
- };
907
- }
908
- }
909
- function normalizeRelativePath(filePath) {
910
- const normalized = filePath.replace(/\\/gu, "/").replace(/^\/+/u, "");
911
- if (!normalized || normalized.includes("\0")) {
912
- throw new Error("File paths must be non-empty relative paths.");
913
- }
914
- const parts = normalized.split("/");
915
- if (parts.some((part) => part === ".." || part === "." || part === "")) {
916
- throw new Error(`Unsafe relative file path: ${filePath}`);
917
- }
918
- return normalized;
919
- }
@@ -1 +1 @@
1
- {"version":3,"file":"local-inspection.d.ts","sourceRoot":"","sources":["../src/local-inspection.ts"],"names":[],"mappings":"AAEA,OAAO,EAaL,KAAK,mBAAmB,EAGzB,MAAM,8BAA8B,CAAC;AAetC,OAAO,EAGL,KAAK,kBAAkB,EAExB,MAAM,qBAAqB,CAAC;AAgB7B,MAAM,WAAW,+BAA+B;IAC9C,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAAC;CACvD;AAED,wBAAgB,8BAA8B,CAC5C,OAAO,EAAE,+BAA+B,GACvC,mBAAmB,CAqCrB;AAED,wBAAgB,8BAA8B,CAC5C,SAAS,EAAE,MAAM,GAChB,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAiBnC"}
1
+ {"version":3,"file":"local-inspection.d.ts","sourceRoot":"","sources":["../src/local-inspection.ts"],"names":[],"mappings":"AAEA,OAAO,EAgBL,KAAK,mBAAmB,EAIzB,MAAM,8BAA8B,CAAC;AAetC,OAAO,EAGL,KAAK,kBAAkB,EAExB,MAAM,qBAAqB,CAAC;AAgB7B,MAAM,WAAW,+BAA+B;IAC9C,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAAC;CACvD;AAED,wBAAgB,8BAA8B,CAC5C,OAAO,EAAE,+BAA+B,GACvC,mBAAmB,CA2DrB;AAkBD,wBAAgB,8BAA8B,CAC5C,SAAS,EAAE,MAAM,GAChB,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAiBnC"}
@@ -1,5 +1,5 @@
1
1
  import path from "node:path";
2
- import { WorkbenchInspectionError, candidateRecordWithoutDerivedFields, candidateSummaryFromRecord, createWorkbenchInspection, loadAuthoredWorkbenchSourceDocument, traceSessionLabel, } from "@workbench-ai/workbench-core";
2
+ import { WorkbenchInspectionError, candidateRecordWithoutDerivedFields, candidateSummaryFromRecord, createCandidateFilePreview, createWorkbenchInspection, loadAuthoredWorkbenchSourceDocument, selectedFilePath, summarizeCandidateFiles, traceSessionLabel, } from "@workbench-ai/workbench-core";
3
3
  import { localBenchmarkFingerprint } from "./benchmark-fingerprint.js";
4
4
  import { loadLocalArchiveIndex, readLocalCandidateFilesForId, readLocalCandidateRecord, readLocalEvaluationRecord, readLocalExecutionFiles, readLocalJobInRun, readLocalRunJobs, readLocalRunRecord, } from "./local-archive.js";
5
5
  import { readLocalAuthoredProjectSource, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
@@ -14,14 +14,22 @@ export function createLocalWorkbenchInspection(options) {
14
14
  projectId: "local",
15
15
  snapshot: () => localBenchmarkSnapshot(context),
16
16
  spec: (input) => localSpecDocument(context, input.fingerprint),
17
- sourceFiles: (input) => localBenchmarkMountedFiles(context, input.fingerprint),
17
+ sourceFiles: async (input) => {
18
+ const files = await localBenchmarkMountedFiles(context, input.fingerprint);
19
+ return summarizeCandidateFiles(files, files.map((file) => file.path));
20
+ },
21
+ sourceFileSurface: async (input) => {
22
+ const files = await localBenchmarkMountedFiles(context, input.fingerprint);
23
+ return localFileSurface(files, files.map((file) => file.path), input.path, input.view);
24
+ },
18
25
  candidate: (input) => readCandidateForInspection(context.workspace, input.id),
19
26
  candidateFiles: async (input) => {
20
27
  const candidate = await readCandidateForInspection(context.workspace, input.id);
21
- return {
22
- files: await readCandidateFilesForInspection(context.workspace, input.id),
23
- changedPaths: candidate.fileChanges,
24
- };
28
+ return summarizeCandidateFiles(await readCandidateFilesForInspection(context.workspace, input.id), candidate.fileChanges);
29
+ },
30
+ candidateFileSurface: async (input) => {
31
+ const candidate = await readCandidateForInspection(context.workspace, input.id);
32
+ return localFileSurface(await readCandidateFilesForInspection(context.workspace, input.id), candidate.fileChanges, input.path, input.view);
25
33
  },
26
34
  evaluation: (input) => readEvaluationForInspection(context.workspace, input.id),
27
35
  run: async (input) => {
@@ -34,12 +42,29 @@ export function createLocalWorkbenchInspection(options) {
34
42
  };
35
43
  },
36
44
  jobInRun: (input) => readExecutionJobForRun(context.workspace, input.runId, input.jobId),
37
- executionFiles: (input) => readExecutionFilesForRun(context.workspace, input.runId, input.jobId),
45
+ executionFiles: async (input) => {
46
+ const files = await readExecutionFilesForRun(context.workspace, input.runId, input.jobId);
47
+ return summarizeCandidateFiles(files, files.map((file) => file.path));
48
+ },
49
+ executionFileSurface: async (input) => {
50
+ const files = await readExecutionFilesForRun(context.workspace, input.runId, input.jobId);
51
+ return localFileSurface(files, files.map((file) => file.path), input.path, input.view);
52
+ },
38
53
  traceForJob: readLocalAggregateTrace,
39
54
  traceSessionsForJob: readLocalTraceSessions,
40
55
  };
41
56
  return createWorkbenchInspection(backend);
42
57
  }
58
+ function localFileSurface(files, changedPaths, path, view = "rendered") {
59
+ const summaries = summarizeCandidateFiles(files, changedPaths);
60
+ const previewPath = selectedFilePath(path, summaries);
61
+ return {
62
+ files: summaries,
63
+ preview: previewPath
64
+ ? createCandidateFilePreview({ files, path: previewPath, view })
65
+ : null,
66
+ };
67
+ }
43
68
  export function createLocalProjectSourceReader(workspace) {
44
69
  const resolvedWorkspace = path.resolve(workspace);
45
70
  let cached = null;
@@ -90,9 +115,20 @@ async function localSpecDocument(context, benchmarkFingerprint) {
90
115
  currentFingerprint &&
91
116
  requestedFingerprint !== currentFingerprint) {
92
117
  const snapshot = await loadLocalArchiveIndex(workspace);
93
- const document = localHistoricalBenchmarkDocument(snapshot, requestedFingerprint);
94
- if (document) {
95
- return document;
118
+ const source = localHistoricalBenchmarkSource(snapshot, requestedFingerprint);
119
+ if (source) {
120
+ return loadAuthoredWorkbenchSourceDocument({
121
+ sourceYaml: source.sourceYaml,
122
+ path: WORKBENCH_BENCHMARK_FILE,
123
+ sourceFiles: [{
124
+ path: WORKBENCH_BENCHMARK_FILE,
125
+ kind: "text",
126
+ encoding: "utf8",
127
+ content: source.sourceYaml,
128
+ executable: false,
129
+ }],
130
+ cases: source.engineResolveFiles,
131
+ });
96
132
  }
97
133
  throw new WorkbenchInspectionError(`Benchmark version not found: ${requestedFingerprint}`, { status: 404 });
98
134
  }
@@ -116,12 +152,16 @@ async function localBenchmarkMountedFiles(context, benchmarkFingerprint) {
116
152
  currentFingerprint &&
117
153
  requestedFingerprint !== currentFingerprint) {
118
154
  const snapshot = await loadLocalArchiveIndex(workspace);
119
- return localHistoricalBenchmarkFiles(snapshot, requestedFingerprint);
155
+ const source = localHistoricalBenchmarkSource(snapshot, requestedFingerprint);
156
+ if (source) {
157
+ return source.engineResolveFiles.map((file) => ({ ...file }));
158
+ }
159
+ throw new WorkbenchInspectionError(`Benchmark version not found: ${requestedFingerprint}`, { status: 404 });
120
160
  }
121
161
  return inspectableEngineCaseFiles(projectSource.engineCases);
122
162
  }
123
163
  function publicLocalRunSummary(run) {
124
- const { executionFingerprint: _executionFingerprint, ...summary } = run;
164
+ const { executionFingerprint: _executionFingerprint, input: _input, ...summary } = run;
125
165
  return summary;
126
166
  }
127
167
  async function readCurrentBenchmarkFingerprint(context) {
@@ -147,21 +187,17 @@ function caseSummaryFilesFromEngineCases(engineCases, files) {
147
187
  })),
148
188
  ];
149
189
  }
150
- function localHistoricalBenchmarkDocument(snapshot, benchmarkFingerprint) {
151
- const candidate = snapshot.candidates.find((entry) => entry.benchmarkFingerprint === benchmarkFingerprint && readBenchmarkSourceMetadata(entry));
152
- const source = candidate ? readBenchmarkSourceMetadata(candidate) : null;
153
- if (!source?.sourceYaml) {
154
- return null;
190
+ function localHistoricalBenchmarkSource(snapshot, benchmarkFingerprint) {
191
+ for (const run of snapshot.runs) {
192
+ if (run.benchmarkFingerprint !== benchmarkFingerprint) {
193
+ continue;
194
+ }
195
+ const source = readRunSourceInput(run);
196
+ if (source) {
197
+ return source;
198
+ }
155
199
  }
156
- return loadAuthoredWorkbenchSourceDocument({
157
- sourceYaml: source.sourceYaml,
158
- path: WORKBENCH_BENCHMARK_FILE,
159
- sourceFiles: source.files,
160
- cases: localHistoricalBenchmarkFiles(snapshot, benchmarkFingerprint),
161
- });
162
- }
163
- function localHistoricalBenchmarkFiles(_snapshot, _benchmarkFingerprint) {
164
- return [];
200
+ return null;
165
201
  }
166
202
  function inspectableEngineCaseFiles(engineCases) {
167
203
  return engineCases.flatMap((bundle) => engineCaseFiles(bundle).map((file) => ({
@@ -195,18 +231,19 @@ async function readRunForInspection(workspace, runId) {
195
231
  async function readCandidateFilesForInspection(workspace, candidateId) {
196
232
  return await readArchiveRecord("Candidate", candidateId, () => readLocalCandidateFilesForId(workspace, candidateId));
197
233
  }
198
- function readBenchmarkSourceMetadata(candidate) {
199
- const benchmark = asRecord(asRecord(candidate.meta)?.benchmark);
200
- const files = Array.isArray(benchmark?.files)
201
- ? benchmark.files
234
+ function readRunSourceInput(run) {
235
+ const input = asRecord(run.input);
236
+ const sourceYaml = typeof input?.sourceYaml === "string" ? input.sourceYaml : null;
237
+ if (!input || !sourceYaml) {
238
+ return null;
239
+ }
240
+ const engineResolveFiles = Array.isArray(input.engineResolveFiles)
241
+ ? input.engineResolveFiles
202
242
  .map(readSurfaceSnapshotFile)
203
243
  .filter((file) => file !== null)
244
+ .sort((left, right) => left.path.localeCompare(right.path))
204
245
  : [];
205
- const sourceYaml = files.find((file) => file.path === WORKBENCH_BENCHMARK_FILE)?.content ?? null;
206
- if (!sourceYaml) {
207
- return null;
208
- }
209
- return { sourceYaml, files };
246
+ return { sourceYaml, engineResolveFiles };
210
247
  }
211
248
  function readSurfaceSnapshotFile(value) {
212
249
  const record = asRecord(value);
package/package.json CHANGED
@@ -1,6 +1,11 @@
1
1
  {
2
2
  "name": "@workbench-ai/workbench",
3
- "version": "0.0.65",
3
+ "version": "0.0.67",
4
+ "repository": {
5
+ "type": "git",
6
+ "url": "git+https://github.com/workbench-ai/workbench.git",
7
+ "directory": "packages/cli"
8
+ },
4
9
  "type": "module",
5
10
  "publishConfig": {
6
11
  "registry": "https://registry.npmjs.org/",
@@ -16,9 +21,9 @@
16
21
  ],
17
22
  "dependencies": {
18
23
  "yaml": "^2.8.2",
19
- "@workbench-ai/workbench-built-in-adapters": "0.0.65",
20
- "@workbench-ai/workbench-protocol": "0.0.65",
21
- "@workbench-ai/workbench-core": "0.0.65"
24
+ "@workbench-ai/workbench-built-in-adapters": "0.0.67",
25
+ "@workbench-ai/workbench-core": "0.0.67",
26
+ "@workbench-ai/workbench-protocol": "0.0.67"
22
27
  },
23
28
  "devDependencies": {
24
29
  "@tailwindcss/postcss": "^4.2.2",