@workbench-ai/workbench 0.0.48 → 0.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { type SurfaceSnapshotFile } from "@workbench-ai/workbench-core";
1
+ import { type RunSummary, type SurfaceSnapshotFile } from "@workbench-ai/workbench-core";
2
2
  import { type LocalProjectSource } from "./project-source.js";
3
3
  export interface LocalWorkbenchDevServer {
4
4
  url: string;
@@ -20,30 +20,17 @@ export declare function localBenchmarkSnapshot(context: LocalWorkbenchRequestCon
20
20
  workspaceRoot: string;
21
21
  activeId: string | null;
22
22
  currentBenchmarkFingerprint: string | null;
23
- summaries: {
24
- id: string;
25
- name?: string;
26
- ordinal: number;
27
- benchmarkFingerprint: string;
28
- subjectFingerprint: string;
29
- ownerUserId?: string;
30
- ownerUsername?: string;
31
- visibility?: "private" | "public";
32
- createdAt: string;
33
- baseId?: string;
34
- referenceIds: string[];
35
- status: import("@workbench-ai/workbench-contract").SubjectStatus;
36
- fileChanges: string[];
37
- metrics?: Record<string, number>;
38
- usage?: import("@workbench-ai/workbench-contract").UsageSummary;
39
- }[];
23
+ summaries: import("@workbench-ai/workbench-contract").CandidateSummary[];
40
24
  evaluations: {
41
25
  id: string;
42
26
  runId: string;
43
27
  benchmarkFingerprint: string;
44
- subjectFingerprint: string;
45
- subjectId: string;
46
- subjectName?: string;
28
+ candidateFingerprint: string;
29
+ candidateId: string;
30
+ candidateName?: string;
31
+ candidateVersion: number;
32
+ candidateRunId?: string;
33
+ candidateRunName?: string;
47
34
  createdAt: string;
48
35
  updatedAt: string;
49
36
  status: import("@workbench-ai/workbench-contract").EvaluationStatus;
@@ -55,7 +42,7 @@ export declare function localBenchmarkSnapshot(context: LocalWorkbenchRequestCon
55
42
  usage?: import("@workbench-ai/workbench-contract").EvaluationUsageStats;
56
43
  error?: string;
57
44
  }[];
58
- runs: import("@workbench-ai/workbench-contract").RunSummary[];
45
+ runs: RunSummary[];
59
46
  }>;
60
47
  export declare function localSpecDocument(context: LocalWorkbenchRequestContext, benchmarkFingerprint?: string | null): Promise<import("@workbench-ai/workbench-contract").AuthoredWorkbenchSourceDocument>;
61
48
  export declare function localSourceFiles(workspace: string): Promise<SurfaceSnapshotFile[]>;
@@ -1 +1 @@
1
- {"version":3,"file":"dev-open-server.d.ts","sourceRoot":"","sources":["../src/dev-open-server.ts"],"names":[],"mappings":"AAKA,OAAO,EAUL,KAAK,mBAAmB,EAIzB,MAAM,8BAA8B,CAAC;AAatC,OAAO,EAGL,KAAK,kBAAkB,EAExB,MAAM,qBAAqB,CAAC;AAG7B,MAAM,WAAW,uBAAuB;IACtC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5B;AAED,MAAM,WAAW,8BAA8B;IAC7C,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkBD,MAAM,WAAW,4BAA4B;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAAC;CACtD;AAKD,wBAAsB,4BAA4B,CAChD,OAAO,EAAE,8BAA8B,GACtC,OAAO,CAAC,uBAAuB,CAAC,CAwClC;AAoOD,wBAAsB,sBAAsB,CAAC,OAAO,EAAE,4BAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiBjF;AAUD,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,4BAA4B,EACrC,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,uFAiCrC;AAwBD,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAExF;AAED,wBAAsB,0BAA0B,CAC9C,OAAO,EAAE,4BAA4B,EACrC,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,GACnC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAchC"}
1
+ {"version":3,"file":"dev-open-server.d.ts","sourceRoot":"","sources":["../src/dev-open-server.ts"],"names":[],"mappings":"AAKA,OAAO,EAYL,KAAK,UAAU,EACf,KAAK,mBAAmB,EAIzB,MAAM,8BAA8B,CAAC;AAatC,OAAO,EAGL,KAAK,kBAAkB,EAExB,MAAM,qBAAqB,CAAC;AAG7B,MAAM,WAAW,uBAAuB;IACtC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5B;AAED,MAAM,WAAW,8BAA8B;IAC7C,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkBD,MAAM,WAAW,4BAA4B;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,OAAO,CAAC,kBAAkB,CAAC,CAAC;CACtD;AAKD,wBAAsB,4BAA4B,CAChD,OAAO,EAAE,8BAA8B,GACtC,OAAO,CAAC,uBAAuB,CAAC,CAwClC;AAoOD,wBAAsB,sBAAsB,CAAC,OAAO,EAAE,4BAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiBjF;AAeD,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,4BAA4B,EACrC,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,uFAiCrC;AAwBD,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAExF;AAED,wBAAsB,0BAA0B,CAC9C,OAAO,EAAE,4BAA4B,EACrC,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,GACnC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAchC"}
@@ -2,8 +2,8 @@ import { promises as fs } from "node:fs";
2
2
  import http from "node:http";
3
3
  import path from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
- import { buildSubjectCaseExecutionRefs, buildWorkbenchExecutionEvidence, createSubjectFilePreview, createCaseReview, loadAuthoredWorkbenchSourceDocument, summarizeSubjectFiles, traceSessionLabel, } from "@workbench-ai/workbench-core";
6
- import { readLocalExecutionFiles, loadLocalArchiveIndex, readLocalEvaluationRecord, readLocalJobInRun, readLocalRunJobs, readLocalSubjectFilesForId, readLocalSubjectRecord, } from "./local-archive.js";
5
+ import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, candidateRecordWithoutDerivedFields, candidateSummaryFromRecord, createCandidateFilePreview, createCaseReview, loadAuthoredWorkbenchSourceDocument, summarizeCandidateFiles, traceSessionLabel, } from "@workbench-ai/workbench-core";
6
+ import { readLocalExecutionFiles, loadLocalArchiveIndex, readLocalEvaluationRecord, readLocalJobInRun, readLocalRunJobs, readLocalCandidateFilesForId, readLocalCandidateRecord, } from "./local-archive.js";
7
7
  import { readLocalAuthoredProjectSource, readLocalProjectSource, WORKBENCH_BENCHMARK_FILE, } from "./project-source.js";
8
8
  import { localBenchmarkFingerprint } from "./benchmark-fingerprint.js";
9
9
  class LocalApiError extends Error {
@@ -120,45 +120,45 @@ async function handleApiRequest(request, response, context, url) {
120
120
  sendJson(response, await localSpecDocument(context, readOptionalSearchString(url.searchParams, "fingerprint")), 200, request.method);
121
121
  return;
122
122
  case "/api/source/files":
123
- sendJson(response, summarizeSubjectFiles(await localBenchmarkMountedFiles(context, readOptionalSearchString(url.searchParams, "fingerprint")), []), 200, request.method);
123
+ sendJson(response, summarizeCandidateFiles(await localBenchmarkMountedFiles(context, readOptionalSearchString(url.searchParams, "fingerprint")), []), 200, request.method);
124
124
  return;
125
125
  case "/api/source/preview":
126
- sendJson(response, createSubjectFilePreview({
126
+ sendJson(response, createCandidateFilePreview({
127
127
  files: await localBenchmarkMountedFiles(context, readOptionalSearchString(url.searchParams, "fingerprint")),
128
128
  path: readSearchString(url.searchParams, "path"),
129
129
  view: readPreviewMode(url.searchParams),
130
130
  }), 200, request.method);
131
131
  return;
132
132
  case "/api/record":
133
- sendJson(response, await readSubjectForApi(workspace, readSearchString(url.searchParams, "id")), 200, request.method);
133
+ sendJson(response, await readCandidateForApi(workspace, readSearchString(url.searchParams, "id")), 200, request.method);
134
134
  return;
135
135
  case "/api/evaluation":
136
136
  sendJson(response, await readEvaluationForApi(workspace, readSearchString(url.searchParams, "id")), 200, request.method);
137
137
  return;
138
- case "/api/subject/files": {
139
- const subjectId = readSearchString(url.searchParams, "id");
140
- const subject = await readSubjectForApi(workspace, subjectId);
141
- sendJson(response, summarizeSubjectFiles(await readSubjectFilesForApi(workspace, subjectId), subject.fileChanges), 200, request.method);
138
+ case "/api/candidate/files": {
139
+ const candidateId = readSearchString(url.searchParams, "id");
140
+ const candidate = await readCandidateForApi(workspace, candidateId);
141
+ sendJson(response, summarizeCandidateFiles(await readCandidateFilesForApi(workspace, candidateId), candidate.fileChanges), 200, request.method);
142
142
  return;
143
143
  }
144
- case "/api/subject/preview": {
145
- const subjectId = readSearchString(url.searchParams, "id");
146
- sendJson(response, createSubjectFilePreview({
147
- files: await readSubjectFilesForApi(workspace, subjectId),
144
+ case "/api/candidate/preview": {
145
+ const candidateId = readSearchString(url.searchParams, "id");
146
+ sendJson(response, createCandidateFilePreview({
147
+ files: await readCandidateFilesForApi(workspace, candidateId),
148
148
  path: readSearchString(url.searchParams, "path"),
149
149
  view: readPreviewMode(url.searchParams),
150
150
  }), 200, request.method);
151
151
  return;
152
152
  }
153
153
  case "/api/case-review": {
154
- const subjectId = readSearchString(url.searchParams, "id");
154
+ const candidateId = readSearchString(url.searchParams, "id");
155
155
  const caseId = readSearchString(url.searchParams, "case");
156
156
  const runId = readSearchString(url.searchParams, "run");
157
157
  const jobs = await readLocalRunJobs(workspace, runId);
158
158
  sendJson(response, createCaseReview({
159
- subject: await readSubjectForApi(workspace, subjectId),
159
+ candidate: await readCandidateForApi(workspace, candidateId),
160
160
  caseId,
161
- executions: buildSubjectCaseExecutionRefs({ jobs, subjectId, caseId }),
161
+ executions: buildCandidateCaseExecutionRefs({ jobs, candidateId, caseId }),
162
162
  }), 200, request.method);
163
163
  return;
164
164
  }
@@ -190,7 +190,7 @@ async function handleApiRequest(request, response, context, url) {
190
190
  const previewJobId = readSearchString(url.searchParams, "id");
191
191
  const previewFilePath = readSearchString(url.searchParams, "path");
192
192
  const previewFiles = await readExecutionFilesForRun(workspace, previewRunId, previewJobId);
193
- sendJson(response, createSubjectFilePreview({
193
+ sendJson(response, createCandidateFilePreview({
194
194
  files: previewFiles,
195
195
  path: previewFilePath,
196
196
  view: readPreviewMode(url.searchParams),
@@ -204,11 +204,11 @@ async function handleApiRequest(request, response, context, url) {
204
204
  export async function localBenchmarkSnapshot(context) {
205
205
  const { workspace } = context;
206
206
  const snapshot = await loadLocalArchiveIndex(workspace);
207
- const subjects = snapshot.subjects.filter(isInspectableSubjectRecord);
208
- const summaries = subjects.map(subjectSummary);
209
- const activeId = snapshot.activeId && subjects.some((subject) => subject.id === snapshot.activeId)
207
+ const candidates = snapshot.candidates.filter(isInspectableCandidateRecord);
208
+ const summaries = candidates.map(candidateSummary);
209
+ const activeId = snapshot.activeId && candidates.some((candidate) => candidate.id === snapshot.activeId)
210
210
  ? snapshot.activeId
211
- : subjects.at(-1)?.id ?? null;
211
+ : candidates.at(-1)?.id ?? null;
212
212
  const currentBenchmarkFingerprint = await readCurrentBenchmarkFingerprint(context);
213
213
  return {
214
214
  workspaceRoot: path.resolve(workspace),
@@ -216,9 +216,13 @@ export async function localBenchmarkSnapshot(context) {
216
216
  currentBenchmarkFingerprint,
217
217
  summaries,
218
218
  evaluations: snapshot.evaluations.map(evaluationSummary),
219
- runs: snapshot.runs,
219
+ runs: snapshot.runs.map(publicLocalRunSummary),
220
220
  };
221
221
  }
222
+ function publicLocalRunSummary(run) {
223
+ const { executionFingerprint: _executionFingerprint, ...summary } = run;
224
+ return summary;
225
+ }
222
226
  async function readCurrentBenchmarkFingerprint(context) {
223
227
  return await context.readProjectSource()
224
228
  .then(localBenchmarkFingerprint)
@@ -290,8 +294,8 @@ export async function localBenchmarkMountedFiles(context, benchmarkFingerprint)
290
294
  return inspectableEngineCaseFiles(projectSource.engineCases);
291
295
  }
292
296
  function localHistoricalBenchmarkDocument(snapshot, benchmarkFingerprint) {
293
- const subject = snapshot.subjects.find((entry) => entry.benchmarkFingerprint === benchmarkFingerprint && readBenchmarkSourceMetadata(entry));
294
- const source = subject ? readBenchmarkSourceMetadata(subject) : null;
297
+ const candidate = snapshot.candidates.find((entry) => entry.benchmarkFingerprint === benchmarkFingerprint && readBenchmarkSourceMetadata(entry));
298
+ const source = candidate ? readBenchmarkSourceMetadata(candidate) : null;
295
299
  if (!source?.sourceYaml) {
296
300
  return null;
297
301
  }
@@ -317,28 +321,28 @@ function engineCaseFiles(bundle) {
317
321
  ? buckets.source
318
322
  : [...(buckets.public ?? []), ...(buckets.private ?? [])];
319
323
  }
320
- function subjectSummary(subject) {
321
- const { eval: _eval, prompt: _prompt, meta: _meta, ...summary } = subject;
322
- return summary;
324
+ function candidateSummary(candidate) {
325
+ return candidateSummaryFromRecord(candidate);
323
326
  }
324
- function isInspectableSubjectRecord(subject) {
325
- return Boolean(subject.eval || asRecord(asRecord(subject.meta)?.source));
327
+ function isInspectableCandidateRecord(candidate) {
328
+ return Boolean(candidate.eval || asRecord(asRecord(candidate.meta)?.source));
326
329
  }
327
330
  function evaluationSummary(evaluation) {
328
331
  const { evaluation: _evaluation, ...summary } = evaluation;
329
332
  return summary;
330
333
  }
331
- async function readSubjectForApi(workspace, subjectId) {
332
- return await readArchiveRecord("Subject", subjectId, () => readLocalSubjectRecord(workspace, subjectId));
334
+ async function readCandidateForApi(workspace, candidateId) {
335
+ const candidate = await readArchiveRecord("Candidate", candidateId, () => readLocalCandidateRecord(workspace, candidateId));
336
+ return candidateRecordWithoutDerivedFields(candidate);
333
337
  }
334
338
  async function readEvaluationForApi(workspace, evaluationId) {
335
339
  return await readArchiveRecord("Evaluation", evaluationId, () => readLocalEvaluationRecord(workspace, evaluationId));
336
340
  }
337
- async function readSubjectFilesForApi(workspace, subjectId) {
338
- return await readArchiveRecord("Subject", subjectId, () => readLocalSubjectFilesForId(workspace, subjectId));
341
+ async function readCandidateFilesForApi(workspace, candidateId) {
342
+ return await readArchiveRecord("Candidate", candidateId, () => readLocalCandidateFilesForId(workspace, candidateId));
339
343
  }
340
- function readBenchmarkSourceMetadata(subject) {
341
- const benchmark = asRecord(asRecord(subject.meta)?.benchmark);
344
+ function readBenchmarkSourceMetadata(candidate) {
345
+ const benchmark = asRecord(asRecord(candidate.meta)?.benchmark);
342
346
  const files = Array.isArray(benchmark?.files)
343
347
  ? benchmark.files
344
348
  .map(readSurfaceSnapshotFile)
@@ -390,7 +394,7 @@ async function readArchiveRecord(kind, id, read) {
390
394
  }
391
395
  async function loadExecutionFiles(workspace, runId, jobId) {
392
396
  const files = await readExecutionFilesForRun(workspace, runId, jobId);
393
- return summarizeSubjectFiles(files);
397
+ return summarizeCandidateFiles(files);
394
398
  }
395
399
  async function readExecutionFilesForRun(workspace, runId, jobId) {
396
400
  await assertExecutionJobInRun(workspace, runId, jobId);
@@ -430,7 +434,7 @@ function readLocalTraceSessions(job, role) {
430
434
  jobId: typeof session.jobId === "string" && session.jobId.length > 0
431
435
  ? session.jobId
432
436
  : job.id,
433
- role: session.role === "optimizer" || session.role === "runner" || session.role === "engine"
437
+ role: session.role === "improver" || session.role === "runner" || session.role === "engine"
434
438
  ? session.role
435
439
  : role,
436
440
  kind: typeof session.kind === "string" && session.kind.length > 0 ? session.kind : "trace",
@@ -443,7 +447,7 @@ function readLocalTraceSessions(job, role) {
443
447
  }));
444
448
  }
445
449
  function traceSessionDisplayLabel(session, fallbackRole) {
446
- const role = session.role === "optimizer" || session.role === "runner" || session.role === "engine"
450
+ const role = session.role === "improver" || session.role === "runner" || session.role === "engine"
447
451
  ? session.role
448
452
  : fallbackRole;
449
453
  return typeof session.label === "string" && session.label.length > 0
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAsHA,UAAU,KAAK;IACb,KAAK,EAAE,MAAM,CAAC,cAAc,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AA6BD,UAAU,iBAAiB;CAAG;AAqK9B,wBAAsB,MAAM,CAC1B,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,EAAE,GAAE,KAIH,EACD,cAAc,GAAE,iBAAsB,GACrC,OAAO,CAAC,MAAM,CAAC,CA8GjB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAyHA,UAAU,KAAK;IACb,KAAK,EAAE,MAAM,CAAC,cAAc,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AA6BD,UAAU,iBAAiB;CAAG;AA4K9B,wBAAsB,MAAM,CAC1B,IAAI,EAAE,SAAS,MAAM,EAAE,EACvB,EAAE,GAAE,KAIH,EACD,cAAc,GAAE,iBAAsB,GACrC,OAAO,CAAC,MAAM,CAAC,CAiHjB"}