@workbench-ai/workbench-core 0.0.67 → 0.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/coded-errors.d.ts +27 -0
  2. package/dist/coded-errors.d.ts.map +1 -0
  3. package/dist/coded-errors.js +52 -0
  4. package/dist/execution-events.d.ts +5 -1
  5. package/dist/execution-events.d.ts.map +1 -1
  6. package/dist/execution-events.js +13 -3
  7. package/dist/execution-graph.d.ts +4 -3
  8. package/dist/execution-graph.d.ts.map +1 -1
  9. package/dist/execution-graph.js +15 -14
  10. package/dist/execution-jobs.d.ts +5 -20
  11. package/dist/execution-jobs.d.ts.map +1 -1
  12. package/dist/execution-jobs.js +7 -91
  13. package/dist/execution-outputs.d.ts +2 -2
  14. package/dist/execution-outputs.d.ts.map +1 -1
  15. package/dist/execution-outputs.js +10 -10
  16. package/dist/execution-runtime-types.d.ts +1 -1
  17. package/dist/execution-runtime-types.d.ts.map +1 -1
  18. package/dist/execution-scheduler.d.ts +5 -3
  19. package/dist/execution-scheduler.d.ts.map +1 -1
  20. package/dist/execution-scheduler.js +33 -9
  21. package/dist/execution-traces.js +1 -1
  22. package/dist/generic-spec.d.ts +7 -61
  23. package/dist/generic-spec.d.ts.map +1 -1
  24. package/dist/generic-spec.js +0 -679
  25. package/dist/index.d.ts +377 -220
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +7887 -3881
  28. package/dist/remote-model.d.ts +17 -0
  29. package/dist/remote-model.d.ts.map +1 -0
  30. package/dist/remote-model.js +86 -0
  31. package/dist/runtime-dockerfile.d.ts +1 -1
  32. package/dist/runtime-dockerfile.d.ts.map +1 -1
  33. package/dist/runtime-dockerfile.js +4 -4
  34. package/dist/sandbox-backends/docker.d.ts.map +1 -1
  35. package/dist/sandbox-backends/docker.js +34 -16
  36. package/dist/sandbox-inputs.js +3 -3
  37. package/dist/sandbox-plane.d.ts.map +1 -1
  38. package/dist/sandbox-plane.js +13 -9
  39. package/dist/skill-patch.d.ts +8 -0
  40. package/dist/skill-patch.d.ts.map +1 -0
  41. package/dist/{candidate-patch.js → skill-patch.js} +5 -5
  42. package/package.json +3 -3
  43. package/worker/sandbox-adapter-runner.cjs +2 -2
  44. package/dist/candidate-patch.d.ts +0 -8
  45. package/dist/candidate-patch.d.ts.map +0 -1
  46. package/dist/execution-evidence.d.ts +0 -22
  47. package/dist/execution-evidence.d.ts.map +0 -1
  48. package/dist/execution-evidence.js +0 -302
  49. package/dist/inspection.d.ts +0 -117
  50. package/dist/inspection.d.ts.map +0 -1
  51. package/dist/inspection.js +0 -224
@@ -1,302 +0,0 @@
1
- import { mergeWorkbenchExecutionTracesByJob } from "./execution-traces.js";
2
- export function buildCandidateCaseExecutionRefs(args) {
3
- const groups = new Map();
4
- for (const job of args.jobs) {
5
- const kind = readWorkbenchExecutionPurpose(job);
6
- const jobCandidateId = job.candidateId ?? readWorkbenchExecutionMetadataString(job, "candidateId");
7
- const jobCaseId = readWorkbenchExecutionMetadataString(job, "caseId");
8
- if (jobCandidateId === args.candidateId &&
9
- kind === "attempt" &&
10
- caseReviewCaseIdsMatch(jobCaseId, args.caseId) &&
11
- caseReviewSampleIndicesMatch(readWorkbenchExecutionMetadataNumber(job, "sampleIndex"), args.sampleIndex)) {
12
- const key = [
13
- job.runId,
14
- kind,
15
- jobCaseId ?? "",
16
- readWorkbenchExecutionMetadataNumber(job, "sampleIndex") ?? "",
17
- ].join("\0");
18
- groups.set(key, [...(groups.get(key) ?? []), job]);
19
- }
20
- }
21
- const executions = [...groups.values()]
22
- .map((group) => group.slice().sort(compareWorkbenchExecutionJobs))
23
- .flatMap((group) => {
24
- const first = group[0];
25
- if (!first) {
26
- return [];
27
- }
28
- const kind = readWorkbenchExecutionPurpose(first);
29
- if (kind !== "attempt") {
30
- return [];
31
- }
32
- const startedAt = minTimestamp(group.map((job) => job.startedAt));
33
- const finishedAt = maxTimestamp(group.map((job) => job.finishedAt));
34
- const durationMs = startedAt && finishedAt
35
- ? Math.max(0, Date.parse(finishedAt) - Date.parse(startedAt))
36
- : null;
37
- return [{
38
- runId: first.runId,
39
- kind,
40
- role: "engine",
41
- status: resolveWorkbenchJobGroupStatus(group),
42
- jobIds: group.map((job) => job.id),
43
- executionIds: group.flatMap((job) => {
44
- const executionId = readWorkbenchExecutionId(job);
45
- return executionId ? [executionId] : [];
46
- }),
47
- createdAt: minTimestamp(group.map((job) => job.createdAt)) ?? first.createdAt,
48
- ...(startedAt ? { startedAt } : {}),
49
- ...(finishedAt ? { finishedAt } : {}),
50
- ...(durationMs !== null ? { durationMs } : {}),
51
- ...optionalString("caseId", readWorkbenchExecutionMetadataString(first, "caseId")),
52
- ...optionalNumber("sampleIndex", readWorkbenchExecutionMetadataNumber(first, "sampleIndex")),
53
- ...optionalNumber("attemptIndex", readWorkbenchExecutionMetadataNumber(first, "attemptIndex")),
54
- }];
55
- })
56
- .sort(compareCandidateCaseExecutions);
57
- return selectCurrentExecutionRun(executions);
58
- }
59
- export function buildWorkbenchExecutionEvidence(args) {
60
- const groups = new Map();
61
- for (const job of args.jobs) {
62
- if (isBaselineMaterializationJob(job)) {
63
- continue;
64
- }
65
- const purpose = readWorkbenchExecutionPurpose(job);
66
- if (!purpose) {
67
- continue;
68
- }
69
- const key = [
70
- job.runId,
71
- purpose,
72
- job.candidateId ?? readWorkbenchExecutionMetadataString(job, "candidateId") ?? "",
73
- readWorkbenchExecutionMetadataString(job, "caseId") ?? "",
74
- readWorkbenchExecutionMetadataNumber(job, "sampleIndex") ?? "",
75
- readWorkbenchExecutionMetadataNumber(job, "attemptIndex") ?? "",
76
- ].join("\0");
77
- groups.set(key, [...(groups.get(key) ?? []), job]);
78
- }
79
- return [...groups.values()]
80
- .map((group) => group.slice().sort(compareWorkbenchTraceJobs))
81
- .flatMap((group) => {
82
- const first = group[0];
83
- if (!first) {
84
- return [];
85
- }
86
- const purpose = readWorkbenchExecutionPurpose(first);
87
- if (!purpose) {
88
- return [];
89
- }
90
- const role = traceRoleForPurpose(purpose);
91
- const sessions = group.flatMap((job) => args.traceSessionsForJob
92
- ? args.traceSessionsForJob(job, role)
93
- : []);
94
- const jobIds = group.map((job) => job.id);
95
- const executionIds = group.flatMap((job) => {
96
- const executionId = readWorkbenchExecutionId(job);
97
- return executionId ? [executionId] : [];
98
- });
99
- return [{
100
- id: [
101
- purpose,
102
- first.runId,
103
- readWorkbenchExecutionMetadataString(first, "caseId") ?? "current",
104
- readWorkbenchExecutionMetadataNumber(first, "sampleIndex") ?? "sample",
105
- readWorkbenchExecutionMetadataNumber(first, "attemptIndex") ?? "attempt",
106
- jobIds.join("_"),
107
- ].join(":"),
108
- kind: purpose,
109
- executionId: group.length === 1 ? readWorkbenchExecutionId(first) : null,
110
- role,
111
- status: resolveWorkbenchJobGroupStatus(group),
112
- jobIds,
113
- executionIds,
114
- ...(first.candidateId ? { candidateId: first.candidateId } : {}),
115
- ...optionalString("caseId", readWorkbenchExecutionMetadataString(first, "caseId")),
116
- ...optionalNumber("sampleIndex", readWorkbenchExecutionMetadataNumber(first, "sampleIndex")),
117
- ...optionalNumber("attemptIndex", readWorkbenchExecutionMetadataNumber(first, "attemptIndex")),
118
- sessions,
119
- trace: mergeWorkbenchExecutionTracesByJob({
120
- traceIdPrefix: args.traceIdPrefix,
121
- stageId: purpose,
122
- jobs: [
123
- ...group.map((job) => ({
124
- id: job.id,
125
- trace: args.traceForJob(job, role),
126
- })),
127
- ...sessions.map((session) => ({
128
- id: session.id,
129
- jobId: session.jobId,
130
- trace: session.trace,
131
- })),
132
- ],
133
- }),
134
- }];
135
- })
136
- .sort(compareWorkbenchExecutionEvidence);
137
- }
138
- export function readWorkbenchExecutionPurpose(job) {
139
- if (job.kind !== "execute") {
140
- return null;
141
- }
142
- const purpose = readExecutionRecord(job)?.purpose;
143
- return purpose === "improve" || purpose === "attempt"
144
- ? purpose
145
- : null;
146
- }
147
- export function readWorkbenchExecutionId(job) {
148
- const id = readExecutionRecord(job)?.id;
149
- return typeof id === "string" && id.length > 0 ? id : null;
150
- }
151
- export function readWorkbenchExecutionMetadataString(job, key) {
152
- const raw = readWorkbenchExecutionMetadataValue(job, key);
153
- return typeof raw === "string" && raw.length > 0 ? raw : null;
154
- }
155
- export function readWorkbenchExecutionMetadataNumber(job, key) {
156
- const raw = readWorkbenchExecutionMetadataValue(job, key);
157
- return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
158
- }
159
- export function isWorkbenchExecutionActive(execution) {
160
- return execution.status === "queued" || execution.status === "running";
161
- }
162
- export function resolveWorkbenchJobGroupStatus(jobs) {
163
- if (jobs.some((job) => job.status === "running")) {
164
- return "running";
165
- }
166
- if (jobs.some((job) => job.status === "queued")) {
167
- return "queued";
168
- }
169
- if (jobs.some((job) => job.status === "failed")) {
170
- return "failed";
171
- }
172
- if (jobs.some((job) => job.status === "cancelled")) {
173
- return "cancelled";
174
- }
175
- return "succeeded";
176
- }
177
- function readWorkbenchExecutionMetadataValue(job, key) {
178
- const input = asRecord(job.input);
179
- const execution = asRecord(input?.execution);
180
- const metadata = asRecord(execution?.metadata);
181
- return metadata?.[key] ?? input?.[key] ?? null;
182
- }
183
- function readExecutionRecord(job) {
184
- const input = asRecord(job.input);
185
- return asRecord(input?.execution);
186
- }
187
- function isBaselineMaterializationJob(job) {
188
- const input = asRecord(job.input);
189
- const execution = asRecord(input?.execution);
190
- const metadata = asRecord(execution?.metadata);
191
- return metadata?.baseline === true || input?.baseline === true;
192
- }
193
- function caseReviewCaseIdsMatch(jobCaseId, requestedCaseId) {
194
- return Boolean(jobCaseId) &&
195
- (jobCaseId === requestedCaseId || requestedCaseId.startsWith(`${jobCaseId}__`));
196
- }
197
- function caseReviewSampleIndicesMatch(jobSampleIndex, reviewSampleIndex) {
198
- return typeof reviewSampleIndex !== "number" || jobSampleIndex === reviewSampleIndex;
199
- }
200
- function selectCurrentExecutionRun(executions) {
201
- if (executions.length <= 1) {
202
- return executions;
203
- }
204
- const activeRunId = executions
205
- .filter(isWorkbenchExecutionActive)
206
- .sort(compareExecutionRecency)[0]?.runId;
207
- const selectedRunId = activeRunId ?? executions.slice().sort(compareExecutionRecency)[0]?.runId;
208
- return selectedRunId
209
- ? executions.filter((execution) => execution.runId === selectedRunId)
210
- : executions;
211
- }
212
- function compareCandidateCaseExecutions(left, right) {
213
- return (executionKindOrder(left.kind) - executionKindOrder(right.kind) ||
214
- (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1) ||
215
- readExecutionRecencyMs(right) - readExecutionRecencyMs(left));
216
- }
217
- function compareExecutionRecency(left, right) {
218
- return readExecutionRecencyMs(right) - readExecutionRecencyMs(left);
219
- }
220
- function compareWorkbenchExecutionJobs(left, right) {
221
- return (executionKindOrder(readWorkbenchExecutionPurpose(left)) -
222
- executionKindOrder(readWorkbenchExecutionPurpose(right)) ||
223
- (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
224
- (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1) ||
225
- readJobRecencyMs(right) - readJobRecencyMs(left) ||
226
- left.id.localeCompare(right.id));
227
- }
228
- function compareWorkbenchTraceJobs(left, right) {
229
- return (executionKindOrder(readWorkbenchExecutionPurpose(left)) -
230
- executionKindOrder(readWorkbenchExecutionPurpose(right)) ||
231
- String(readWorkbenchExecutionMetadataString(left, "caseId") ?? "").localeCompare(String(readWorkbenchExecutionMetadataString(right, "caseId") ?? "")) ||
232
- (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
233
- (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1) ||
234
- (readWorkbenchExecutionMetadataNumber(left, "attemptIndex") ?? -1) -
235
- (readWorkbenchExecutionMetadataNumber(right, "attemptIndex") ?? -1) ||
236
- left.id.localeCompare(right.id));
237
- }
238
- function compareWorkbenchExecutionEvidence(left, right) {
239
- return (executionKindOrder(left.kind) - executionKindOrder(right.kind) ||
240
- String(left.caseId ?? "").localeCompare(String(right.caseId ?? "")) ||
241
- (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1) ||
242
- (left.attemptIndex ?? -1) - (right.attemptIndex ?? -1) ||
243
- String(left.jobIds[0] ?? "").localeCompare(String(right.jobIds[0] ?? "")));
244
- }
245
- function traceRoleForPurpose(purpose) {
246
- if (purpose === "improve") {
247
- return "improver";
248
- }
249
- return "engine";
250
- }
251
- function executionKindOrder(kind) {
252
- if (kind === "improve") {
253
- return 0;
254
- }
255
- if (kind === "attempt") {
256
- return 1;
257
- }
258
- return 3;
259
- }
260
- function readExecutionRecencyMs(execution) {
261
- return (parseTimestampMs(execution.finishedAt) ??
262
- parseTimestampMs(execution.startedAt) ??
263
- parseTimestampMs(execution.createdAt) ??
264
- 0);
265
- }
266
- function readJobRecencyMs(job) {
267
- return (parseTimestampMs(job.finishedAt) ??
268
- parseTimestampMs(job.startedAt) ??
269
- parseTimestampMs(job.updatedAt) ??
270
- parseTimestampMs(job.createdAt) ??
271
- 0);
272
- }
273
- function minTimestamp(values) {
274
- const sorted = values
275
- .filter((value) => typeof value === "string" && value.length > 0)
276
- .sort();
277
- return sorted[0] ?? null;
278
- }
279
- function maxTimestamp(values) {
280
- const sorted = values
281
- .filter((value) => typeof value === "string" && value.length > 0)
282
- .sort();
283
- return sorted[sorted.length - 1] ?? null;
284
- }
285
- function parseTimestampMs(value) {
286
- if (!value) {
287
- return null;
288
- }
289
- const parsed = Date.parse(value);
290
- return Number.isFinite(parsed) ? parsed : null;
291
- }
292
- function optionalString(key, value) {
293
- return value ? { [key]: value } : {};
294
- }
295
- function optionalNumber(key, value) {
296
- return value == null ? {} : { [key]: value };
297
- }
298
- function asRecord(value) {
299
- return value && typeof value === "object" && !Array.isArray(value)
300
- ? value
301
- : null;
302
- }
@@ -1,117 +0,0 @@
1
- import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
2
- import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
3
- export interface WorkbenchInspectionErrorOptions {
4
- status?: number;
5
- }
6
- export declare class WorkbenchInspectionError extends Error {
7
- readonly status: number;
8
- readonly statusCode: number;
9
- constructor(message: string, options?: WorkbenchInspectionErrorOptions);
10
- }
11
- export interface WorkbenchInspectionFileListInput {
12
- fingerprint?: string | null;
13
- }
14
- export interface WorkbenchInspectionFileSurface {
15
- files: CandidateFileSummary[];
16
- preview: CandidateFilePreview | null;
17
- }
18
- export interface WorkbenchInspectionFileSurfaceInput extends WorkbenchInspectionFileListInput {
19
- path?: string | null;
20
- view?: "diff" | "raw" | "rendered";
21
- }
22
- export interface WorkbenchInspectionCandidateInput {
23
- id: string;
24
- }
25
- export interface WorkbenchInspectionCandidateFileSurfaceInput extends WorkbenchInspectionCandidateInput {
26
- path?: string | null;
27
- view?: "diff" | "raw" | "rendered";
28
- }
29
- export interface WorkbenchInspectionEvaluationInput {
30
- id: string;
31
- }
32
- export interface WorkbenchInspectionCaseReviewInput {
33
- candidateId: string;
34
- caseId: string;
35
- runId: string;
36
- sampleIndex?: number;
37
- }
38
- export interface WorkbenchInspectionRunInput {
39
- id: string;
40
- includeJobs?: boolean;
41
- }
42
- export interface WorkbenchInspectionExecutionInput {
43
- runId: string;
44
- jobId: string;
45
- }
46
- export interface WorkbenchInspectionExecutionFileSurfaceInput extends WorkbenchInspectionExecutionInput {
47
- path?: string | null;
48
- view?: "diff" | "raw" | "rendered";
49
- }
50
- export interface WorkbenchInspectionRunDetail {
51
- run: RunSummary;
52
- jobs?: RemoteWorkbenchJob[];
53
- }
54
- export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
55
- export interface WorkbenchFailureDetail {
56
- kind: WorkbenchFailureKind;
57
- id: string;
58
- status?: string;
59
- runId?: string;
60
- candidateId?: string;
61
- evaluationId?: string;
62
- jobId?: string;
63
- caseId?: string;
64
- sampleIndex?: number;
65
- attemptIndex?: number;
66
- error?: string;
67
- }
68
- export interface WorkbenchFailureDiagnosis {
69
- targetId: string | null;
70
- failures: WorkbenchFailureDetail[];
71
- failedRunCount: number;
72
- failedEvaluationCount: number;
73
- failedJobCount: number;
74
- }
75
- export interface WorkbenchInspectionBackend {
76
- projectId: string;
77
- snapshot(): Promise<RuntimeSnapshot>;
78
- spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
79
- sourceFiles(input: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
80
- sourceFileSurface(input: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
81
- candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
82
- candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
83
- candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
84
- evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
85
- run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
86
- jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
87
- executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
88
- executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
89
- caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
90
- executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
91
- traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
92
- traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
93
- }
94
- export interface WorkbenchInspection {
95
- snapshot(): Promise<RuntimeSnapshot>;
96
- spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
97
- sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
98
- sourceFileSurface(input?: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
99
- candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
100
- candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
101
- candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
102
- evaluations(): Promise<WorkbenchEvaluationComparison>;
103
- evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
104
- caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
105
- run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
106
- executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
107
- executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
108
- executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
109
- lineage(): Promise<CandidateLineageGraph>;
110
- diagnose(input?: {
111
- targetId?: string | null;
112
- }): Promise<WorkbenchFailureDiagnosis>;
113
- }
114
- export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
115
- export declare function selectedFilePath(requestedPath: string | null | undefined, files: readonly CandidateFileSummary[]): string | null;
116
- export declare function pickDefaultCandidateFilePath(files: readonly CandidateFileSummary[]): string | null;
117
- //# sourceMappingURL=inspection.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAW1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,8BAA8B;IAC7C,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC;IAC3F,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACtF,iBAAiB,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACvG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,iBAAiB,CAAC,KAAK,CAAC,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACxG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAyErB;AAED,wBAAgB,gBAAgB,CAC9B,aAAa,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,EACxC,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAMf;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAIf"}
@@ -1,224 +0,0 @@
1
- import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
2
- import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
3
- import { candidateRecordWithoutDerivedFields, createCaseReview, } from "./index.js";
4
- export class WorkbenchInspectionError extends Error {
5
- status;
6
- statusCode;
7
- constructor(message, options = {}) {
8
- super(message);
9
- this.name = "WorkbenchInspectionError";
10
- this.status = options.status ?? 400;
11
- this.statusCode = this.status;
12
- }
13
- }
14
- export function createWorkbenchInspection(backend) {
15
- return {
16
- snapshot: () => backend.snapshot(),
17
- spec: (input = {}) => backend.spec(input),
18
- sourceFiles: (input = {}) => backend.sourceFiles(input),
19
- sourceFileSurface: (input = {}) => backend.sourceFileSurface(input),
20
- candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
21
- candidateFiles: (input) => backend.candidateFiles(input),
22
- candidateFileSurface: (input) => backend.candidateFileSurface(input),
23
- evaluations: async () => {
24
- const snapshot = await backend.snapshot();
25
- return buildWorkbenchEvaluationComparison(snapshot.evaluations);
26
- },
27
- evaluation: (input) => backend.evaluation(input),
28
- caseReview: async (input) => {
29
- if (backend.caseReview) {
30
- return await backend.caseReview(input);
31
- }
32
- const candidate = await backend.candidate({ id: input.candidateId });
33
- const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
34
- return createCaseReview({
35
- candidate,
36
- caseId: input.caseId,
37
- executions: buildCandidateCaseExecutionRefs({
38
- jobs,
39
- candidateId: input.candidateId,
40
- caseId: input.caseId,
41
- sampleIndex: input.sampleIndex,
42
- }),
43
- });
44
- },
45
- run: (input) => backend.run(input),
46
- executionTrace: async (input) => {
47
- if (backend.executionTrace) {
48
- return await backend.executionTrace(input);
49
- }
50
- if (!backend.jobInRun || !backend.traceForJob) {
51
- throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
52
- }
53
- const jobs = [await backend.jobInRun(input)];
54
- return {
55
- projectId: backend.projectId,
56
- runId: input.runId,
57
- executions: buildWorkbenchExecutionEvidence({
58
- jobs,
59
- traceIdPrefix: `${backend.projectId}-execution`,
60
- traceForJob: backend.traceForJob,
61
- traceSessionsForJob: backend.traceSessionsForJob,
62
- }),
63
- };
64
- },
65
- executionFiles: (input) => backend.executionFiles(input),
66
- executionFileSurface: (input) => backend.executionFileSurface(input),
67
- lineage: async () => {
68
- const snapshot = await backend.snapshot();
69
- return buildCandidateLineage({
70
- summaries: snapshot.summaries,
71
- activeId: snapshot.activeId,
72
- });
73
- },
74
- diagnose: async (input = {}) => {
75
- const snapshot = await backend.snapshot();
76
- return await diagnoseWorkbenchFailures({
77
- snapshot,
78
- backend,
79
- targetId: input.targetId?.trim() || null,
80
- });
81
- },
82
- };
83
- }
84
- export function selectedFilePath(requestedPath, files) {
85
- const normalizedPath = requestedPath?.trim();
86
- if (normalizedPath && files.some((file) => file.path === normalizedPath)) {
87
- return normalizedPath;
88
- }
89
- return pickDefaultCandidateFilePath(files);
90
- }
91
- export function pickDefaultCandidateFilePath(files) {
92
- return files
93
- .map((entry) => entry.path)
94
- .sort(compareCandidateFilePreference)[0] ?? null;
95
- }
96
- function compareCandidateFilePreference(left, right) {
97
- const order = scoreCandidateFilePreference(left) - scoreCandidateFilePreference(right);
98
- return order === 0 ? left.localeCompare(right) : order;
99
- }
100
- function scoreCandidateFilePreference(path) {
101
- if (path.endsWith("/SKILL.md") || path === "SKILL.md") {
102
- return 0;
103
- }
104
- if (path.endsWith(".md")) {
105
- return 1;
106
- }
107
- if (path.endsWith(".yaml") || path.endsWith(".yml")) {
108
- return 2;
109
- }
110
- return 3;
111
- }
112
- async function diagnoseWorkbenchFailures(args) {
113
- const targetRun = args.targetId
114
- ? args.snapshot.runs.find((run) => run.id === args.targetId)
115
- : null;
116
- const targetEvaluation = args.targetId
117
- ? args.snapshot.evaluations.find((evaluation) => evaluation.id === args.targetId)
118
- : null;
119
- const failures = [];
120
- if (args.targetId && targetRun) {
121
- const detail = await args.backend.run({ id: targetRun.id, includeJobs: true });
122
- failures.push(...runFailures(detail.run));
123
- failures.push(...jobFailures(detail.jobs ?? []));
124
- }
125
- else if (args.targetId && targetEvaluation) {
126
- const evaluation = await args.backend.evaluation({ id: targetEvaluation.id });
127
- failures.push(...evaluationFailures(evaluation));
128
- }
129
- else {
130
- for (const run of args.snapshot.runs) {
131
- failures.push(...runFailures(run));
132
- }
133
- for (const evaluation of args.snapshot.evaluations) {
134
- failures.push(...evaluationSummaryFailures(evaluation));
135
- }
136
- }
137
- return {
138
- targetId: args.targetId,
139
- failures,
140
- failedRunCount: failures.filter((failure) => failure.kind === "run").length,
141
- failedEvaluationCount: failures.filter((failure) => failure.kind === "evaluation").length,
142
- failedJobCount: failures.filter((failure) => failure.kind === "job").length,
143
- };
144
- }
145
- function runFailures(run) {
146
- if (run.status !== "finished" || (run.outcome !== "error" && run.outcome !== "cancelled")) {
147
- return [];
148
- }
149
- return [{
150
- kind: "run",
151
- id: run.id,
152
- runId: run.id,
153
- candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
154
- status: run.outcome,
155
- ...(run.error ? { error: run.error } : {}),
156
- }];
157
- }
158
- function evaluationSummaryFailures(evaluation) {
159
- if (evaluation.status === "completed" &&
160
- evaluation.errorSampleCount === 0 &&
161
- !evaluation.error) {
162
- return [];
163
- }
164
- return [{
165
- kind: "evaluation",
166
- id: evaluation.id,
167
- evaluationId: evaluation.id,
168
- runId: evaluation.runId,
169
- candidateId: evaluation.candidateId,
170
- status: evaluation.status,
171
- ...(evaluation.error ? { error: evaluation.error } : {}),
172
- }];
173
- }
174
- function evaluationFailures(evaluation) {
175
- const failures = evaluationSummaryFailures(evaluation);
176
- for (const sample of evaluation.evaluation.samples) {
177
- if (!sample.error && !(sample.cases ?? []).some((entry) => entry.status && entry.status !== "completed")) {
178
- continue;
179
- }
180
- failures.push({
181
- kind: "sample",
182
- id: `${evaluation.id}:sample:${sample.index}`,
183
- evaluationId: evaluation.id,
184
- runId: evaluation.runId,
185
- candidateId: evaluation.candidateId,
186
- sampleIndex: sample.index,
187
- status: sample.status,
188
- ...(sample.error ? { error: sample.error } : {}),
189
- });
190
- for (const result of sample.cases ?? []) {
191
- if (!result.status || result.status === "completed") {
192
- continue;
193
- }
194
- failures.push({
195
- kind: "case",
196
- id: `${evaluation.id}:case:${result.id}:sample:${sample.index}`,
197
- evaluationId: evaluation.id,
198
- runId: evaluation.runId,
199
- candidateId: evaluation.candidateId,
200
- caseId: result.id,
201
- sampleIndex: sample.index,
202
- status: result.status,
203
- });
204
- }
205
- }
206
- return failures;
207
- }
208
- function jobFailures(jobs) {
209
- return jobs
210
- .filter((job) => isFailedJobStatus(job.status))
211
- .map((job) => ({
212
- kind: "job",
213
- id: job.id,
214
- jobId: job.id,
215
- runId: job.runId,
216
- candidateId: job.candidateId,
217
- status: job.status,
218
- attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
219
- ...(job.error ? { error: job.error } : {}),
220
- }));
221
- }
222
- function isFailedJobStatus(status) {
223
- return status === "failed" || status === "cancelled";
224
- }