@workbench-ai/workbench-core 0.0.67 → 0.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/coded-errors.d.ts +27 -0
- package/dist/coded-errors.d.ts.map +1 -0
- package/dist/coded-errors.js +52 -0
- package/dist/execution-events.d.ts +5 -1
- package/dist/execution-events.d.ts.map +1 -1
- package/dist/execution-events.js +13 -3
- package/dist/execution-graph.d.ts +4 -3
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +15 -14
- package/dist/execution-jobs.d.ts +5 -20
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +7 -91
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +10 -10
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-scheduler.d.ts +5 -3
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +33 -9
- package/dist/execution-traces.js +1 -1
- package/dist/generic-spec.d.ts +7 -61
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +0 -679
- package/dist/index.d.ts +377 -220
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7887 -3881
- package/dist/remote-model.d.ts +17 -0
- package/dist/remote-model.d.ts.map +1 -0
- package/dist/remote-model.js +86 -0
- package/dist/runtime-dockerfile.d.ts +1 -1
- package/dist/runtime-dockerfile.d.ts.map +1 -1
- package/dist/runtime-dockerfile.js +4 -4
- package/dist/sandbox-backends/docker.d.ts.map +1 -1
- package/dist/sandbox-backends/docker.js +34 -16
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +13 -9
- package/dist/skill-patch.d.ts +8 -0
- package/dist/skill-patch.d.ts.map +1 -0
- package/dist/{candidate-patch.js → skill-patch.js} +5 -5
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/candidate-patch.d.ts +0 -8
- package/dist/candidate-patch.d.ts.map +0 -1
- package/dist/execution-evidence.d.ts +0 -22
- package/dist/execution-evidence.d.ts.map +0 -1
- package/dist/execution-evidence.js +0 -302
- package/dist/inspection.d.ts +0 -117
- package/dist/inspection.d.ts.map +0 -1
- package/dist/inspection.js +0 -224
|
@@ -1,302 +0,0 @@
|
|
|
1
|
-
import { mergeWorkbenchExecutionTracesByJob } from "./execution-traces.js";
|
|
2
|
-
/**
 * Builds execution references for the "attempt" jobs of one candidate/case
 * pair, limited to a single run.
 *
 * A job is kept when its candidate id equals `args.candidateId`, its purpose
 * is "attempt", its case id matches `args.caseId`, and (only when
 * `args.sampleIndex` is a number) its sample index equals it. Kept jobs are
 * grouped by run id, purpose, case id and sample index; each group becomes
 * one reference. The result is finally narrowed to one run by
 * `selectCurrentExecutionRun`.
 *
 * @param {object} args
 * @param {Iterable<object>} args.jobs Jobs to scan.
 * @param {string} args.candidateId Candidate whose jobs are wanted.
 * @param {string} args.caseId Requested case id.
 * @param {number} [args.sampleIndex] Optional sample index filter.
 * @returns {object[]} Execution references for the selected run.
 */
export function buildCandidateCaseExecutionRefs(args) {
  const groups = new Map();
  for (const job of args.jobs) {
    const kind = readWorkbenchExecutionPurpose(job);
    const jobCandidateId = job.candidateId ?? readWorkbenchExecutionMetadataString(job, "candidateId");
    if (jobCandidateId !== args.candidateId || kind !== "attempt") {
      continue;
    }
    const jobCaseId = readWorkbenchExecutionMetadataString(job, "caseId");
    if (!caseReviewCaseIdsMatch(jobCaseId, args.caseId)) {
      continue;
    }
    const jobSampleIndex = readWorkbenchExecutionMetadataNumber(job, "sampleIndex");
    if (!caseReviewSampleIndicesMatch(jobSampleIndex, args.sampleIndex)) {
      continue;
    }
    const key = [job.runId, kind, jobCaseId ?? "", jobSampleIndex ?? ""].join("\0");
    // Append in place: re-spreading the bucket on every insert is quadratic.
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(job);
    } else {
      groups.set(key, [job]);
    }
  }
  const executions = [...groups.values()]
    .map((bucket) => {
      // Buckets are private to this call and never empty, and every job in
      // them already passed the "attempt" filter above.
      const group = bucket.sort(compareWorkbenchExecutionJobs);
      const first = group[0];
      const startedAt = minTimestamp(group.map((job) => job.startedAt));
      const finishedAt = maxTimestamp(group.map((job) => job.finishedAt));
      const startedMs = startedAt ? Date.parse(startedAt) : Number.NaN;
      const finishedMs = finishedAt ? Date.parse(finishedAt) : Number.NaN;
      // Date.parse yields NaN for unparseable text; never emit a NaN duration.
      const durationMs = Number.isFinite(startedMs) && Number.isFinite(finishedMs)
        ? Math.max(0, finishedMs - startedMs)
        : null;
      return {
        runId: first.runId,
        kind: "attempt",
        role: "engine",
        status: resolveWorkbenchJobGroupStatus(group),
        jobIds: group.map((job) => job.id),
        executionIds: group
          .map((job) => readWorkbenchExecutionId(job))
          .filter((executionId) => Boolean(executionId)),
        createdAt: minTimestamp(group.map((job) => job.createdAt)) ?? first.createdAt,
        ...(startedAt ? { startedAt } : {}),
        ...(finishedAt ? { finishedAt } : {}),
        ...(durationMs !== null ? { durationMs } : {}),
        ...optionalString("caseId", readWorkbenchExecutionMetadataString(first, "caseId")),
        ...optionalNumber("sampleIndex", readWorkbenchExecutionMetadataNumber(first, "sampleIndex")),
        ...optionalNumber("attemptIndex", readWorkbenchExecutionMetadataNumber(first, "attemptIndex")),
      };
    })
    .sort(compareCandidateCaseExecutions);
  return selectCurrentExecutionRun(executions);
}
|
|
59
|
-
/**
 * Groups execution jobs into evidence entries, each carrying a merged trace.
 *
 * Baseline-materialization jobs and jobs without a recognised purpose are
 * skipped. The rest are grouped by run id, purpose, candidate id, case id,
 * sample index and attempt index. Each group becomes one entry whose trace
 * merges the per-job traces with any trace sessions.
 *
 * @param {object} args
 * @param {Iterable<object>} args.jobs Jobs to scan.
 * @param {string} args.traceIdPrefix Forwarded to the trace merger.
 * @param {(job: object, role: string) => object} args.traceForJob Produces a job's trace.
 * @param {(job: object, role: string) => object[]} [args.traceSessionsForJob]
 *   Optional provider of extra trace sessions for a job.
 * @returns {object[]} Evidence entries ordered by `compareWorkbenchExecutionEvidence`.
 */
export function buildWorkbenchExecutionEvidence(args) {
  const groups = new Map();
  for (const job of args.jobs) {
    if (isBaselineMaterializationJob(job)) {
      continue;
    }
    const purpose = readWorkbenchExecutionPurpose(job);
    if (!purpose) {
      continue;
    }
    const key = [
      job.runId,
      purpose,
      job.candidateId ?? readWorkbenchExecutionMetadataString(job, "candidateId") ?? "",
      readWorkbenchExecutionMetadataString(job, "caseId") ?? "",
      readWorkbenchExecutionMetadataNumber(job, "sampleIndex") ?? "",
      readWorkbenchExecutionMetadataNumber(job, "attemptIndex") ?? "",
    ].join("\0");
    // Append in place: re-spreading the bucket on every insert is quadratic.
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(job);
    } else {
      groups.set(key, [job]);
    }
  }
  return [...groups.values()]
    .map((bucket) => {
      // Buckets are private to this call and never empty; purpose is part of
      // the grouping key, so it is the same non-null value for every member.
      const group = bucket.sort(compareWorkbenchTraceJobs);
      const first = group[0];
      const purpose = readWorkbenchExecutionPurpose(first);
      const role = traceRoleForPurpose(purpose);
      const caseId = readWorkbenchExecutionMetadataString(first, "caseId");
      const sampleIndex = readWorkbenchExecutionMetadataNumber(first, "sampleIndex");
      const attemptIndex = readWorkbenchExecutionMetadataNumber(first, "attemptIndex");
      const sessions = group.flatMap((job) => (args.traceSessionsForJob
        ? args.traceSessionsForJob(job, role)
        : []));
      const jobIds = group.map((job) => job.id);
      const executionIds = group
        .map((job) => readWorkbenchExecutionId(job))
        .filter((executionId) => Boolean(executionId));
      return {
        id: [
          purpose,
          first.runId,
          caseId ?? "current",
          sampleIndex ?? "sample",
          attemptIndex ?? "attempt",
          jobIds.join("_"),
        ].join(":"),
        kind: purpose,
        // A single execution id is only meaningful for a one-job group.
        executionId: group.length === 1 ? readWorkbenchExecutionId(first) : null,
        role,
        status: resolveWorkbenchJobGroupStatus(group),
        jobIds,
        executionIds,
        // NOTE(review): grouping falls back to the metadata candidateId, but
        // only job.candidateId is reported here — confirm this is intended.
        ...(first.candidateId ? { candidateId: first.candidateId } : {}),
        ...optionalString("caseId", caseId),
        ...optionalNumber("sampleIndex", sampleIndex),
        ...optionalNumber("attemptIndex", attemptIndex),
        sessions,
        trace: mergeWorkbenchExecutionTracesByJob({
          traceIdPrefix: args.traceIdPrefix,
          stageId: purpose,
          jobs: [
            ...group.map((job) => ({
              id: job.id,
              trace: args.traceForJob(job, role),
            })),
            ...sessions.map((session) => ({
              id: session.id,
              jobId: session.jobId,
              trace: session.trace,
            })),
          ],
        }),
      };
    })
    .sort(compareWorkbenchExecutionEvidence);
}
|
|
138
|
-
/**
 * Reads the purpose of an execution job.
 *
 * @param {object} job Job record; only `kind` and `input.execution.purpose` are read.
 * @returns {"improve" | "attempt" | null} The purpose, or null when the job
 *   is not of kind "execute" or carries any other purpose value.
 */
export function readWorkbenchExecutionPurpose(job) {
  if (job.kind === "execute") {
    const { purpose } = readExecutionRecord(job) ?? {};
    if (purpose === "improve" || purpose === "attempt") {
      return purpose;
    }
  }
  return null;
}
|
|
147
|
-
/**
 * Reads `input.execution.id` from a job.
 *
 * @param {object} job Job record.
 * @returns {string | null} The id when it is a non-empty string, otherwise null.
 */
export function readWorkbenchExecutionId(job) {
  const executionId = readExecutionRecord(job)?.id;
  if (typeof executionId !== "string" || executionId.length === 0) {
    return null;
  }
  return executionId;
}
|
|
151
|
-
/**
 * Reads a metadata value from a job and accepts it only as a non-empty string.
 *
 * @param {object} job Job record.
 * @param {string} key Metadata key to look up.
 * @returns {string | null} The string value, or null for anything else.
 */
export function readWorkbenchExecutionMetadataString(job, key) {
  const candidate = readWorkbenchExecutionMetadataValue(job, key);
  const isNonEmptyString = typeof candidate === "string" && candidate.length > 0;
  return isNonEmptyString ? candidate : null;
}
|
|
155
|
-
/**
 * Reads a metadata value from a job and accepts it only as a finite number.
 *
 * @param {object} job Job record.
 * @param {string} key Metadata key to look up.
 * @returns {number | null} The numeric value, or null for non-numbers, NaN and ±Infinity.
 */
export function readWorkbenchExecutionMetadataNumber(job, key) {
  const candidate = readWorkbenchExecutionMetadataValue(job, key);
  if (typeof candidate !== "number" || !Number.isFinite(candidate)) {
    return null;
  }
  return candidate;
}
|
|
159
|
-
/**
 * Tells whether an execution is still pending or in progress.
 *
 * @param {{ status: string }} execution Execution whose status is inspected.
 * @returns {boolean} True when the status is "queued" or "running".
 */
export function isWorkbenchExecutionActive(execution) {
  const { status } = execution;
  if (status === "queued") {
    return true;
  }
  return status === "running";
}
|
|
162
|
-
/**
 * Collapses the statuses of a job group into one status.
 *
 * The first status in the order running, queued, failed, cancelled that any
 * job carries wins. When none of them occurs (including for an empty group)
 * the result is "succeeded".
 *
 * @param {Array<{ status: string }>} jobs Jobs of one group.
 * @returns {"running" | "queued" | "failed" | "cancelled" | "succeeded"}
 */
export function resolveWorkbenchJobGroupStatus(jobs) {
  const precedence = ["running", "queued", "failed", "cancelled"];
  const dominant = precedence.find((status) => jobs.some((job) => job.status === status));
  return dominant ?? "succeeded";
}
|
|
177
|
-
/**
 * Looks up a raw metadata value for a job.
 *
 * `input.execution.metadata[key]` is preferred; when it is null or undefined
 * the lookup falls back to `input[key]`.
 *
 * @param {object} job Job record.
 * @param {string} key Key to look up.
 * @returns {*} The value found, or null when neither location holds one.
 */
function readWorkbenchExecutionMetadataValue(job, key) {
  const input = asRecord(job.input);
  const metadata = asRecord(asRecord(input?.execution)?.metadata);
  const fromMetadata = metadata?.[key];
  if (fromMetadata != null) {
    return fromMetadata;
  }
  const fromInput = input?.[key];
  return fromInput != null ? fromInput : null;
}
|
|
183
|
-
/**
 * Returns `job.input.execution` when both the input and the execution are
 * non-array objects.
 *
 * @param {object} job Job record.
 * @returns {object | null} The execution record, or null.
 */
function readExecutionRecord(job) {
  const jobInput = asRecord(job.input);
  const execution = jobInput?.execution;
  return asRecord(execution);
}
|
|
187
|
-
/**
 * Tells whether a job is flagged as a baseline job.
 *
 * @param {object} job Job record.
 * @returns {boolean} True when `input.execution.metadata.baseline` or
 *   `input.baseline` is exactly `true`.
 */
function isBaselineMaterializationJob(job) {
  const input = asRecord(job.input);
  const metadata = asRecord(asRecord(input?.execution)?.metadata);
  if (metadata?.baseline === true) {
    return true;
  }
  return input?.baseline === true;
}
|
|
193
|
-
/**
 * Decides whether a job's case id satisfies a requested case id.
 *
 * They match when they are equal, or when the requested id starts with the
 * job's id followed by a double underscore (`<jobCaseId>__...`).
 *
 * @param {string | null | undefined} jobCaseId Case id read from the job.
 * @param {string | null | undefined} requestedCaseId Case id asked for by the caller.
 * @returns {boolean} False when the job has no case id, or when the requested
 *   id is not a string and not equal to the job's id.
 */
function caseReviewCaseIdsMatch(jobCaseId, requestedCaseId) {
  if (!jobCaseId) {
    return false;
  }
  if (jobCaseId === requestedCaseId) {
    return true;
  }
  // A missing or non-string request is "no match", not a TypeError on startsWith.
  return typeof requestedCaseId === "string" && requestedCaseId.startsWith(`${jobCaseId}__`);
}
|
|
197
|
-
/**
 * Decides whether a job's sample index satisfies the requested one.
 *
 * @param {number | null} jobSampleIndex Sample index read from the job.
 * @param {number | undefined} reviewSampleIndex Requested index; any
 *   non-number means "no filter".
 * @returns {boolean} True when no numeric filter is given or both are strictly equal.
 */
function caseReviewSampleIndicesMatch(jobSampleIndex, reviewSampleIndex) {
  if (typeof reviewSampleIndex !== "number") {
    return true;
  }
  return jobSampleIndex === reviewSampleIndex;
}
|
|
200
|
-
/**
 * Narrows a list of executions to those of one run.
 *
 * The chosen run is that of the most recent active (queued/running)
 * execution, or, when none is active, of the most recent execution overall.
 * Lists with at most one entry are returned unchanged.
 *
 * @param {object[]} executions Executions, possibly spanning several runs.
 * @returns {object[]} Executions of the chosen run, or the input list when no
 *   run id could be determined.
 */
function selectCurrentExecutionRun(executions) {
  if (executions.length <= 1) {
    return executions;
  }
  const activeByRecency = executions
    .filter(isWorkbenchExecutionActive)
    .sort(compareExecutionRecency);
  let selectedRunId = activeByRecency[0]?.runId;
  if (selectedRunId == null) {
    const allByRecency = executions.slice().sort(compareExecutionRecency);
    selectedRunId = allByRecency[0]?.runId;
  }
  if (!selectedRunId) {
    return executions;
  }
  return executions.filter((execution) => execution.runId === selectedRunId);
}
|
|
212
|
-
/**
 * Sort comparator for execution references: by kind order, then ascending
 * sample index (missing counts as -1), then most recent first.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive per the Array#sort contract.
 */
function compareCandidateCaseExecutions(left, right) {
  const byKind = executionKindOrder(left.kind) - executionKindOrder(right.kind);
  if (byKind) {
    return byKind;
  }
  const bySample = (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1);
  if (bySample) {
    return bySample;
  }
  return readExecutionRecencyMs(right) - readExecutionRecencyMs(left);
}
|
|
217
|
-
/**
 * Sort comparator that places the most recent execution first.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Difference of recency timestamps, right minus left.
 */
function compareExecutionRecency(left, right) {
  const leftMs = readExecutionRecencyMs(left);
  const rightMs = readExecutionRecencyMs(right);
  return rightMs - leftMs;
}
|
|
220
|
-
/**
 * Sort comparator for jobs: by purpose order, then ascending sample index
 * (missing counts as -1), then most recent first, then by job id.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive per the Array#sort contract.
 */
function compareWorkbenchExecutionJobs(left, right) {
  const byPurpose = executionKindOrder(readWorkbenchExecutionPurpose(left)) -
    executionKindOrder(readWorkbenchExecutionPurpose(right));
  if (byPurpose) {
    return byPurpose;
  }
  const bySample = (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
    (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1);
  if (bySample) {
    return bySample;
  }
  const byRecency = readJobRecencyMs(right) - readJobRecencyMs(left);
  if (byRecency) {
    return byRecency;
  }
  return left.id.localeCompare(right.id);
}
|
|
228
|
-
/**
 * Sort comparator for jobs inside an evidence group: by purpose order, case
 * id, sample index, attempt index (missing indices count as -1) and finally
 * job id.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive per the Array#sort contract.
 */
function compareWorkbenchTraceJobs(left, right) {
  const byPurpose = executionKindOrder(readWorkbenchExecutionPurpose(left)) -
    executionKindOrder(readWorkbenchExecutionPurpose(right));
  if (byPurpose) {
    return byPurpose;
  }
  const leftCaseId = String(readWorkbenchExecutionMetadataString(left, "caseId") ?? "");
  const rightCaseId = String(readWorkbenchExecutionMetadataString(right, "caseId") ?? "");
  const byCase = leftCaseId.localeCompare(rightCaseId);
  if (byCase) {
    return byCase;
  }
  const bySample = (readWorkbenchExecutionMetadataNumber(left, "sampleIndex") ?? -1) -
    (readWorkbenchExecutionMetadataNumber(right, "sampleIndex") ?? -1);
  if (bySample) {
    return bySample;
  }
  const byAttempt = (readWorkbenchExecutionMetadataNumber(left, "attemptIndex") ?? -1) -
    (readWorkbenchExecutionMetadataNumber(right, "attemptIndex") ?? -1);
  if (byAttempt) {
    return byAttempt;
  }
  return left.id.localeCompare(right.id);
}
|
|
238
|
-
/**
 * Sort comparator for evidence entries: by kind order, case id, sample index,
 * attempt index (missing indices count as -1) and finally the first job id.
 *
 * @param {object} left
 * @param {object} right
 * @returns {number} Negative, zero or positive per the Array#sort contract.
 */
function compareWorkbenchExecutionEvidence(left, right) {
  const byKind = executionKindOrder(left.kind) - executionKindOrder(right.kind);
  if (byKind) {
    return byKind;
  }
  const byCase = String(left.caseId ?? "").localeCompare(String(right.caseId ?? ""));
  if (byCase) {
    return byCase;
  }
  const bySample = (left.sampleIndex ?? -1) - (right.sampleIndex ?? -1);
  if (bySample) {
    return bySample;
  }
  const byAttempt = (left.attemptIndex ?? -1) - (right.attemptIndex ?? -1);
  if (byAttempt) {
    return byAttempt;
  }
  const leftFirstJob = String(left.jobIds[0] ?? "");
  const rightFirstJob = String(right.jobIds[0] ?? "");
  return leftFirstJob.localeCompare(rightFirstJob);
}
|
|
245
|
-
/**
 * Maps an execution purpose to the trace role used for it.
 *
 * @param {string} purpose Execution purpose.
 * @returns {"improver" | "engine"} "improver" for "improve", "engine" for everything else.
 */
function traceRoleForPurpose(purpose) {
  return purpose === "improve" ? "improver" : "engine";
}
|
|
251
|
-
/**
 * Sort rank of an execution kind.
 *
 * @param {string | null} kind Execution kind or purpose.
 * @returns {number} 0 for "improve", 1 for "attempt", 3 for anything else.
 */
function executionKindOrder(kind) {
  switch (kind) {
    case "improve":
      return 0;
    case "attempt":
      return 1;
    default:
      return 3;
  }
}
|
|
260
|
-
/**
 * Picks the timestamp used to rank an execution by recency.
 *
 * @param {object} execution Execution with optional `finishedAt`, `startedAt`
 *   and `createdAt` timestamp strings.
 * @returns {number} Epoch milliseconds of the first parseable field in that
 *   order, or 0 when none parses.
 */
function readExecutionRecencyMs(execution) {
  for (const field of ["finishedAt", "startedAt", "createdAt"]) {
    const ms = parseTimestampMs(execution[field]);
    if (ms !== null) {
      return ms;
    }
  }
  return 0;
}
|
|
266
|
-
/**
 * Picks the timestamp used to rank a job by recency.
 *
 * @param {object} job Job with optional `finishedAt`, `startedAt`,
 *   `updatedAt` and `createdAt` timestamp strings.
 * @returns {number} Epoch milliseconds of the first parseable field in that
 *   order, or 0 when none parses.
 */
function readJobRecencyMs(job) {
  for (const field of ["finishedAt", "startedAt", "updatedAt", "createdAt"]) {
    const ms = parseTimestampMs(job[field]);
    if (ms !== null) {
      return ms;
    }
  }
  return 0;
}
|
|
273
|
-
/**
 * Returns the earliest timestamp string from a list.
 *
 * Entries that are not non-empty strings are ignored. Two entries are
 * compared by the instant `Date.parse` gives them, so strings with different
 * UTC offsets or precision are ordered correctly. When either entry cannot be
 * parsed, or both denote the same instant, plain string comparison decides.
 * For uniformly formatted timestamps this matches a lexicographic sort.
 *
 * @param {Iterable<unknown>} values Candidate timestamps.
 * @returns {string | null} The earliest timestamp, or null when none qualifies.
 */
function minTimestamp(values) {
  let earliest = null;
  let earliestMs = Number.NaN;
  for (const value of values) {
    if (typeof value !== "string" || value.length === 0) {
      continue;
    }
    const valueMs = Date.parse(value);
    if (earliest !== null) {
      const comparableAsInstants = Number.isFinite(valueMs) &&
        Number.isFinite(earliestMs) &&
        valueMs !== earliestMs;
      const isEarlier = comparableAsInstants ? valueMs < earliestMs : value < earliest;
      if (!isEarlier) {
        continue;
      }
    }
    earliest = value;
    earliestMs = valueMs;
  }
  return earliest;
}
|
|
279
|
-
/**
 * Returns the latest timestamp string from a list.
 *
 * Entries that are not non-empty strings are ignored. Two entries are
 * compared by the instant `Date.parse` gives them, so strings with different
 * UTC offsets or precision are ordered correctly. When either entry cannot be
 * parsed, or both denote the same instant, plain string comparison decides.
 * For uniformly formatted timestamps this matches a lexicographic sort.
 *
 * @param {Iterable<unknown>} values Candidate timestamps.
 * @returns {string | null} The latest timestamp, or null when none qualifies.
 */
function maxTimestamp(values) {
  let latest = null;
  let latestMs = Number.NaN;
  for (const value of values) {
    if (typeof value !== "string" || value.length === 0) {
      continue;
    }
    const valueMs = Date.parse(value);
    if (latest !== null) {
      const comparableAsInstants = Number.isFinite(valueMs) &&
        Number.isFinite(latestMs) &&
        valueMs !== latestMs;
      const isLater = comparableAsInstants ? valueMs > latestMs : value > latest;
      if (!isLater) {
        continue;
      }
    }
    latest = value;
    latestMs = valueMs;
  }
  return latest;
}
|
|
285
|
-
/**
 * Parses a timestamp string into epoch milliseconds.
 *
 * @param {string | null | undefined} value Timestamp text.
 * @returns {number | null} Milliseconds from `Date.parse`, or null when the
 *   value is falsy or does not parse to a finite number.
 */
function parseTimestampMs(value) {
  const ms = value ? Date.parse(value) : Number.NaN;
  return Number.isFinite(ms) ? ms : null;
}
|
|
292
|
-
/**
 * Builds an object fragment for spreading: `{ [key]: value }` when the value
 * is truthy, otherwise an empty object.
 *
 * @param {string} key Property name.
 * @param {string | null | undefined} value Property value.
 * @returns {object} Zero- or one-property object.
 */
function optionalString(key, value) {
  if (!value) {
    return {};
  }
  return { [key]: value };
}
|
|
295
|
-
/**
 * Builds an object fragment for spreading: `{ [key]: value }` unless the
 * value is null or undefined. Zero is kept.
 *
 * @param {string} key Property name.
 * @param {number | null | undefined} value Property value.
 * @returns {object} Zero- or one-property object.
 */
function optionalNumber(key, value) {
  if (value === null || value === undefined) {
    return {};
  }
  return { [key]: value };
}
|
|
298
|
-
/**
 * Narrows a value to a non-array object.
 *
 * @param {unknown} value Any value.
 * @returns {object | null} The same value when it is a truthy, non-array
 *   object; otherwise null.
 */
function asRecord(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? value : null;
}
|
package/dist/inspection.d.ts
DELETED
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
|
|
2
|
-
import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
|
|
3
|
-
/** Options accepted by the `WorkbenchInspectionError` constructor. */
export interface WorkbenchInspectionErrorOptions {
    /** Numeric status to attach to the error. */
    status?: number;
}
|
|
6
|
-
/** Error type raised by the inspection layer; carries a numeric status. */
export declare class WorkbenchInspectionError extends Error {
    /** Numeric status attached to the error. */
    readonly status: number;
    /** Second numeric status field exposed alongside `status`. */
    readonly statusCode: number;
    /**
     * @param message Error message.
     * @param options Optional settings, including the status.
     */
    constructor(message: string, options?: WorkbenchInspectionErrorOptions);
}
|
|
11
|
-
/** Input for the spec and source-file listing calls. */
export interface WorkbenchInspectionFileListInput {
    /** Optional fingerprint; its interpretation is left to the backend. */
    fingerprint?: string | null;
}
|
|
14
|
-
/** A file listing together with an optional preview of one file. */
export interface WorkbenchInspectionFileSurface {
    /** Summaries of the available files. */
    files: CandidateFileSummary[];
    /** Preview of a file, or null when there is none. */
    preview: CandidateFilePreview | null;
}
|
|
18
|
-
/** Input for requesting a source file surface. */
export interface WorkbenchInspectionFileSurfaceInput extends WorkbenchInspectionFileListInput {
    /** Path of the file to preview, if any. */
    path?: string | null;
    /** Requested presentation of the preview. */
    view?: "diff" | "raw" | "rendered";
}
|
|
22
|
-
/** Identifies a candidate. */
export interface WorkbenchInspectionCandidateInput {
    /** Candidate id. */
    id: string;
}
|
|
25
|
-
/** Input for requesting a candidate's file surface. */
export interface WorkbenchInspectionCandidateFileSurfaceInput extends WorkbenchInspectionCandidateInput {
    /** Path of the file to preview, if any. */
    path?: string | null;
    /** Requested presentation of the preview. */
    view?: "diff" | "raw" | "rendered";
}
|
|
29
|
-
/** Identifies an evaluation. */
export interface WorkbenchInspectionEvaluationInput {
    /** Evaluation id. */
    id: string;
}
|
|
32
|
-
/** Input for requesting the review of one case of a candidate. */
export interface WorkbenchInspectionCaseReviewInput {
    /** Candidate the case belongs to. */
    candidateId: string;
    /** Case to review. */
    caseId: string;
    /** Run whose jobs are consulted. */
    runId: string;
    /** Optional sample index to restrict the review to. */
    sampleIndex?: number;
}
|
|
38
|
-
/** Input for requesting a run. */
export interface WorkbenchInspectionRunInput {
    /** Run id. */
    id: string;
    /** Whether the run's jobs should be included in the result. */
    includeJobs?: boolean;
}
|
|
42
|
-
/** Identifies one job within a run. */
export interface WorkbenchInspectionExecutionInput {
    /** Run id. */
    runId: string;
    /** Job id. */
    jobId: string;
}
|
|
46
|
-
/** Input for requesting an execution's file surface. */
export interface WorkbenchInspectionExecutionFileSurfaceInput extends WorkbenchInspectionExecutionInput {
    /** Path of the file to preview, if any. */
    path?: string | null;
    /** Requested presentation of the preview. */
    view?: "diff" | "raw" | "rendered";
}
|
|
50
|
-
/** A run summary, optionally accompanied by the run's jobs. */
export interface WorkbenchInspectionRunDetail {
    /** Summary of the run. */
    run: RunSummary;
    /** Jobs of the run; may be absent. */
    jobs?: RemoteWorkbenchJob[];
}
|
|
54
|
-
/** The kind of entity a failure detail refers to. */
export type WorkbenchFailureKind =
    | "run"
    | "evaluation"
    | "sample"
    | "case"
    | "job";
|
|
55
|
-
/** One reported failure, with whatever identifying context is available. */
export interface WorkbenchFailureDetail {
    /** Kind of entity that failed. */
    kind: WorkbenchFailureKind;
    /** Id of the failed entity. */
    id: string;
    /** Status of the failed entity, when known. */
    status?: string;
    /** Related run id, when known. */
    runId?: string;
    /** Related candidate id, when known. */
    candidateId?: string;
    /** Related evaluation id, when known. */
    evaluationId?: string;
    /** Related job id, when known. */
    jobId?: string;
    /** Related case id, when known. */
    caseId?: string;
    /** Related sample index, when known. */
    sampleIndex?: number;
    /** Related attempt index, when known. */
    attemptIndex?: number;
    /** Error text, when available. */
    error?: string;
}
|
|
68
|
-
/** Result of a failure diagnosis. */
export interface WorkbenchFailureDiagnosis {
    /** Id the diagnosis was targeted at, or null when untargeted. */
    targetId: string | null;
    /** The individual failures found. */
    failures: WorkbenchFailureDetail[];
    /** Number of failed runs. */
    failedRunCount: number;
    /** Number of failed evaluations. */
    failedEvaluationCount: number;
    /** Number of failed jobs. */
    failedJobCount: number;
}
|
|
75
|
-
/**
 * Data source wrapped by `createWorkbenchInspection`.
 *
 * Members marked optional may be omitted by a backend.
 */
export interface WorkbenchInspectionBackend {
    /** Id of the project this backend serves. */
    projectId: string;

    /** Returns the runtime snapshot. */
    snapshot(): Promise<RuntimeSnapshot>;

    /** Returns the authored source document. */
    spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
    /** Lists source files. */
    sourceFiles(input: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
    /** Returns the source file listing with an optional preview. */
    sourceFileSurface(input: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Returns a candidate record. */
    candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
    /** Lists a candidate's files. */
    candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
    /** Returns a candidate's file listing with an optional preview. */
    candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Returns an evaluation scorecard. */
    evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
    /** Returns a run, optionally with its jobs. */
    run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
    /** Optional: returns one job of a run. */
    jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;

    /** Lists the files of an execution. */
    executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
    /** Returns an execution's file listing with an optional preview. */
    executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Optional: returns a case review directly. */
    caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
    /** Optional: returns an execution trace detail directly. */
    executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
    /** Optional: produces the trace of one job for a given role. */
    traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
    /** Optional: produces the trace sessions of one job for a given role. */
    traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
}
|
|
94
|
-
/** Inspection API returned by `createWorkbenchInspection`. */
export interface WorkbenchInspection {
    /** Returns the runtime snapshot. */
    snapshot(): Promise<RuntimeSnapshot>;

    /** Returns the authored source document. */
    spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
    /** Lists source files. */
    sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
    /** Returns the source file listing with an optional preview. */
    sourceFileSurface(input?: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Returns a candidate record. */
    candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
    /** Lists a candidate's files. */
    candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
    /** Returns a candidate's file listing with an optional preview. */
    candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Returns a comparison of evaluations. */
    evaluations(): Promise<WorkbenchEvaluationComparison>;
    /** Returns an evaluation scorecard. */
    evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
    /** Returns the review of one case of a candidate. */
    caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;

    /** Returns a run, optionally with its jobs. */
    run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
    /** Returns the execution trace detail for one job of a run. */
    executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
    /** Lists the files of an execution. */
    executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
    /** Returns an execution's file listing with an optional preview. */
    executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;

    /** Returns the candidate lineage graph. */
    lineage(): Promise<CandidateLineageGraph>;
    /** Diagnoses failures, optionally targeted at one id. */
    diagnose(input?: {
        targetId?: string | null;
    }): Promise<WorkbenchFailureDiagnosis>;
}
|
|
114
|
-
/**
 * Creates the inspection API on top of a backend.
 *
 * @param backend Data source to wrap.
 * @returns The inspection API.
 */
export declare function createWorkbenchInspection(
    backend: WorkbenchInspectionBackend,
): WorkbenchInspection;
|
|
115
|
-
/**
 * Resolves which file path should be selected.
 *
 * @param requestedPath Path asked for, if any.
 * @param files Files available for selection.
 * @returns The selected path, or null.
 */
export declare function selectedFilePath(
    requestedPath: string | null | undefined,
    files: readonly CandidateFileSummary[],
): string | null;
|
|
116
|
-
/**
 * Picks the default file path from a list of files.
 *
 * @param files Files available for selection.
 * @returns The default path, or null.
 */
export declare function pickDefaultCandidateFilePath(
    files: readonly CandidateFileSummary[],
): string | null;
|
|
117
|
-
//# sourceMappingURL=inspection.d.ts.map
|
package/dist/inspection.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAW1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,8BAA8B;IAC7C,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC;IAC3F,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,
MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACtF,iBAAiB,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACvG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,iBAAiB,CAAC,KAAK,CAAC,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACxG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,K
AAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAyErB;AAED,wBAAgB,gBAAgB,CAC9B,aAAa,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,EACxC,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAMf;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAIf"}
|
package/dist/inspection.js
DELETED
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
|
|
2
|
-
import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
|
|
3
|
-
import { candidateRecordWithoutDerivedFields, createCaseReview, } from "./index.js";
|
|
4
|
-
/**
 * Error raised by the inspection layer. `status` and `statusCode` always hold
 * the same number, which defaults to 400.
 */
export class WorkbenchInspectionError extends Error {
  status;
  statusCode;

  /**
   * @param {string} message Error message.
   * @param {{ status?: number }} [options] Optional status override.
   */
  constructor(message, options = {}) {
    super(message);
    const resolvedStatus = options.status ?? 400;
    this.name = "WorkbenchInspectionError";
    this.status = resolvedStatus;
    this.statusCode = resolvedStatus;
  }
}
|
|
14
|
-
/**
 * Wraps an inspection backend in the public inspection API.
 *
 * Most members delegate straight to the backend. The others add behaviour:
 * - `candidate` passes the record through `candidateRecordWithoutDerivedFields`.
 * - `evaluations` and `lineage` are derived from the backend snapshot.
 * - `caseReview` uses `backend.caseReview` when present; otherwise it builds
 *   the review from the candidate and the jobs of the requested run.
 * - `executionTrace` uses `backend.executionTrace` when present; otherwise it
 *   needs both `backend.jobInRun` and `backend.traceForJob`, and throws a
 *   `WorkbenchInspectionError` with status 404 when either is missing.
 * - `diagnose` trims `targetId` and treats an empty result as null.
 *
 * @param {object} backend Inspection backend.
 * @returns {object} The inspection API.
 */
export function createWorkbenchInspection(backend) {
  return {
    snapshot: () => backend.snapshot(),
    spec: (input = {}) => backend.spec(input),
    sourceFiles: (input = {}) => backend.sourceFiles(input),
    sourceFileSurface: (input = {}) => backend.sourceFileSurface(input),
    candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
    candidateFiles: (input) => backend.candidateFiles(input),
    candidateFileSurface: (input) => backend.candidateFileSurface(input),
    evaluations: async () => {
      const snapshot = await backend.snapshot();
      return buildWorkbenchEvaluationComparison(snapshot.evaluations);
    },
    evaluation: (input) => backend.evaluation(input),
    caseReview: async (input) => {
      if (backend.caseReview) {
        return await backend.caseReview(input);
      }
      const candidate = await backend.candidate({ id: input.candidateId });
      const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
      return createCaseReview({
        candidate,
        caseId: input.caseId,
        executions: buildCandidateCaseExecutionRefs({
          jobs,
          candidateId: input.candidateId,
          caseId: input.caseId,
          sampleIndex: input.sampleIndex,
        }),
      });
    },
    run: (input) => backend.run(input),
    executionTrace: async (input) => {
      if (backend.executionTrace) {
        return await backend.executionTrace(input);
      }
      if (!backend.jobInRun || !backend.traceForJob) {
        throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
      }
      const jobs = [await backend.jobInRun(input)];
      return {
        projectId: backend.projectId,
        runId: input.runId,
        executions: buildWorkbenchExecutionEvidence({
          jobs,
          traceIdPrefix: `${backend.projectId}-execution`,
          // Call through the backend so class-based backends keep `this`;
          // handing over the bare method reference would detach it.
          traceForJob: (job, role) => backend.traceForJob(job, role),
          traceSessionsForJob: backend.traceSessionsForJob
            ? (job, role) => backend.traceSessionsForJob(job, role)
            : undefined,
        }),
      };
    },
    executionFiles: (input) => backend.executionFiles(input),
    executionFileSurface: (input) => backend.executionFileSurface(input),
    lineage: async () => {
      const snapshot = await backend.snapshot();
      return buildCandidateLineage({
        summaries: snapshot.summaries,
        activeId: snapshot.activeId,
      });
    },
    diagnose: async (input = {}) => {
      const snapshot = await backend.snapshot();
      return await diagnoseWorkbenchFailures({
        snapshot,
        backend,
        targetId: input.targetId?.trim() || null,
      });
    },
  };
}
|
|
84
|
-
/**
 * Resolves the file path that should be selected.
 *
 * The requested path is trimmed and returned when some entry in `files`
 * has exactly that `path`; in every other case the choice is delegated to
 * `pickDefaultCandidateFilePath(files)`.
 *
 * @param {string | null | undefined} requestedPath - Path asked for by the caller.
 * @param {Array<{ path: string }>} files - Entries to search; only `path` is read here.
 * @returns {string | null} The trimmed requested path, or the default pick.
 */
export function selectedFilePath(requestedPath, files) {
    const wanted = requestedPath?.trim();
    // An empty/missing request never matches, so `files` is not scanned for it.
    const isListed = Boolean(wanted) && files.some((candidate) => candidate.path === wanted);
    return isListed ? wanted : pickDefaultCandidateFilePath(files);
}
|
|
91
|
-
/**
 * Picks the default file path from a list of file entries.
 *
 * Every entry's `path` is collected into a fresh array, that array is
 * ordered with `compareCandidateFilePreference`, and the first element is
 * returned.
 *
 * @param {Array<{ path: string }>} files - Entries to choose from; not mutated.
 * @returns {string | null} The best-ranked path, or `null` when the first
 *   sorted element is missing or nullish (e.g. `files` is empty).
 */
export function pickDefaultCandidateFilePath(files) {
    const rankedPaths = files.map((file) => file.path);
    rankedPaths.sort(compareCandidateFilePreference);
    return rankedPaths[0] ?? null;
}
|
|
96
|
-
/**
 * Sort comparator for candidate file paths.
 *
 * Orders by `scoreCandidateFilePreference` (lower score first) and breaks
 * ties with `String.prototype.localeCompare`.
 *
 * @param {string} left - First path.
 * @param {string} right - Second path.
 * @returns {number} Negative when `left` sorts first, positive when `right` does.
 */
function compareCandidateFilePreference(left, right) {
    const scoreGap = scoreCandidateFilePreference(left) - scoreCandidateFilePreference(right);
    if (scoreGap !== 0) {
        return scoreGap;
    }
    return left.localeCompare(right);
}
|
|
100
|
-
/**
 * Scores a path by file-name preference; lower scores are preferred.
 *
 * - 0: the path is `SKILL.md` or ends with `/SKILL.md`
 * - 1: any other path ending in `.md`
 * - 2: a path ending in `.yaml` or `.yml`
 * - 3: everything else
 *
 * @param {string} path - Path to score (matching is case-sensitive).
 * @returns {number} The preference score in the range 0..3.
 */
function scoreCandidateFilePreference(path) {
    // Ordered from most to least preferred; the index of the first match is the score.
    const tiers = [
        (value) => value.endsWith("/SKILL.md") || value === "SKILL.md",
        (value) => value.endsWith(".md"),
        (value) => value.endsWith(".yaml") || value.endsWith(".yml"),
    ];
    const tier = tiers.findIndex((matches) => matches(path));
    return tier === -1 ? tiers.length : tier;
}
|
|
112
|
-
/**
 * Collects failure records for one target or for the whole snapshot.
 *
 * - When `targetId` matches a run in `snapshot.runs`, the run is re-fetched
 *   through `backend.run` with jobs included and both run-level and job-level
 *   failures are reported.
 * - Otherwise, when it matches an evaluation in `snapshot.evaluations`, the
 *   evaluation is re-fetched through `backend.evaluation` and reported via
 *   `evaluationFailures`.
 * - Otherwise every run and every evaluation summary in the snapshot is
 *   scanned.
 *
 * NOTE(review): a non-empty `targetId` that matches neither a run nor an
 * evaluation also takes the snapshot-wide branch while the result still
 * echoes that `targetId` — confirm this fallback is intended.
 *
 * @param {{ snapshot: object, backend: object, targetId: string | null }} args
 * @returns {Promise<object>} `targetId`, the `failures` list, and counts of
 *   the entries whose `kind` is "run", "evaluation" and "job".
 */
async function diagnoseWorkbenchFailures(args) {
    const { snapshot, backend, targetId } = args;
    const matchedRun = targetId
        ? snapshot.runs.find((candidate) => candidate.id === targetId)
        : null;
    const matchedEvaluation = targetId
        ? snapshot.evaluations.find((candidate) => candidate.id === targetId)
        : null;
    let failures;
    if (targetId && matchedRun) {
        const runDetail = await backend.run({ id: matchedRun.id, includeJobs: true });
        failures = [
            ...runFailures(runDetail.run),
            ...jobFailures(runDetail.jobs ?? []),
        ];
    }
    else if (targetId && matchedEvaluation) {
        const evaluationDetail = await backend.evaluation({ id: matchedEvaluation.id });
        failures = [...evaluationFailures(evaluationDetail)];
    }
    else {
        // Snapshot-wide scan: runs first, then evaluation summaries.
        failures = [
            ...snapshot.runs.flatMap((run) => runFailures(run)),
            ...snapshot.evaluations.flatMap((evaluation) => evaluationSummaryFailures(evaluation)),
        ];
    }
    const countOfKind = (kind) => failures.filter((failure) => failure.kind === kind).length;
    return {
        targetId,
        failures,
        failedRunCount: countOfKind("run"),
        failedEvaluationCount: countOfKind("evaluation"),
        failedJobCount: countOfKind("job"),
    };
}
|
|
145
|
-
/**
 * Turns a run into zero or one failure records.
 *
 * A run counts as failed only when its `status` is "finished" and its
 * `outcome` is "error" or "cancelled".
 *
 * @param {object} run - Run to inspect; reads `id`, `status`, `outcome`,
 *   `outputCandidateId`, `candidateId` and `error`.
 * @returns {object[]} Empty array, or a single record of kind "run" whose
 *   `status` carries the run's outcome. `error` is included only when truthy.
 */
function runFailures(run) {
    const endedBadly = run.outcome === "error" || run.outcome === "cancelled";
    if (!(run.status === "finished" && endedBadly)) {
        return [];
    }
    const failure = {
        kind: "run",
        id: run.id,
        runId: run.id,
        // Prefer the produced candidate; a nullish result is normalized to undefined.
        candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
        status: run.outcome,
    };
    if (run.error) {
        failure.error = run.error;
    }
    return [failure];
}
|
|
158
|
-
/**
 * Turns an evaluation summary into zero or one failure records.
 *
 * The evaluation is treated as healthy only when its `status` is
 * "completed", its `errorSampleCount` is exactly 0, and it has no truthy
 * `error`; anything else is reported.
 *
 * @param {object} evaluation - Reads `id`, `runId`, `candidateId`, `status`,
 *   `errorSampleCount` and `error`.
 * @returns {object[]} Empty array, or a single record of kind "evaluation".
 *   `error` is included only when truthy.
 */
function evaluationSummaryFailures(evaluation) {
    const isHealthy = evaluation.status === "completed"
        && evaluation.errorSampleCount === 0
        && !evaluation.error;
    if (isHealthy) {
        return [];
    }
    const failure = {
        kind: "evaluation",
        id: evaluation.id,
        evaluationId: evaluation.id,
        runId: evaluation.runId,
        candidateId: evaluation.candidateId,
        status: evaluation.status,
    };
    if (evaluation.error) {
        failure.error = evaluation.error;
    }
    return [failure];
}
|
|
174
|
-
/**
 * Expands a detailed evaluation into evaluation-, sample- and case-level
 * failure records.
 *
 * Starts from `evaluationSummaryFailures(evaluation)`, then walks
 * `evaluation.evaluation.samples`. A sample is reported when it has a truthy
 * `error` or when any of its cases has a truthy `status` other than
 * "completed"; each such case of a reported sample is then reported too,
 * directly after its sample.
 *
 * @param {object} evaluation - Evaluation detail; reads `id`, `runId`,
 *   `candidateId` and `evaluation.samples` (each with `index`, `status`,
 *   `error` and optional `cases`).
 * @returns {object[]} Failure records of kind "evaluation", "sample" and "case".
 */
function evaluationFailures(evaluation) {
    const failures = evaluationSummaryFailures(evaluation);
    // Identity fields repeated on every sample and case record.
    const origin = {
        evaluationId: evaluation.id,
        runId: evaluation.runId,
        candidateId: evaluation.candidateId,
    };
    // Cases without a status are not counted as failures.
    const isUnfinished = (entry) => Boolean(entry.status) && entry.status !== "completed";
    for (const sample of evaluation.evaluation.samples) {
        const caseResults = sample.cases ?? [];
        const sampleHasProblem = Boolean(sample.error) || caseResults.some((entry) => isUnfinished(entry));
        if (!sampleHasProblem) {
            continue;
        }
        const sampleFailure = {
            kind: "sample",
            id: `${evaluation.id}:sample:${sample.index}`,
            ...origin,
            sampleIndex: sample.index,
            status: sample.status,
        };
        if (sample.error) {
            sampleFailure.error = sample.error;
        }
        failures.push(sampleFailure);
        for (const result of caseResults.filter((entry) => isUnfinished(entry))) {
            failures.push({
                kind: "case",
                id: `${evaluation.id}:case:${result.id}:sample:${sample.index}`,
                ...origin,
                caseId: result.id,
                sampleIndex: sample.index,
                status: result.status,
            });
        }
    }
    return failures;
}
|
|
208
|
-
/**
 * Builds failure records for the jobs whose status `isFailedJobStatus`
 * accepts.
 *
 * @param {object[]} jobs - Jobs to inspect; reads `id`, `runId`,
 *   `candidateId`, `status`, `attempt` and `error`.
 * @returns {object[]} One record of kind "job" per failed job, in input
 *   order. `attemptIndex` is `job.attempt` when that is a number and
 *   `undefined` otherwise; `error` is included only when truthy.
 */
function jobFailures(jobs) {
    return jobs.flatMap((job) => {
        if (!isFailedJobStatus(job.status)) {
            return [];
        }
        const failure = {
            kind: "job",
            id: job.id,
            jobId: job.id,
            runId: job.runId,
            candidateId: job.candidateId,
            status: job.status,
            attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
        };
        if (job.error) {
            failure.error = job.error;
        }
        return [failure];
    });
}
|
|
222
|
-
/**
 * Tells whether a job status counts as a failure.
 *
 * @param {unknown} status - Job status value.
 * @returns {boolean} `true` only for the exact strings "failed" and "cancelled".
 */
function isFailedJobStatus(status) {
    return ["failed", "cancelled"].includes(status);
}
|