@workbench-ai/workbench-core 0.0.67 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-graph.d.ts +4 -3
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +15 -14
- package/dist/execution-jobs.d.ts +5 -20
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +7 -91
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +10 -10
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +4 -1
- package/dist/execution-traces.js +1 -1
- package/dist/generic-spec.d.ts +29 -29
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +94 -92
- package/dist/index.d.ts +325 -220
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5716 -3900
- package/dist/runtime-dockerfile.d.ts +1 -1
- package/dist/runtime-dockerfile.d.ts.map +1 -1
- package/dist/runtime-dockerfile.js +4 -4
- package/dist/runtime-utils.d.ts +1 -1
- package/dist/runtime-utils.d.ts.map +1 -1
- package/dist/runtime-utils.js +3 -3
- package/dist/sandbox-backends/docker.js +7 -5
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +13 -9
- package/dist/skill-patch.d.ts +8 -0
- package/dist/skill-patch.d.ts.map +1 -0
- package/dist/{candidate-patch.js → skill-patch.js} +5 -5
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/candidate-patch.d.ts +0 -8
- package/dist/candidate-patch.d.ts.map +0 -1
- package/dist/execution-evidence.d.ts +0 -22
- package/dist/execution-evidence.d.ts.map +0 -1
- package/dist/execution-evidence.js +0 -302
- package/dist/inspection.d.ts +0 -117
- package/dist/inspection.d.ts.map +0 -1
- package/dist/inspection.js +0 -224
package/dist/inspection.d.ts
DELETED
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
|
|
2
|
-
import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
|
|
3
|
-
export interface WorkbenchInspectionErrorOptions {
|
|
4
|
-
status?: number;
|
|
5
|
-
}
|
|
6
|
-
export declare class WorkbenchInspectionError extends Error {
|
|
7
|
-
readonly status: number;
|
|
8
|
-
readonly statusCode: number;
|
|
9
|
-
constructor(message: string, options?: WorkbenchInspectionErrorOptions);
|
|
10
|
-
}
|
|
11
|
-
export interface WorkbenchInspectionFileListInput {
|
|
12
|
-
fingerprint?: string | null;
|
|
13
|
-
}
|
|
14
|
-
export interface WorkbenchInspectionFileSurface {
|
|
15
|
-
files: CandidateFileSummary[];
|
|
16
|
-
preview: CandidateFilePreview | null;
|
|
17
|
-
}
|
|
18
|
-
export interface WorkbenchInspectionFileSurfaceInput extends WorkbenchInspectionFileListInput {
|
|
19
|
-
path?: string | null;
|
|
20
|
-
view?: "diff" | "raw" | "rendered";
|
|
21
|
-
}
|
|
22
|
-
export interface WorkbenchInspectionCandidateInput {
|
|
23
|
-
id: string;
|
|
24
|
-
}
|
|
25
|
-
export interface WorkbenchInspectionCandidateFileSurfaceInput extends WorkbenchInspectionCandidateInput {
|
|
26
|
-
path?: string | null;
|
|
27
|
-
view?: "diff" | "raw" | "rendered";
|
|
28
|
-
}
|
|
29
|
-
export interface WorkbenchInspectionEvaluationInput {
|
|
30
|
-
id: string;
|
|
31
|
-
}
|
|
32
|
-
export interface WorkbenchInspectionCaseReviewInput {
|
|
33
|
-
candidateId: string;
|
|
34
|
-
caseId: string;
|
|
35
|
-
runId: string;
|
|
36
|
-
sampleIndex?: number;
|
|
37
|
-
}
|
|
38
|
-
export interface WorkbenchInspectionRunInput {
|
|
39
|
-
id: string;
|
|
40
|
-
includeJobs?: boolean;
|
|
41
|
-
}
|
|
42
|
-
export interface WorkbenchInspectionExecutionInput {
|
|
43
|
-
runId: string;
|
|
44
|
-
jobId: string;
|
|
45
|
-
}
|
|
46
|
-
export interface WorkbenchInspectionExecutionFileSurfaceInput extends WorkbenchInspectionExecutionInput {
|
|
47
|
-
path?: string | null;
|
|
48
|
-
view?: "diff" | "raw" | "rendered";
|
|
49
|
-
}
|
|
50
|
-
export interface WorkbenchInspectionRunDetail {
|
|
51
|
-
run: RunSummary;
|
|
52
|
-
jobs?: RemoteWorkbenchJob[];
|
|
53
|
-
}
|
|
54
|
-
export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
|
|
55
|
-
export interface WorkbenchFailureDetail {
|
|
56
|
-
kind: WorkbenchFailureKind;
|
|
57
|
-
id: string;
|
|
58
|
-
status?: string;
|
|
59
|
-
runId?: string;
|
|
60
|
-
candidateId?: string;
|
|
61
|
-
evaluationId?: string;
|
|
62
|
-
jobId?: string;
|
|
63
|
-
caseId?: string;
|
|
64
|
-
sampleIndex?: number;
|
|
65
|
-
attemptIndex?: number;
|
|
66
|
-
error?: string;
|
|
67
|
-
}
|
|
68
|
-
export interface WorkbenchFailureDiagnosis {
|
|
69
|
-
targetId: string | null;
|
|
70
|
-
failures: WorkbenchFailureDetail[];
|
|
71
|
-
failedRunCount: number;
|
|
72
|
-
failedEvaluationCount: number;
|
|
73
|
-
failedJobCount: number;
|
|
74
|
-
}
|
|
75
|
-
export interface WorkbenchInspectionBackend {
|
|
76
|
-
projectId: string;
|
|
77
|
-
snapshot(): Promise<RuntimeSnapshot>;
|
|
78
|
-
spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
79
|
-
sourceFiles(input: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
|
|
80
|
-
sourceFileSurface(input: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
81
|
-
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
82
|
-
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
|
|
83
|
-
candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
84
|
-
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
85
|
-
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
86
|
-
jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
|
|
87
|
-
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
|
|
88
|
-
executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
89
|
-
caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
90
|
-
executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
91
|
-
traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
|
|
92
|
-
traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
|
|
93
|
-
}
|
|
94
|
-
export interface WorkbenchInspection {
|
|
95
|
-
snapshot(): Promise<RuntimeSnapshot>;
|
|
96
|
-
spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
|
|
97
|
-
sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
|
|
98
|
-
sourceFileSurface(input?: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
99
|
-
candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
|
|
100
|
-
candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
|
|
101
|
-
candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
102
|
-
evaluations(): Promise<WorkbenchEvaluationComparison>;
|
|
103
|
-
evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
|
|
104
|
-
caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
|
|
105
|
-
run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
|
|
106
|
-
executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
|
|
107
|
-
executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
|
|
108
|
-
executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
|
|
109
|
-
lineage(): Promise<CandidateLineageGraph>;
|
|
110
|
-
diagnose(input?: {
|
|
111
|
-
targetId?: string | null;
|
|
112
|
-
}): Promise<WorkbenchFailureDiagnosis>;
|
|
113
|
-
}
|
|
114
|
-
export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
|
|
115
|
-
export declare function selectedFilePath(requestedPath: string | null | undefined, files: readonly CandidateFileSummary[]): string | null;
|
|
116
|
-
export declare function pickDefaultCandidateFilePath(files: readonly CandidateFileSummary[]): string | null;
|
|
117
|
-
//# sourceMappingURL=inspection.d.ts.map
|
package/dist/inspection.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAW1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,8BAA8B;IAC7C,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC;IAC3F,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACtF,iBAAiB,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACvG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,iBAAiB,CAAC,KAAK,CAAC,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACxG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAyErB;AAED,wBAAgB,gBAAgB,CAC9B,aAAa,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,EACxC,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAMf;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAIf"}
|
package/dist/inspection.js
DELETED
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
|
|
2
|
-
import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
|
|
3
|
-
import { candidateRecordWithoutDerivedFields, createCaseReview, } from "./index.js";
|
|
4
|
-
export class WorkbenchInspectionError extends Error {
|
|
5
|
-
status;
|
|
6
|
-
statusCode;
|
|
7
|
-
constructor(message, options = {}) {
|
|
8
|
-
super(message);
|
|
9
|
-
this.name = "WorkbenchInspectionError";
|
|
10
|
-
this.status = options.status ?? 400;
|
|
11
|
-
this.statusCode = this.status;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
export function createWorkbenchInspection(backend) {
|
|
15
|
-
return {
|
|
16
|
-
snapshot: () => backend.snapshot(),
|
|
17
|
-
spec: (input = {}) => backend.spec(input),
|
|
18
|
-
sourceFiles: (input = {}) => backend.sourceFiles(input),
|
|
19
|
-
sourceFileSurface: (input = {}) => backend.sourceFileSurface(input),
|
|
20
|
-
candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
|
|
21
|
-
candidateFiles: (input) => backend.candidateFiles(input),
|
|
22
|
-
candidateFileSurface: (input) => backend.candidateFileSurface(input),
|
|
23
|
-
evaluations: async () => {
|
|
24
|
-
const snapshot = await backend.snapshot();
|
|
25
|
-
return buildWorkbenchEvaluationComparison(snapshot.evaluations);
|
|
26
|
-
},
|
|
27
|
-
evaluation: (input) => backend.evaluation(input),
|
|
28
|
-
caseReview: async (input) => {
|
|
29
|
-
if (backend.caseReview) {
|
|
30
|
-
return await backend.caseReview(input);
|
|
31
|
-
}
|
|
32
|
-
const candidate = await backend.candidate({ id: input.candidateId });
|
|
33
|
-
const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
|
|
34
|
-
return createCaseReview({
|
|
35
|
-
candidate,
|
|
36
|
-
caseId: input.caseId,
|
|
37
|
-
executions: buildCandidateCaseExecutionRefs({
|
|
38
|
-
jobs,
|
|
39
|
-
candidateId: input.candidateId,
|
|
40
|
-
caseId: input.caseId,
|
|
41
|
-
sampleIndex: input.sampleIndex,
|
|
42
|
-
}),
|
|
43
|
-
});
|
|
44
|
-
},
|
|
45
|
-
run: (input) => backend.run(input),
|
|
46
|
-
executionTrace: async (input) => {
|
|
47
|
-
if (backend.executionTrace) {
|
|
48
|
-
return await backend.executionTrace(input);
|
|
49
|
-
}
|
|
50
|
-
if (!backend.jobInRun || !backend.traceForJob) {
|
|
51
|
-
throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
|
|
52
|
-
}
|
|
53
|
-
const jobs = [await backend.jobInRun(input)];
|
|
54
|
-
return {
|
|
55
|
-
projectId: backend.projectId,
|
|
56
|
-
runId: input.runId,
|
|
57
|
-
executions: buildWorkbenchExecutionEvidence({
|
|
58
|
-
jobs,
|
|
59
|
-
traceIdPrefix: `${backend.projectId}-execution`,
|
|
60
|
-
traceForJob: backend.traceForJob,
|
|
61
|
-
traceSessionsForJob: backend.traceSessionsForJob,
|
|
62
|
-
}),
|
|
63
|
-
};
|
|
64
|
-
},
|
|
65
|
-
executionFiles: (input) => backend.executionFiles(input),
|
|
66
|
-
executionFileSurface: (input) => backend.executionFileSurface(input),
|
|
67
|
-
lineage: async () => {
|
|
68
|
-
const snapshot = await backend.snapshot();
|
|
69
|
-
return buildCandidateLineage({
|
|
70
|
-
summaries: snapshot.summaries,
|
|
71
|
-
activeId: snapshot.activeId,
|
|
72
|
-
});
|
|
73
|
-
},
|
|
74
|
-
diagnose: async (input = {}) => {
|
|
75
|
-
const snapshot = await backend.snapshot();
|
|
76
|
-
return await diagnoseWorkbenchFailures({
|
|
77
|
-
snapshot,
|
|
78
|
-
backend,
|
|
79
|
-
targetId: input.targetId?.trim() || null,
|
|
80
|
-
});
|
|
81
|
-
},
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
export function selectedFilePath(requestedPath, files) {
|
|
85
|
-
const normalizedPath = requestedPath?.trim();
|
|
86
|
-
if (normalizedPath && files.some((file) => file.path === normalizedPath)) {
|
|
87
|
-
return normalizedPath;
|
|
88
|
-
}
|
|
89
|
-
return pickDefaultCandidateFilePath(files);
|
|
90
|
-
}
|
|
91
|
-
export function pickDefaultCandidateFilePath(files) {
|
|
92
|
-
return files
|
|
93
|
-
.map((entry) => entry.path)
|
|
94
|
-
.sort(compareCandidateFilePreference)[0] ?? null;
|
|
95
|
-
}
|
|
96
|
-
function compareCandidateFilePreference(left, right) {
|
|
97
|
-
const order = scoreCandidateFilePreference(left) - scoreCandidateFilePreference(right);
|
|
98
|
-
return order === 0 ? left.localeCompare(right) : order;
|
|
99
|
-
}
|
|
100
|
-
function scoreCandidateFilePreference(path) {
|
|
101
|
-
if (path.endsWith("/SKILL.md") || path === "SKILL.md") {
|
|
102
|
-
return 0;
|
|
103
|
-
}
|
|
104
|
-
if (path.endsWith(".md")) {
|
|
105
|
-
return 1;
|
|
106
|
-
}
|
|
107
|
-
if (path.endsWith(".yaml") || path.endsWith(".yml")) {
|
|
108
|
-
return 2;
|
|
109
|
-
}
|
|
110
|
-
return 3;
|
|
111
|
-
}
|
|
112
|
-
async function diagnoseWorkbenchFailures(args) {
|
|
113
|
-
const targetRun = args.targetId
|
|
114
|
-
? args.snapshot.runs.find((run) => run.id === args.targetId)
|
|
115
|
-
: null;
|
|
116
|
-
const targetEvaluation = args.targetId
|
|
117
|
-
? args.snapshot.evaluations.find((evaluation) => evaluation.id === args.targetId)
|
|
118
|
-
: null;
|
|
119
|
-
const failures = [];
|
|
120
|
-
if (args.targetId && targetRun) {
|
|
121
|
-
const detail = await args.backend.run({ id: targetRun.id, includeJobs: true });
|
|
122
|
-
failures.push(...runFailures(detail.run));
|
|
123
|
-
failures.push(...jobFailures(detail.jobs ?? []));
|
|
124
|
-
}
|
|
125
|
-
else if (args.targetId && targetEvaluation) {
|
|
126
|
-
const evaluation = await args.backend.evaluation({ id: targetEvaluation.id });
|
|
127
|
-
failures.push(...evaluationFailures(evaluation));
|
|
128
|
-
}
|
|
129
|
-
else {
|
|
130
|
-
for (const run of args.snapshot.runs) {
|
|
131
|
-
failures.push(...runFailures(run));
|
|
132
|
-
}
|
|
133
|
-
for (const evaluation of args.snapshot.evaluations) {
|
|
134
|
-
failures.push(...evaluationSummaryFailures(evaluation));
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
return {
|
|
138
|
-
targetId: args.targetId,
|
|
139
|
-
failures,
|
|
140
|
-
failedRunCount: failures.filter((failure) => failure.kind === "run").length,
|
|
141
|
-
failedEvaluationCount: failures.filter((failure) => failure.kind === "evaluation").length,
|
|
142
|
-
failedJobCount: failures.filter((failure) => failure.kind === "job").length,
|
|
143
|
-
};
|
|
144
|
-
}
|
|
145
|
-
function runFailures(run) {
|
|
146
|
-
if (run.status !== "finished" || (run.outcome !== "error" && run.outcome !== "cancelled")) {
|
|
147
|
-
return [];
|
|
148
|
-
}
|
|
149
|
-
return [{
|
|
150
|
-
kind: "run",
|
|
151
|
-
id: run.id,
|
|
152
|
-
runId: run.id,
|
|
153
|
-
candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
|
|
154
|
-
status: run.outcome,
|
|
155
|
-
...(run.error ? { error: run.error } : {}),
|
|
156
|
-
}];
|
|
157
|
-
}
|
|
158
|
-
function evaluationSummaryFailures(evaluation) {
|
|
159
|
-
if (evaluation.status === "completed" &&
|
|
160
|
-
evaluation.errorSampleCount === 0 &&
|
|
161
|
-
!evaluation.error) {
|
|
162
|
-
return [];
|
|
163
|
-
}
|
|
164
|
-
return [{
|
|
165
|
-
kind: "evaluation",
|
|
166
|
-
id: evaluation.id,
|
|
167
|
-
evaluationId: evaluation.id,
|
|
168
|
-
runId: evaluation.runId,
|
|
169
|
-
candidateId: evaluation.candidateId,
|
|
170
|
-
status: evaluation.status,
|
|
171
|
-
...(evaluation.error ? { error: evaluation.error } : {}),
|
|
172
|
-
}];
|
|
173
|
-
}
|
|
174
|
-
function evaluationFailures(evaluation) {
|
|
175
|
-
const failures = evaluationSummaryFailures(evaluation);
|
|
176
|
-
for (const sample of evaluation.evaluation.samples) {
|
|
177
|
-
if (!sample.error && !(sample.cases ?? []).some((entry) => entry.status && entry.status !== "completed")) {
|
|
178
|
-
continue;
|
|
179
|
-
}
|
|
180
|
-
failures.push({
|
|
181
|
-
kind: "sample",
|
|
182
|
-
id: `${evaluation.id}:sample:${sample.index}`,
|
|
183
|
-
evaluationId: evaluation.id,
|
|
184
|
-
runId: evaluation.runId,
|
|
185
|
-
candidateId: evaluation.candidateId,
|
|
186
|
-
sampleIndex: sample.index,
|
|
187
|
-
status: sample.status,
|
|
188
|
-
...(sample.error ? { error: sample.error } : {}),
|
|
189
|
-
});
|
|
190
|
-
for (const result of sample.cases ?? []) {
|
|
191
|
-
if (!result.status || result.status === "completed") {
|
|
192
|
-
continue;
|
|
193
|
-
}
|
|
194
|
-
failures.push({
|
|
195
|
-
kind: "case",
|
|
196
|
-
id: `${evaluation.id}:case:${result.id}:sample:${sample.index}`,
|
|
197
|
-
evaluationId: evaluation.id,
|
|
198
|
-
runId: evaluation.runId,
|
|
199
|
-
candidateId: evaluation.candidateId,
|
|
200
|
-
caseId: result.id,
|
|
201
|
-
sampleIndex: sample.index,
|
|
202
|
-
status: result.status,
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
return failures;
|
|
207
|
-
}
|
|
208
|
-
function jobFailures(jobs) {
|
|
209
|
-
return jobs
|
|
210
|
-
.filter((job) => isFailedJobStatus(job.status))
|
|
211
|
-
.map((job) => ({
|
|
212
|
-
kind: "job",
|
|
213
|
-
id: job.id,
|
|
214
|
-
jobId: job.id,
|
|
215
|
-
runId: job.runId,
|
|
216
|
-
candidateId: job.candidateId,
|
|
217
|
-
status: job.status,
|
|
218
|
-
attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
|
|
219
|
-
...(job.error ? { error: job.error } : {}),
|
|
220
|
-
}));
|
|
221
|
-
}
|
|
222
|
-
function isFailedJobStatus(status) {
|
|
223
|
-
return status === "failed" || status === "cancelled";
|
|
224
|
-
}
|