@workbench-ai/workbench-core 0.0.67 → 0.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/execution-graph.d.ts +4 -3
  2. package/dist/execution-graph.d.ts.map +1 -1
  3. package/dist/execution-graph.js +15 -14
  4. package/dist/execution-jobs.d.ts +5 -20
  5. package/dist/execution-jobs.d.ts.map +1 -1
  6. package/dist/execution-jobs.js +7 -91
  7. package/dist/execution-outputs.d.ts +2 -2
  8. package/dist/execution-outputs.d.ts.map +1 -1
  9. package/dist/execution-outputs.js +10 -10
  10. package/dist/execution-runtime-types.d.ts +1 -1
  11. package/dist/execution-runtime-types.d.ts.map +1 -1
  12. package/dist/execution-scheduler.d.ts.map +1 -1
  13. package/dist/execution-scheduler.js +4 -1
  14. package/dist/execution-traces.js +1 -1
  15. package/dist/generic-spec.d.ts +29 -29
  16. package/dist/generic-spec.d.ts.map +1 -1
  17. package/dist/generic-spec.js +94 -92
  18. package/dist/index.d.ts +325 -220
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +5716 -3900
  21. package/dist/runtime-dockerfile.d.ts +1 -1
  22. package/dist/runtime-dockerfile.d.ts.map +1 -1
  23. package/dist/runtime-dockerfile.js +4 -4
  24. package/dist/runtime-utils.d.ts +1 -1
  25. package/dist/runtime-utils.d.ts.map +1 -1
  26. package/dist/runtime-utils.js +3 -3
  27. package/dist/sandbox-backends/docker.js +7 -5
  28. package/dist/sandbox-inputs.js +3 -3
  29. package/dist/sandbox-plane.d.ts.map +1 -1
  30. package/dist/sandbox-plane.js +13 -9
  31. package/dist/skill-patch.d.ts +8 -0
  32. package/dist/skill-patch.d.ts.map +1 -0
  33. package/dist/{candidate-patch.js → skill-patch.js} +5 -5
  34. package/package.json +3 -3
  35. package/worker/sandbox-adapter-runner.cjs +2 -2
  36. package/dist/candidate-patch.d.ts +0 -8
  37. package/dist/candidate-patch.d.ts.map +0 -1
  38. package/dist/execution-evidence.d.ts +0 -22
  39. package/dist/execution-evidence.d.ts.map +0 -1
  40. package/dist/execution-evidence.js +0 -302
  41. package/dist/inspection.d.ts +0 -117
  42. package/dist/inspection.d.ts.map +0 -1
  43. package/dist/inspection.js +0 -224
@@ -1,117 +0,0 @@
1
- import { type WorkbenchEvaluationComparison } from "@workbench-ai/workbench-contract";
2
- import type { AuthoredWorkbenchSourceDocument, CandidateCaseReview, CandidateFilePreview, CandidateFileSummary, CandidateLineageGraph, CandidateRecord, EvaluationScorecard, RemoteWorkbenchJob, RunSummary, RuntimeSnapshot, WorkbenchExecutionEventRole, WorkbenchExecutionTrace, WorkbenchExecutionTraceDetail, WorkbenchTraceSession } from "@workbench-ai/workbench-contract";
3
- export interface WorkbenchInspectionErrorOptions {
4
- status?: number;
5
- }
6
- export declare class WorkbenchInspectionError extends Error {
7
- readonly status: number;
8
- readonly statusCode: number;
9
- constructor(message: string, options?: WorkbenchInspectionErrorOptions);
10
- }
11
- export interface WorkbenchInspectionFileListInput {
12
- fingerprint?: string | null;
13
- }
14
- export interface WorkbenchInspectionFileSurface {
15
- files: CandidateFileSummary[];
16
- preview: CandidateFilePreview | null;
17
- }
18
- export interface WorkbenchInspectionFileSurfaceInput extends WorkbenchInspectionFileListInput {
19
- path?: string | null;
20
- view?: "diff" | "raw" | "rendered";
21
- }
22
- export interface WorkbenchInspectionCandidateInput {
23
- id: string;
24
- }
25
- export interface WorkbenchInspectionCandidateFileSurfaceInput extends WorkbenchInspectionCandidateInput {
26
- path?: string | null;
27
- view?: "diff" | "raw" | "rendered";
28
- }
29
- export interface WorkbenchInspectionEvaluationInput {
30
- id: string;
31
- }
32
- export interface WorkbenchInspectionCaseReviewInput {
33
- candidateId: string;
34
- caseId: string;
35
- runId: string;
36
- sampleIndex?: number;
37
- }
38
- export interface WorkbenchInspectionRunInput {
39
- id: string;
40
- includeJobs?: boolean;
41
- }
42
- export interface WorkbenchInspectionExecutionInput {
43
- runId: string;
44
- jobId: string;
45
- }
46
- export interface WorkbenchInspectionExecutionFileSurfaceInput extends WorkbenchInspectionExecutionInput {
47
- path?: string | null;
48
- view?: "diff" | "raw" | "rendered";
49
- }
50
- export interface WorkbenchInspectionRunDetail {
51
- run: RunSummary;
52
- jobs?: RemoteWorkbenchJob[];
53
- }
54
- export type WorkbenchFailureKind = "run" | "evaluation" | "sample" | "case" | "job";
55
- export interface WorkbenchFailureDetail {
56
- kind: WorkbenchFailureKind;
57
- id: string;
58
- status?: string;
59
- runId?: string;
60
- candidateId?: string;
61
- evaluationId?: string;
62
- jobId?: string;
63
- caseId?: string;
64
- sampleIndex?: number;
65
- attemptIndex?: number;
66
- error?: string;
67
- }
68
- export interface WorkbenchFailureDiagnosis {
69
- targetId: string | null;
70
- failures: WorkbenchFailureDetail[];
71
- failedRunCount: number;
72
- failedEvaluationCount: number;
73
- failedJobCount: number;
74
- }
75
- export interface WorkbenchInspectionBackend {
76
- projectId: string;
77
- snapshot(): Promise<RuntimeSnapshot>;
78
- spec(input: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
79
- sourceFiles(input: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
80
- sourceFileSurface(input: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
81
- candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
82
- candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
83
- candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
84
- evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
85
- run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
86
- jobInRun?(input: WorkbenchInspectionExecutionInput): Promise<RemoteWorkbenchJob>;
87
- executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
88
- executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
89
- caseReview?(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
90
- executionTrace?(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
91
- traceForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchExecutionTrace;
92
- traceSessionsForJob?(job: RemoteWorkbenchJob, role: WorkbenchExecutionEventRole): WorkbenchTraceSession[];
93
- }
94
- export interface WorkbenchInspection {
95
- snapshot(): Promise<RuntimeSnapshot>;
96
- spec(input?: WorkbenchInspectionFileListInput): Promise<AuthoredWorkbenchSourceDocument>;
97
- sourceFiles(input?: WorkbenchInspectionFileListInput): Promise<CandidateFileSummary[]>;
98
- sourceFileSurface(input?: WorkbenchInspectionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
99
- candidate(input: WorkbenchInspectionCandidateInput): Promise<CandidateRecord>;
100
- candidateFiles(input: WorkbenchInspectionCandidateInput): Promise<CandidateFileSummary[]>;
101
- candidateFileSurface(input: WorkbenchInspectionCandidateFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
102
- evaluations(): Promise<WorkbenchEvaluationComparison>;
103
- evaluation(input: WorkbenchInspectionEvaluationInput): Promise<EvaluationScorecard>;
104
- caseReview(input: WorkbenchInspectionCaseReviewInput): Promise<CandidateCaseReview>;
105
- run(input: WorkbenchInspectionRunInput): Promise<WorkbenchInspectionRunDetail>;
106
- executionTrace(input: WorkbenchInspectionExecutionInput): Promise<WorkbenchExecutionTraceDetail>;
107
- executionFiles(input: WorkbenchInspectionExecutionInput): Promise<CandidateFileSummary[]>;
108
- executionFileSurface(input: WorkbenchInspectionExecutionFileSurfaceInput): Promise<WorkbenchInspectionFileSurface>;
109
- lineage(): Promise<CandidateLineageGraph>;
110
- diagnose(input?: {
111
- targetId?: string | null;
112
- }): Promise<WorkbenchFailureDiagnosis>;
113
- }
114
- export declare function createWorkbenchInspection(backend: WorkbenchInspectionBackend): WorkbenchInspection;
115
- export declare function selectedFilePath(requestedPath: string | null | undefined, files: readonly CandidateFileSummary[]): string | null;
116
- export declare function pickDefaultCandidateFilePath(files: readonly CandidateFileSummary[]): string | null;
117
- //# sourceMappingURL=inspection.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"inspection.d.ts","sourceRoot":"","sources":["../src/inspection.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,6BAA6B,EACnC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,+BAA+B,EAC/B,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,eAAe,EACf,mBAAmB,EAEnB,kBAAkB,EAElB,UAAU,EACV,eAAe,EACf,2BAA2B,EAC3B,uBAAuB,EACvB,6BAA6B,EAC7B,qBAAqB,EACtB,MAAM,kCAAkC,CAAC;AAW1C,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,wBAAyB,SAAQ,KAAK;IACjD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,+BAAoC;CAM3E;AAED,MAAM,WAAW,gCAAgC;IAC/C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,8BAA8B;IAC7C,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,mCAAoC,SAAQ,gCAAgC;IAC3F,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,iCAAiC;IAChD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,kCAAkC;IACjD,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,kCAAkC;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,2BAA2B;IAC1C,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,4CAA6C,SAAQ,iCAAiC;IACrG,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,UAAU,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,GAAG,EAAE,UAAU,CAAC;IAChB,IAAI,CAAC,EAAE,kBAAkB,EAAE,CAAC;CAC7B;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;AAEpF,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,sBAAsB,EAAE,CAAC;IACnC,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACxF,WAAW,CAAC,KAAK,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACtF,iBAAiB,CAAC,KAAK,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACvG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,QAAQ,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjF,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,UAAU,CAAC,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrF,cAAc,CAAC,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IAClG,WAAW,CAAC,CACV,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,uBAAuB,CAAC;IAC3B,mBAAmB,CAAC,CAClB,GAAG,EAAE,kBAAkB,EACvB,IAAI,EAAE,2BAA2B,GAChC,qBAAqB,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,+BAA+B,CAAC,CAAC;IACzF,WAAW,CAAC,KAAK,CAAC,EAAE,gCAAgC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IACvF,iBAAiB,CAAC,KAAK,CAAC,EAAE,mCAAmC,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACxG,SAAS,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAC9E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,WAAW,IAAI,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACtD,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,UAAU,CAAC,KAAK,EAAE,kCAAkC,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACpF,GAAG,CAAC,KAAK,EAAE,2BAA2B,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;IAC/E,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;IACjG,cAAc,CAAC,KAAK,EAAE,iCAAiC,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAC1F,oBAAoB,CAAC,KAAK,EAAE,4CAA4C,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;IACnH,OAAO,IAAI,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC1C,QAAQ,CAAC,KAAK,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;CACpF;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,0BAA0B,GAClC,mBAAmB,CAyErB;AAED,wBAAgB,gBAAgB,CAC9B,aAAa,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,EACxC,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAMf;AAED,wBAAgB,4BAA4B,CAC1C,KAAK,EAAE,SAAS,oBAAoB,EAAE,GACrC,MAAM,GAAG,IAAI,CAIf"}
@@ -1,224 +0,0 @@
1
- import { buildCandidateLineage, buildWorkbenchEvaluationComparison, } from "@workbench-ai/workbench-contract";
2
- import { buildCandidateCaseExecutionRefs, buildWorkbenchExecutionEvidence, } from "./execution-evidence.js";
3
- import { candidateRecordWithoutDerivedFields, createCaseReview, } from "./index.js";
4
- export class WorkbenchInspectionError extends Error {
5
- status;
6
- statusCode;
7
- constructor(message, options = {}) {
8
- super(message);
9
- this.name = "WorkbenchInspectionError";
10
- this.status = options.status ?? 400;
11
- this.statusCode = this.status;
12
- }
13
- }
14
- export function createWorkbenchInspection(backend) {
15
- return {
16
- snapshot: () => backend.snapshot(),
17
- spec: (input = {}) => backend.spec(input),
18
- sourceFiles: (input = {}) => backend.sourceFiles(input),
19
- sourceFileSurface: (input = {}) => backend.sourceFileSurface(input),
20
- candidate: async (input) => candidateRecordWithoutDerivedFields(await backend.candidate(input)),
21
- candidateFiles: (input) => backend.candidateFiles(input),
22
- candidateFileSurface: (input) => backend.candidateFileSurface(input),
23
- evaluations: async () => {
24
- const snapshot = await backend.snapshot();
25
- return buildWorkbenchEvaluationComparison(snapshot.evaluations);
26
- },
27
- evaluation: (input) => backend.evaluation(input),
28
- caseReview: async (input) => {
29
- if (backend.caseReview) {
30
- return await backend.caseReview(input);
31
- }
32
- const candidate = await backend.candidate({ id: input.candidateId });
33
- const jobs = (await backend.run({ id: input.runId, includeJobs: true })).jobs ?? [];
34
- return createCaseReview({
35
- candidate,
36
- caseId: input.caseId,
37
- executions: buildCandidateCaseExecutionRefs({
38
- jobs,
39
- candidateId: input.candidateId,
40
- caseId: input.caseId,
41
- sampleIndex: input.sampleIndex,
42
- }),
43
- });
44
- },
45
- run: (input) => backend.run(input),
46
- executionTrace: async (input) => {
47
- if (backend.executionTrace) {
48
- return await backend.executionTrace(input);
49
- }
50
- if (!backend.jobInRun || !backend.traceForJob) {
51
- throw new WorkbenchInspectionError("Execution traces are not available for this Workbench inspection backend.", { status: 404 });
52
- }
53
- const jobs = [await backend.jobInRun(input)];
54
- return {
55
- projectId: backend.projectId,
56
- runId: input.runId,
57
- executions: buildWorkbenchExecutionEvidence({
58
- jobs,
59
- traceIdPrefix: `${backend.projectId}-execution`,
60
- traceForJob: backend.traceForJob,
61
- traceSessionsForJob: backend.traceSessionsForJob,
62
- }),
63
- };
64
- },
65
- executionFiles: (input) => backend.executionFiles(input),
66
- executionFileSurface: (input) => backend.executionFileSurface(input),
67
- lineage: async () => {
68
- const snapshot = await backend.snapshot();
69
- return buildCandidateLineage({
70
- summaries: snapshot.summaries,
71
- activeId: snapshot.activeId,
72
- });
73
- },
74
- diagnose: async (input = {}) => {
75
- const snapshot = await backend.snapshot();
76
- return await diagnoseWorkbenchFailures({
77
- snapshot,
78
- backend,
79
- targetId: input.targetId?.trim() || null,
80
- });
81
- },
82
- };
83
- }
84
- export function selectedFilePath(requestedPath, files) {
85
- const normalizedPath = requestedPath?.trim();
86
- if (normalizedPath && files.some((file) => file.path === normalizedPath)) {
87
- return normalizedPath;
88
- }
89
- return pickDefaultCandidateFilePath(files);
90
- }
91
- export function pickDefaultCandidateFilePath(files) {
92
- return files
93
- .map((entry) => entry.path)
94
- .sort(compareCandidateFilePreference)[0] ?? null;
95
- }
96
- function compareCandidateFilePreference(left, right) {
97
- const order = scoreCandidateFilePreference(left) - scoreCandidateFilePreference(right);
98
- return order === 0 ? left.localeCompare(right) : order;
99
- }
100
- function scoreCandidateFilePreference(path) {
101
- if (path.endsWith("/SKILL.md") || path === "SKILL.md") {
102
- return 0;
103
- }
104
- if (path.endsWith(".md")) {
105
- return 1;
106
- }
107
- if (path.endsWith(".yaml") || path.endsWith(".yml")) {
108
- return 2;
109
- }
110
- return 3;
111
- }
112
- async function diagnoseWorkbenchFailures(args) {
113
- const targetRun = args.targetId
114
- ? args.snapshot.runs.find((run) => run.id === args.targetId)
115
- : null;
116
- const targetEvaluation = args.targetId
117
- ? args.snapshot.evaluations.find((evaluation) => evaluation.id === args.targetId)
118
- : null;
119
- const failures = [];
120
- if (args.targetId && targetRun) {
121
- const detail = await args.backend.run({ id: targetRun.id, includeJobs: true });
122
- failures.push(...runFailures(detail.run));
123
- failures.push(...jobFailures(detail.jobs ?? []));
124
- }
125
- else if (args.targetId && targetEvaluation) {
126
- const evaluation = await args.backend.evaluation({ id: targetEvaluation.id });
127
- failures.push(...evaluationFailures(evaluation));
128
- }
129
- else {
130
- for (const run of args.snapshot.runs) {
131
- failures.push(...runFailures(run));
132
- }
133
- for (const evaluation of args.snapshot.evaluations) {
134
- failures.push(...evaluationSummaryFailures(evaluation));
135
- }
136
- }
137
- return {
138
- targetId: args.targetId,
139
- failures,
140
- failedRunCount: failures.filter((failure) => failure.kind === "run").length,
141
- failedEvaluationCount: failures.filter((failure) => failure.kind === "evaluation").length,
142
- failedJobCount: failures.filter((failure) => failure.kind === "job").length,
143
- };
144
- }
145
- function runFailures(run) {
146
- if (run.status !== "finished" || (run.outcome !== "error" && run.outcome !== "cancelled")) {
147
- return [];
148
- }
149
- return [{
150
- kind: "run",
151
- id: run.id,
152
- runId: run.id,
153
- candidateId: run.outputCandidateId ?? run.candidateId ?? undefined,
154
- status: run.outcome,
155
- ...(run.error ? { error: run.error } : {}),
156
- }];
157
- }
158
- function evaluationSummaryFailures(evaluation) {
159
- if (evaluation.status === "completed" &&
160
- evaluation.errorSampleCount === 0 &&
161
- !evaluation.error) {
162
- return [];
163
- }
164
- return [{
165
- kind: "evaluation",
166
- id: evaluation.id,
167
- evaluationId: evaluation.id,
168
- runId: evaluation.runId,
169
- candidateId: evaluation.candidateId,
170
- status: evaluation.status,
171
- ...(evaluation.error ? { error: evaluation.error } : {}),
172
- }];
173
- }
174
- function evaluationFailures(evaluation) {
175
- const failures = evaluationSummaryFailures(evaluation);
176
- for (const sample of evaluation.evaluation.samples) {
177
- if (!sample.error && !(sample.cases ?? []).some((entry) => entry.status && entry.status !== "completed")) {
178
- continue;
179
- }
180
- failures.push({
181
- kind: "sample",
182
- id: `${evaluation.id}:sample:${sample.index}`,
183
- evaluationId: evaluation.id,
184
- runId: evaluation.runId,
185
- candidateId: evaluation.candidateId,
186
- sampleIndex: sample.index,
187
- status: sample.status,
188
- ...(sample.error ? { error: sample.error } : {}),
189
- });
190
- for (const result of sample.cases ?? []) {
191
- if (!result.status || result.status === "completed") {
192
- continue;
193
- }
194
- failures.push({
195
- kind: "case",
196
- id: `${evaluation.id}:case:${result.id}:sample:${sample.index}`,
197
- evaluationId: evaluation.id,
198
- runId: evaluation.runId,
199
- candidateId: evaluation.candidateId,
200
- caseId: result.id,
201
- sampleIndex: sample.index,
202
- status: result.status,
203
- });
204
- }
205
- }
206
- return failures;
207
- }
208
- function jobFailures(jobs) {
209
- return jobs
210
- .filter((job) => isFailedJobStatus(job.status))
211
- .map((job) => ({
212
- kind: "job",
213
- id: job.id,
214
- jobId: job.id,
215
- runId: job.runId,
216
- candidateId: job.candidateId,
217
- status: job.status,
218
- attemptIndex: typeof job.attempt === "number" ? job.attempt : undefined,
219
- ...(job.error ? { error: job.error } : {}),
220
- }));
221
- }
222
- function isFailedJobStatus(status) {
223
- return status === "failed" || status === "cancelled";
224
- }