@workbench-ai/workbench-core 0.0.49 → 0.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/candidate-patch.d.ts +8 -0
- package/dist/candidate-patch.d.ts.map +1 -0
- package/dist/{subject-patch.js → candidate-patch.js} +5 -5
- package/dist/execution-evidence.d.ts +5 -5
- package/dist/execution-evidence.d.ts.map +1 -1
- package/dist/execution-evidence.js +8 -8
- package/dist/execution-graph.d.ts +2 -2
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +13 -13
- package/dist/execution-jobs.d.ts +7 -6
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +32 -17
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +25 -13
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-traces.js +7 -7
- package/dist/execution-usage.js +9 -9
- package/dist/generic-spec.d.ts +46 -30
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +173 -80
- package/dist/index.d.ts +68 -39
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +805 -359
- package/dist/runtime-utils.d.ts +1 -1
- package/dist/runtime-utils.d.ts.map +1 -1
- package/dist/runtime-utils.js +3 -3
- package/dist/sandbox-backends/docker.js +5 -5
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.js +7 -7
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/subject-patch.d.ts +0 -8
- package/dist/subject-patch.d.ts.map +0 -1
package/dist/execution-usage.js
CHANGED
|
@@ -12,7 +12,7 @@ const NUMERIC_USAGE_FIELDS = [
|
|
|
12
12
|
"costUsd",
|
|
13
13
|
];
|
|
14
14
|
const USAGE_ROLES = [
|
|
15
|
-
"
|
|
15
|
+
"improver",
|
|
16
16
|
"runner",
|
|
17
17
|
"engine",
|
|
18
18
|
];
|
|
@@ -44,18 +44,18 @@ export function completeUsageSummary(usage) {
|
|
|
44
44
|
if (!usage) {
|
|
45
45
|
return undefined;
|
|
46
46
|
}
|
|
47
|
-
const
|
|
47
|
+
const improver = usage.improver ? normalizeExecutionUsage(usage.improver) : undefined;
|
|
48
48
|
const runner = usage.runner ? normalizeExecutionUsage(usage.runner) : undefined;
|
|
49
49
|
const engine = usage.engine ? normalizeExecutionUsage(usage.engine) : undefined;
|
|
50
50
|
const roleTotal = mergeExecutionUsage([
|
|
51
|
-
|
|
51
|
+
improver,
|
|
52
52
|
runner,
|
|
53
53
|
engine,
|
|
54
54
|
]);
|
|
55
55
|
const total = roleTotal ?? normalizeExecutionUsage(usage.total);
|
|
56
56
|
return compactUsageSummary({
|
|
57
57
|
...(total ? { total } : {}),
|
|
58
|
-
...(
|
|
58
|
+
...(improver ? { improver } : {}),
|
|
59
59
|
...(runner ? { runner } : {}),
|
|
60
60
|
...(engine ? { engine } : {}),
|
|
61
61
|
});
|
|
@@ -63,12 +63,12 @@ export function completeUsageSummary(usage) {
|
|
|
63
63
|
export function normalizeUsageSummary(value) {
|
|
64
64
|
const record = jsonRecord(value);
|
|
65
65
|
const total = normalizeExecutionUsage(record.total);
|
|
66
|
-
const
|
|
66
|
+
const improver = normalizeExecutionUsage(record.improver);
|
|
67
67
|
const runner = normalizeExecutionUsage(record.runner);
|
|
68
68
|
const engine = normalizeExecutionUsage(record.engine);
|
|
69
69
|
return completeUsageSummary({
|
|
70
70
|
...(total ? { total } : {}),
|
|
71
|
-
...(
|
|
71
|
+
...(improver ? { improver } : {}),
|
|
72
72
|
...(runner ? { runner } : {}),
|
|
73
73
|
...(engine ? { engine } : {}),
|
|
74
74
|
});
|
|
@@ -83,17 +83,17 @@ export function mergeUsageSummaries(summaries) {
|
|
|
83
83
|
}
|
|
84
84
|
return compactUsageSummary({
|
|
85
85
|
total: mergeExecutionUsage(entries.map((entry) => entry.total)),
|
|
86
|
-
|
|
86
|
+
improver: mergeExecutionUsage(entries.map((entry) => entry.improver)),
|
|
87
87
|
runner: mergeExecutionUsage(entries.map((entry) => entry.runner)),
|
|
88
88
|
engine: mergeExecutionUsage(entries.map((entry) => entry.engine)),
|
|
89
89
|
});
|
|
90
90
|
}
|
|
91
91
|
export function mergeUsageRoles(roles) {
|
|
92
|
-
const
|
|
92
|
+
const improver = completeUsageSummary(roles.improver);
|
|
93
93
|
const runner = completeUsageSummary(roles.runner);
|
|
94
94
|
const engine = completeUsageSummary(roles.engine);
|
|
95
95
|
return completeUsageSummary({
|
|
96
|
-
|
|
96
|
+
improver: improver?.improver ?? improver?.total,
|
|
97
97
|
runner: runner?.runner ?? runner?.total,
|
|
98
98
|
engine: engine?.engine ?? engine?.total,
|
|
99
99
|
});
|
package/dist/generic-spec.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
3
|
export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
|
+
export declare const CANDIDATE_SPEC_FILE = "candidate.yaml";
|
|
4
5
|
export interface WorkbenchRuntimeSpec {
|
|
5
6
|
dockerfile: string;
|
|
6
7
|
workdir?: string;
|
|
@@ -15,42 +16,53 @@ export interface WorkbenchRuntimeSpec {
|
|
|
15
16
|
export interface WorkbenchPathRef {
|
|
16
17
|
path: string;
|
|
17
18
|
}
|
|
18
|
-
export interface
|
|
19
|
+
export interface WorkbenchCandidatePrepareSpec {
|
|
19
20
|
command: string;
|
|
20
21
|
}
|
|
21
22
|
export interface AuthoredBenchmarkSpec {
|
|
22
|
-
version:
|
|
23
|
+
version: 4;
|
|
23
24
|
name: string;
|
|
24
25
|
description: string;
|
|
25
26
|
adapters: string[];
|
|
26
27
|
engine: WorkbenchAdapterInvocation;
|
|
27
28
|
}
|
|
28
|
-
export interface
|
|
29
|
-
version: 3;
|
|
29
|
+
export interface WorkbenchCandidateRunSpec extends WorkbenchAdapterInvocation {
|
|
30
30
|
name: string;
|
|
31
|
-
description?: string;
|
|
32
|
-
files: WorkbenchPathRef;
|
|
33
|
-
prepare?: WorkbenchSubjectPrepareSpec;
|
|
34
|
-
adapters: string[];
|
|
35
|
-
run: WorkbenchAdapterInvocation;
|
|
36
31
|
}
|
|
37
|
-
export
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
export interface WorkbenchCaseSelector {
|
|
33
|
+
all?: true;
|
|
34
|
+
split?: string;
|
|
35
|
+
}
|
|
36
|
+
export interface WorkbenchSelectionSpec {
|
|
37
|
+
metric: string;
|
|
38
|
+
cases?: WorkbenchCaseSelector;
|
|
39
|
+
}
|
|
40
|
+
export interface WorkbenchCandidateImproveSpec extends WorkbenchAdapterInvocation {
|
|
41
|
+
edits: string[];
|
|
42
|
+
optimizeOn?: WorkbenchCaseSelector;
|
|
43
|
+
selectBy?: WorkbenchSelectionSpec;
|
|
44
|
+
}
|
|
45
|
+
export interface WorkbenchCandidateManifestSpec {
|
|
46
|
+
version: 4;
|
|
40
47
|
name: string;
|
|
41
48
|
description?: string;
|
|
42
|
-
|
|
49
|
+
files: WorkbenchPathRef;
|
|
50
|
+
prepare?: WorkbenchCandidatePrepareSpec;
|
|
43
51
|
adapters: string[];
|
|
44
|
-
|
|
52
|
+
defaultRun?: string;
|
|
53
|
+
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
54
|
+
improve?: WorkbenchCandidateImproveSpec;
|
|
55
|
+
}
|
|
56
|
+
export interface ResolvedCandidateSpec extends WorkbenchCandidateManifestSpec {
|
|
57
|
+
selectedRunId: string;
|
|
45
58
|
}
|
|
46
59
|
export interface WorkbenchResolvedSource {
|
|
47
|
-
version:
|
|
60
|
+
version: 4;
|
|
48
61
|
benchmark: AuthoredBenchmarkSpec;
|
|
49
|
-
|
|
50
|
-
optimizer?: AuthoredOptimizerSpec;
|
|
62
|
+
candidate: ResolvedCandidateSpec;
|
|
51
63
|
}
|
|
52
64
|
export interface GenericRunSpec {
|
|
53
|
-
version:
|
|
65
|
+
version: 4;
|
|
54
66
|
name: string;
|
|
55
67
|
description: string;
|
|
56
68
|
benchmark: {
|
|
@@ -58,16 +70,20 @@ export interface GenericRunSpec {
|
|
|
58
70
|
description: string;
|
|
59
71
|
engine: WorkbenchAdapterInvocation;
|
|
60
72
|
};
|
|
61
|
-
|
|
73
|
+
candidate: {
|
|
62
74
|
name: string;
|
|
63
75
|
description?: string;
|
|
64
76
|
files: WorkbenchPathRef;
|
|
65
|
-
prepare?:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
77
|
+
prepare?: WorkbenchCandidatePrepareSpec;
|
|
78
|
+
defaultRun: string;
|
|
79
|
+
selectedRunId: string;
|
|
80
|
+
selectedRunName: string;
|
|
81
|
+
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
82
|
+
improve?: {
|
|
83
|
+
edits: string[];
|
|
84
|
+
optimizeOn?: WorkbenchCaseSelector;
|
|
85
|
+
selectBy?: WorkbenchSelectionSpec;
|
|
86
|
+
};
|
|
71
87
|
};
|
|
72
88
|
environment: WorkbenchRuntimeSpec;
|
|
73
89
|
adapters: string[];
|
|
@@ -91,16 +107,16 @@ export declare function engineResolveBindingForSourceYaml(source: string): Engin
|
|
|
91
107
|
export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
|
|
92
108
|
export declare function resolveWorkbenchSourceFiles(args: {
|
|
93
109
|
benchmarkSource: string;
|
|
94
|
-
|
|
95
|
-
|
|
110
|
+
candidateSource: string;
|
|
111
|
+
runId?: string | null;
|
|
96
112
|
}): GenericRunSpec;
|
|
97
113
|
export declare function parseWorkbenchSourceFiles(args: {
|
|
98
114
|
benchmarkSource: string;
|
|
99
|
-
|
|
100
|
-
|
|
115
|
+
candidateSource?: string;
|
|
116
|
+
runId?: string | null;
|
|
101
117
|
}): WorkbenchResolvedSource;
|
|
102
118
|
export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
|
|
103
|
-
export declare function
|
|
119
|
+
export declare function isWorkbenchCandidateManifestPath(filePath: string): boolean;
|
|
104
120
|
export declare function resolveEngineCaseExecutionConfig(args: {
|
|
105
121
|
spec: GenericRunSpec;
|
|
106
122
|
engineCase: GenericEngineCaseSpec;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AACpD,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA6B;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,yBAA0B,SAAQ,0BAA0B;IAC3E,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,qBAAqB,CAAC;CAC/B;AAED,MAAM,WAAW,6BAA8B,SAAQ,0BAA0B;IAC/E,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;CACnC;AAED,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;IACxC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;IAChD,OAAO,CAAC,EAAE,6BAA6B,CAAC;CACzC;AAED,MAAM,WAAW,qBAAsB,SAAQ,8BAA8B;IAC3E,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;CAClC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;QACxC,UAAU,EAAE,MAAM,CAAC;QACnB,aAAa,EAAE,MAAM,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;YAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;YACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;SACnC,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CA6BhB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,uBAAuB,CAqB1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,gCAAgC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAI1E;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|
package/dist/generic-spec.js
CHANGED
|
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
|
|
3
3
|
import YAML from "yaml";
|
|
4
4
|
export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
5
|
+
export const CANDIDATE_SPEC_FILE = "candidate.yaml";
|
|
5
6
|
export const DEFAULT_EXECUTION_RESOURCES = {
|
|
6
7
|
cpu: 2,
|
|
7
8
|
memoryGb: 4,
|
|
@@ -35,25 +36,20 @@ export function resolveWorkbenchResolvedSourceYaml(source) {
|
|
|
35
36
|
rejectUnknownKeys(parsed, "resolved Workbench source", [
|
|
36
37
|
"version",
|
|
37
38
|
"benchmark",
|
|
38
|
-
"
|
|
39
|
-
"optimizer",
|
|
39
|
+
"candidate",
|
|
40
40
|
], errors);
|
|
41
|
-
if (parsed.version !==
|
|
42
|
-
throw new Error("Resolved Workbench source version must be
|
|
41
|
+
if (parsed.version !== 4) {
|
|
42
|
+
throw new Error("Resolved Workbench source version must be 4.");
|
|
43
43
|
}
|
|
44
44
|
const benchmark = normalizeBenchmarkRecord(readRequiredRecord(parsed.benchmark, "resolved Workbench source.benchmark", errors), "benchmark.yaml", errors);
|
|
45
|
-
const
|
|
46
|
-
const optimizer = parsed.optimizer === undefined
|
|
47
|
-
? undefined
|
|
48
|
-
: normalizeOptimizerRecord(readRequiredRecord(parsed.optimizer, "resolved Workbench source.optimizer", errors), "optimizer YAML", errors);
|
|
45
|
+
const candidate = normalizeCandidateRecord(readRequiredRecord(parsed.candidate, "resolved Workbench source.candidate", errors), "resolved Workbench source.candidate", errors);
|
|
49
46
|
if (errors.length > 0) {
|
|
50
47
|
throw new Error(errors.join("\n"));
|
|
51
48
|
}
|
|
52
49
|
return genericSpecFromAuthoredBundle({
|
|
53
|
-
version:
|
|
50
|
+
version: 4,
|
|
54
51
|
benchmark: benchmark,
|
|
55
|
-
|
|
56
|
-
...(optimizer ? { optimizer } : {}),
|
|
52
|
+
candidate: candidate,
|
|
57
53
|
});
|
|
58
54
|
}
|
|
59
55
|
export function engineResolveBindingForSourceYaml(source) {
|
|
@@ -72,32 +68,28 @@ export function engineResolveBindingForSpec(spec) {
|
|
|
72
68
|
export function resolveWorkbenchSourceFiles(args) {
|
|
73
69
|
return genericSpecFromAuthoredBundle(parseWorkbenchSourceFiles({
|
|
74
70
|
benchmarkSource: args.benchmarkSource,
|
|
75
|
-
|
|
76
|
-
|
|
71
|
+
candidateSource: args.candidateSource,
|
|
72
|
+
runId: args.runId,
|
|
77
73
|
}));
|
|
78
74
|
}
|
|
79
75
|
export function parseWorkbenchSourceFiles(args) {
|
|
80
76
|
const errors = [];
|
|
81
77
|
const benchmark = normalizeBenchmarkRecord(parseYamlRecord(args.benchmarkSource, BENCHMARK_SPEC_FILE), BENCHMARK_SPEC_FILE, errors);
|
|
82
|
-
const
|
|
83
|
-
const optimizer = args.optimizerSource?.trim()
|
|
84
|
-
? normalizeOptimizerRecord(parseYamlRecord(args.optimizerSource, "optimizer YAML"), "optimizer YAML", errors)
|
|
85
|
-
: undefined;
|
|
78
|
+
const candidate = normalizeCandidateRecord(parseYamlRecord(args.candidateSource ?? "", "candidate YAML"), "candidate YAML", errors, args.runId ?? undefined);
|
|
86
79
|
if (errors.length > 0) {
|
|
87
80
|
throw new Error(errors.join("\n"));
|
|
88
81
|
}
|
|
89
82
|
return {
|
|
90
|
-
version:
|
|
83
|
+
version: 4,
|
|
91
84
|
benchmark: benchmark,
|
|
92
|
-
|
|
93
|
-
...(optimizer ? { optimizer } : {}),
|
|
85
|
+
candidate: candidate,
|
|
94
86
|
};
|
|
95
87
|
}
|
|
96
88
|
export function serializeWorkbenchResolvedSourceYaml(source) {
|
|
97
89
|
return YAML.stringify(source).trimEnd() + "\n";
|
|
98
90
|
}
|
|
99
|
-
export function
|
|
100
|
-
return /^
|
|
91
|
+
export function isWorkbenchCandidateManifestPath(filePath) {
|
|
92
|
+
return /^candidates\/[^/]+\/candidate\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
|
|
101
93
|
}
|
|
102
94
|
export function resolveEngineCaseExecutionConfig(args) {
|
|
103
95
|
return {
|
|
@@ -142,8 +134,13 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
142
134
|
const engineRuntime = engineRuntimeFromConfig(source.benchmark.engine);
|
|
143
135
|
const engineRun = cloneEngineInvocation(source.benchmark.engine);
|
|
144
136
|
const engineResolve = cloneEngineInvocation(source.benchmark.engine);
|
|
137
|
+
const candidate = source.candidate;
|
|
138
|
+
const selectedRun = candidate.runs[candidate.selectedRunId];
|
|
139
|
+
if (!selectedRun) {
|
|
140
|
+
throw new Error(`Candidate run not found: ${candidate.selectedRunId}`);
|
|
141
|
+
}
|
|
145
142
|
return {
|
|
146
|
-
version:
|
|
143
|
+
version: 4,
|
|
147
144
|
name: source.benchmark.name,
|
|
148
145
|
description: source.benchmark.description,
|
|
149
146
|
benchmark: {
|
|
@@ -151,33 +148,36 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
151
148
|
description: source.benchmark.description,
|
|
152
149
|
engine: cloneJson(source.benchmark.engine),
|
|
153
150
|
},
|
|
154
|
-
|
|
155
|
-
name:
|
|
156
|
-
...(
|
|
157
|
-
files: cloneJson(
|
|
158
|
-
...(
|
|
151
|
+
candidate: {
|
|
152
|
+
name: candidate.name,
|
|
153
|
+
...(candidate.description ? { description: candidate.description } : {}),
|
|
154
|
+
files: cloneJson(candidate.files),
|
|
155
|
+
...(candidate.prepare ? { prepare: cloneJson(candidate.prepare) } : {}),
|
|
156
|
+
defaultRun: candidate.defaultRun ?? candidate.selectedRunId,
|
|
157
|
+
selectedRunId: candidate.selectedRunId,
|
|
158
|
+
selectedRunName: selectedRun.name,
|
|
159
|
+
runs: cloneJson(candidate.runs),
|
|
160
|
+
...(candidate.improve
|
|
161
|
+
? {
|
|
162
|
+
improve: {
|
|
163
|
+
edits: [...candidate.improve.edits],
|
|
164
|
+
...(candidate.improve.optimizeOn ? { optimizeOn: cloneJson(candidate.improve.optimizeOn) } : {}),
|
|
165
|
+
...(candidate.improve.selectBy ? { selectBy: cloneJson(candidate.improve.selectBy) } : {}),
|
|
166
|
+
},
|
|
167
|
+
}
|
|
168
|
+
: {}),
|
|
159
169
|
},
|
|
160
|
-
...(source.optimizer
|
|
161
|
-
? {
|
|
162
|
-
optimizer: {
|
|
163
|
-
name: source.optimizer.name,
|
|
164
|
-
...(source.optimizer.description ? { description: source.optimizer.description } : {}),
|
|
165
|
-
edits: [...source.optimizer.edits],
|
|
166
|
-
},
|
|
167
|
-
}
|
|
168
|
-
: {}),
|
|
169
170
|
environment: cloneJson(engineRuntime),
|
|
170
171
|
adapters: [
|
|
171
172
|
...new Set([
|
|
172
173
|
...source.benchmark.adapters,
|
|
173
|
-
...
|
|
174
|
-
...(source.optimizer?.adapters ?? []),
|
|
174
|
+
...candidate.adapters,
|
|
175
175
|
]),
|
|
176
176
|
],
|
|
177
177
|
engine: cloneJson(source.benchmark.engine),
|
|
178
178
|
engineResolve: cloneJson(engineResolve),
|
|
179
|
-
...(
|
|
180
|
-
run:
|
|
179
|
+
...(candidate.improve ? { improve: clonePhaseAdapter(candidate.improve) } : {}),
|
|
180
|
+
run: clonePhaseAdapter(selectedRun),
|
|
181
181
|
engineRun: cloneJson(engineRun),
|
|
182
182
|
};
|
|
183
183
|
}
|
|
@@ -192,7 +192,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
|
|
|
192
192
|
"adapters",
|
|
193
193
|
"engine",
|
|
194
194
|
], errors);
|
|
195
|
-
|
|
195
|
+
requireVersionFour(record.version, label, errors);
|
|
196
196
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
197
197
|
const description = readRequiredString(record.description, `${label}.description`, errors);
|
|
198
198
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
@@ -202,7 +202,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
|
|
|
202
202
|
}
|
|
203
203
|
return name && description && engine
|
|
204
204
|
? {
|
|
205
|
-
version:
|
|
205
|
+
version: 4,
|
|
206
206
|
name,
|
|
207
207
|
description,
|
|
208
208
|
adapters,
|
|
@@ -222,7 +222,7 @@ function normalizeEngineRuntimeConfig(engine, label, errors) {
|
|
|
222
222
|
}
|
|
223
223
|
}
|
|
224
224
|
}
|
|
225
|
-
function
|
|
225
|
+
function normalizeCandidateRecord(record, label, errors, selectedRunId) {
|
|
226
226
|
if (!record) {
|
|
227
227
|
return null;
|
|
228
228
|
}
|
|
@@ -233,28 +233,41 @@ function normalizeSubjectRecord(record, label, errors) {
|
|
|
233
233
|
"files",
|
|
234
234
|
"prepare",
|
|
235
235
|
"adapters",
|
|
236
|
-
"
|
|
236
|
+
"defaultRun",
|
|
237
|
+
"runs",
|
|
238
|
+
"improve",
|
|
239
|
+
"selectedRunId",
|
|
237
240
|
], errors);
|
|
238
|
-
|
|
241
|
+
requireVersionFour(record.version, label, errors);
|
|
239
242
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
240
243
|
const description = readOptionalString(record.description, `${label}.description`, errors);
|
|
241
244
|
const files = normalizePathRef(record.files, `${label}.files`, errors);
|
|
242
|
-
const prepare =
|
|
245
|
+
const prepare = normalizeCandidatePrepare(record.prepare, `${label}.prepare`, errors);
|
|
243
246
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
244
|
-
const
|
|
245
|
-
|
|
247
|
+
const runs = normalizeCandidateRuns(record.runs, `${label}.runs`, errors);
|
|
248
|
+
const defaultRun = readOptionalString(record.defaultRun, `${label}.defaultRun`, errors);
|
|
249
|
+
const embeddedSelectedRun = readOptionalString(record.selectedRunId, `${label}.selectedRunId`, errors);
|
|
250
|
+
const selected = selectedRunId ?? embeddedSelectedRun ?? defaultRun ?? Object.keys(runs).sort()[0];
|
|
251
|
+
if (selected && !runs[selected]) {
|
|
252
|
+
errors.push(`${label}.selectedRunId references unknown run ${selected}.`);
|
|
253
|
+
}
|
|
254
|
+
const improve = normalizeCandidateImprove(record.improve, `${label}.improve`, errors);
|
|
255
|
+
return name && files && selected && Object.keys(runs).length > 0
|
|
246
256
|
? {
|
|
247
|
-
version:
|
|
257
|
+
version: 4,
|
|
248
258
|
name,
|
|
249
259
|
...(description ? { description } : {}),
|
|
250
260
|
files,
|
|
251
261
|
...(prepare ? { prepare } : {}),
|
|
252
262
|
adapters,
|
|
253
|
-
|
|
263
|
+
...(defaultRun ? { defaultRun } : {}),
|
|
264
|
+
runs,
|
|
265
|
+
...(improve ? { improve } : {}),
|
|
266
|
+
selectedRunId: selected,
|
|
254
267
|
}
|
|
255
268
|
: null;
|
|
256
269
|
}
|
|
257
|
-
function
|
|
270
|
+
function normalizeCandidatePrepare(value, label, errors) {
|
|
258
271
|
if (value === undefined) {
|
|
259
272
|
return undefined;
|
|
260
273
|
}
|
|
@@ -266,38 +279,115 @@ function normalizeSubjectPrepare(value, label, errors) {
|
|
|
266
279
|
const command = readRequiredString(record.command, `${label}.command`, errors);
|
|
267
280
|
return command ? { command } : undefined;
|
|
268
281
|
}
|
|
269
|
-
function
|
|
282
|
+
function normalizeCandidateRuns(value, label, errors) {
|
|
283
|
+
const record = readRequiredRecord(value, label, errors);
|
|
270
284
|
if (!record) {
|
|
271
|
-
return
|
|
285
|
+
return {};
|
|
272
286
|
}
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
287
|
+
const runs = {};
|
|
288
|
+
for (const [runId, runValue] of Object.entries(record).sort(([left], [right]) => left.localeCompare(right))) {
|
|
289
|
+
if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(runId)) {
|
|
290
|
+
errors.push(`${label}.${runId} must use letters, numbers, dots, underscores, or dashes.`);
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
293
|
+
const runRecord = readRequiredRecord(runValue, `${label}.${runId}`, errors);
|
|
294
|
+
if (!runRecord) {
|
|
295
|
+
continue;
|
|
296
|
+
}
|
|
297
|
+
rejectUnknownKeys(runRecord, `${label}.${runId}`, ["name", "use", "with", "auth"], errors);
|
|
298
|
+
const name = readRequiredString(runRecord.name, `${label}.${runId}.name`, errors);
|
|
299
|
+
const invocation = normalizePhaseAdapter(adapterRecordFrom(runRecord), `${label}.${runId}`, errors);
|
|
300
|
+
if (name && invocation) {
|
|
301
|
+
runs[runId] = {
|
|
302
|
+
name,
|
|
303
|
+
...invocation,
|
|
304
|
+
};
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
if (Object.keys(runs).length === 0) {
|
|
308
|
+
errors.push(`${label} must declare at least one run.`);
|
|
309
|
+
}
|
|
310
|
+
return runs;
|
|
311
|
+
}
|
|
312
|
+
function normalizeCandidateImprove(value, label, errors) {
|
|
313
|
+
if (value === undefined) {
|
|
314
|
+
return undefined;
|
|
315
|
+
}
|
|
316
|
+
const record = readRequiredRecord(value, label, errors);
|
|
317
|
+
if (!record) {
|
|
318
|
+
return undefined;
|
|
319
|
+
}
|
|
320
|
+
rejectUnknownKeys(record, label, ["edits", "use", "with", "auth", "optimizeOn", "selectBy"], errors);
|
|
284
321
|
const edits = normalizeRelativePathList(record.edits, `${label}.edits`, errors);
|
|
285
|
-
const
|
|
286
|
-
const
|
|
287
|
-
|
|
322
|
+
const invocation = normalizePhaseAdapter(adapterRecordFrom(record), label, errors);
|
|
323
|
+
const optimizeOn = normalizeCaseSelector(record.optimizeOn, `${label}.optimizeOn`, errors);
|
|
324
|
+
const selectBy = normalizeSelectionSpec(record.selectBy, `${label}.selectBy`, errors);
|
|
325
|
+
return edits.length > 0 && invocation
|
|
288
326
|
? {
|
|
289
|
-
|
|
290
|
-
name,
|
|
291
|
-
...(description ? { description } : {}),
|
|
327
|
+
...invocation,
|
|
292
328
|
edits,
|
|
293
|
-
|
|
294
|
-
|
|
329
|
+
...(optimizeOn ? { optimizeOn } : {}),
|
|
330
|
+
...(selectBy ? { selectBy } : {}),
|
|
295
331
|
}
|
|
296
|
-
:
|
|
332
|
+
: undefined;
|
|
333
|
+
}
|
|
334
|
+
function normalizeSelectionSpec(value, label, errors) {
|
|
335
|
+
if (value === undefined) {
|
|
336
|
+
return undefined;
|
|
337
|
+
}
|
|
338
|
+
const record = readRequiredRecord(value, label, errors);
|
|
339
|
+
if (!record) {
|
|
340
|
+
return undefined;
|
|
341
|
+
}
|
|
342
|
+
rejectUnknownKeys(record, label, ["metric", "cases"], errors);
|
|
343
|
+
const metric = readRequiredString(record.metric, `${label}.metric`, errors);
|
|
344
|
+
const cases = normalizeCaseSelector(record.cases, `${label}.cases`, errors);
|
|
345
|
+
return metric
|
|
346
|
+
? {
|
|
347
|
+
metric,
|
|
348
|
+
...(cases ? { cases } : {}),
|
|
349
|
+
}
|
|
350
|
+
: undefined;
|
|
351
|
+
}
|
|
352
|
+
function normalizeCaseSelector(value, label, errors) {
|
|
353
|
+
if (value === undefined) {
|
|
354
|
+
return undefined;
|
|
355
|
+
}
|
|
356
|
+
const record = readRequiredRecord(value, label, errors);
|
|
357
|
+
if (!record) {
|
|
358
|
+
return undefined;
|
|
359
|
+
}
|
|
360
|
+
rejectUnknownKeys(record, label, ["all", "split"], errors);
|
|
361
|
+
const hasAll = Object.prototype.hasOwnProperty.call(record, "all");
|
|
362
|
+
const hasSplit = Object.prototype.hasOwnProperty.call(record, "split");
|
|
363
|
+
if (hasAll && hasSplit) {
|
|
364
|
+
errors.push(`${label} must specify either all or split, not both.`);
|
|
365
|
+
return undefined;
|
|
366
|
+
}
|
|
367
|
+
if (!hasAll && !hasSplit) {
|
|
368
|
+
errors.push(`${label} must specify all: true or split.`);
|
|
369
|
+
return undefined;
|
|
370
|
+
}
|
|
371
|
+
if (hasAll) {
|
|
372
|
+
if (record.all !== true) {
|
|
373
|
+
errors.push(`${label}.all must be true when provided.`);
|
|
374
|
+
return undefined;
|
|
375
|
+
}
|
|
376
|
+
return { all: true };
|
|
377
|
+
}
|
|
378
|
+
const split = readRequiredString(record.split, `${label}.split`, errors);
|
|
379
|
+
return split ? { split } : undefined;
|
|
297
380
|
}
|
|
298
|
-
function
|
|
299
|
-
|
|
300
|
-
|
|
381
|
+
function adapterRecordFrom(record) {
|
|
382
|
+
return {
|
|
383
|
+
use: record.use,
|
|
384
|
+
...(record.with !== undefined ? { with: record.with } : {}),
|
|
385
|
+
...(record.auth !== undefined ? { auth: record.auth } : {}),
|
|
386
|
+
};
|
|
387
|
+
}
|
|
388
|
+
function requireVersionFour(value, label, errors) {
|
|
389
|
+
if (value !== 4) {
|
|
390
|
+
errors.push(`${label}.version must be 4.`);
|
|
301
391
|
}
|
|
302
392
|
}
|
|
303
393
|
function normalizeRuntime(value, label, errors) {
|
|
@@ -417,10 +507,13 @@ function engineRuntimeFromConfig(engine) {
|
|
|
417
507
|
};
|
|
418
508
|
}
|
|
419
509
|
function cloneEngineInvocation(engine) {
|
|
510
|
+
return clonePhaseAdapter(engine);
|
|
511
|
+
}
|
|
512
|
+
function clonePhaseAdapter(adapter) {
|
|
420
513
|
return {
|
|
421
|
-
use:
|
|
422
|
-
with: cloneJson(
|
|
423
|
-
...(
|
|
514
|
+
use: adapter.use,
|
|
515
|
+
with: cloneJson(adapter.with ?? {}),
|
|
516
|
+
...(adapter.auth !== undefined ? { auth: cloneJson(adapter.auth) } : {}),
|
|
424
517
|
};
|
|
425
518
|
}
|
|
426
519
|
function mergeRuntime(base, override) {
|