@workbench-ai/workbench-core 0.0.49 → 0.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/candidate-patch.d.ts +8 -0
- package/dist/candidate-patch.d.ts.map +1 -0
- package/dist/{subject-patch.js → candidate-patch.js} +5 -5
- package/dist/execution-evidence.d.ts +5 -5
- package/dist/execution-evidence.d.ts.map +1 -1
- package/dist/execution-evidence.js +8 -8
- package/dist/execution-graph.d.ts +2 -2
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +13 -13
- package/dist/execution-jobs.d.ts +7 -6
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +32 -17
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +25 -13
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-traces.js +7 -7
- package/dist/execution-usage.js +9 -9
- package/dist/generic-spec.d.ts +34 -30
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +120 -80
- package/dist/index.d.ts +41 -38
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +575 -353
- package/dist/runtime-utils.d.ts +1 -1
- package/dist/runtime-utils.d.ts.map +1 -1
- package/dist/runtime-utils.js +3 -3
- package/dist/sandbox-backends/docker.js +5 -5
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.js +7 -7
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/subject-patch.d.ts +0 -8
- package/dist/subject-patch.d.ts.map +0 -1
package/dist/execution-usage.js
CHANGED
|
@@ -12,7 +12,7 @@ const NUMERIC_USAGE_FIELDS = [
|
|
|
12
12
|
"costUsd",
|
|
13
13
|
];
|
|
14
14
|
const USAGE_ROLES = [
|
|
15
|
-
"
|
|
15
|
+
"improver",
|
|
16
16
|
"runner",
|
|
17
17
|
"engine",
|
|
18
18
|
];
|
|
@@ -44,18 +44,18 @@ export function completeUsageSummary(usage) {
|
|
|
44
44
|
if (!usage) {
|
|
45
45
|
return undefined;
|
|
46
46
|
}
|
|
47
|
-
const
|
|
47
|
+
const improver = usage.improver ? normalizeExecutionUsage(usage.improver) : undefined;
|
|
48
48
|
const runner = usage.runner ? normalizeExecutionUsage(usage.runner) : undefined;
|
|
49
49
|
const engine = usage.engine ? normalizeExecutionUsage(usage.engine) : undefined;
|
|
50
50
|
const roleTotal = mergeExecutionUsage([
|
|
51
|
-
|
|
51
|
+
improver,
|
|
52
52
|
runner,
|
|
53
53
|
engine,
|
|
54
54
|
]);
|
|
55
55
|
const total = roleTotal ?? normalizeExecutionUsage(usage.total);
|
|
56
56
|
return compactUsageSummary({
|
|
57
57
|
...(total ? { total } : {}),
|
|
58
|
-
...(
|
|
58
|
+
...(improver ? { improver } : {}),
|
|
59
59
|
...(runner ? { runner } : {}),
|
|
60
60
|
...(engine ? { engine } : {}),
|
|
61
61
|
});
|
|
@@ -63,12 +63,12 @@ export function completeUsageSummary(usage) {
|
|
|
63
63
|
export function normalizeUsageSummary(value) {
|
|
64
64
|
const record = jsonRecord(value);
|
|
65
65
|
const total = normalizeExecutionUsage(record.total);
|
|
66
|
-
const
|
|
66
|
+
const improver = normalizeExecutionUsage(record.improver);
|
|
67
67
|
const runner = normalizeExecutionUsage(record.runner);
|
|
68
68
|
const engine = normalizeExecutionUsage(record.engine);
|
|
69
69
|
return completeUsageSummary({
|
|
70
70
|
...(total ? { total } : {}),
|
|
71
|
-
...(
|
|
71
|
+
...(improver ? { improver } : {}),
|
|
72
72
|
...(runner ? { runner } : {}),
|
|
73
73
|
...(engine ? { engine } : {}),
|
|
74
74
|
});
|
|
@@ -83,17 +83,17 @@ export function mergeUsageSummaries(summaries) {
|
|
|
83
83
|
}
|
|
84
84
|
return compactUsageSummary({
|
|
85
85
|
total: mergeExecutionUsage(entries.map((entry) => entry.total)),
|
|
86
|
-
|
|
86
|
+
improver: mergeExecutionUsage(entries.map((entry) => entry.improver)),
|
|
87
87
|
runner: mergeExecutionUsage(entries.map((entry) => entry.runner)),
|
|
88
88
|
engine: mergeExecutionUsage(entries.map((entry) => entry.engine)),
|
|
89
89
|
});
|
|
90
90
|
}
|
|
91
91
|
export function mergeUsageRoles(roles) {
|
|
92
|
-
const
|
|
92
|
+
const improver = completeUsageSummary(roles.improver);
|
|
93
93
|
const runner = completeUsageSummary(roles.runner);
|
|
94
94
|
const engine = completeUsageSummary(roles.engine);
|
|
95
95
|
return completeUsageSummary({
|
|
96
|
-
|
|
96
|
+
improver: improver?.improver ?? improver?.total,
|
|
97
97
|
runner: runner?.runner ?? runner?.total,
|
|
98
98
|
engine: engine?.engine ?? engine?.total,
|
|
99
99
|
});
|
package/dist/generic-spec.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
3
|
export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
|
+
export declare const CANDIDATE_SPEC_FILE = "candidate.yaml";
|
|
4
5
|
export interface WorkbenchRuntimeSpec {
|
|
5
6
|
dockerfile: string;
|
|
6
7
|
workdir?: string;
|
|
@@ -15,42 +16,43 @@ export interface WorkbenchRuntimeSpec {
|
|
|
15
16
|
export interface WorkbenchPathRef {
|
|
16
17
|
path: string;
|
|
17
18
|
}
|
|
18
|
-
export interface
|
|
19
|
+
export interface WorkbenchCandidatePrepareSpec {
|
|
19
20
|
command: string;
|
|
20
21
|
}
|
|
21
22
|
export interface AuthoredBenchmarkSpec {
|
|
22
|
-
version:
|
|
23
|
+
version: 4;
|
|
23
24
|
name: string;
|
|
24
25
|
description: string;
|
|
25
26
|
adapters: string[];
|
|
26
27
|
engine: WorkbenchAdapterInvocation;
|
|
27
28
|
}
|
|
28
|
-
export interface
|
|
29
|
-
version: 3;
|
|
29
|
+
export interface WorkbenchCandidateRunSpec extends WorkbenchAdapterInvocation {
|
|
30
30
|
name: string;
|
|
31
|
-
description?: string;
|
|
32
|
-
files: WorkbenchPathRef;
|
|
33
|
-
prepare?: WorkbenchSubjectPrepareSpec;
|
|
34
|
-
adapters: string[];
|
|
35
|
-
run: WorkbenchAdapterInvocation;
|
|
36
31
|
}
|
|
37
|
-
export
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
export interface WorkbenchCandidateImproveSpec extends WorkbenchAdapterInvocation {
|
|
33
|
+
edits: string[];
|
|
34
|
+
}
|
|
35
|
+
export interface WorkbenchCandidateManifestSpec {
|
|
36
|
+
version: 4;
|
|
40
37
|
name: string;
|
|
41
38
|
description?: string;
|
|
42
|
-
|
|
39
|
+
files: WorkbenchPathRef;
|
|
40
|
+
prepare?: WorkbenchCandidatePrepareSpec;
|
|
43
41
|
adapters: string[];
|
|
44
|
-
|
|
42
|
+
defaultRun?: string;
|
|
43
|
+
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
44
|
+
improve?: WorkbenchCandidateImproveSpec;
|
|
45
|
+
}
|
|
46
|
+
export interface ResolvedCandidateSpec extends WorkbenchCandidateManifestSpec {
|
|
47
|
+
selectedRunId: string;
|
|
45
48
|
}
|
|
46
49
|
export interface WorkbenchResolvedSource {
|
|
47
|
-
version:
|
|
50
|
+
version: 4;
|
|
48
51
|
benchmark: AuthoredBenchmarkSpec;
|
|
49
|
-
|
|
50
|
-
optimizer?: AuthoredOptimizerSpec;
|
|
52
|
+
candidate: ResolvedCandidateSpec;
|
|
51
53
|
}
|
|
52
54
|
export interface GenericRunSpec {
|
|
53
|
-
version:
|
|
55
|
+
version: 4;
|
|
54
56
|
name: string;
|
|
55
57
|
description: string;
|
|
56
58
|
benchmark: {
|
|
@@ -58,16 +60,18 @@ export interface GenericRunSpec {
|
|
|
58
60
|
description: string;
|
|
59
61
|
engine: WorkbenchAdapterInvocation;
|
|
60
62
|
};
|
|
61
|
-
|
|
63
|
+
candidate: {
|
|
62
64
|
name: string;
|
|
63
65
|
description?: string;
|
|
64
66
|
files: WorkbenchPathRef;
|
|
65
|
-
prepare?:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
67
|
+
prepare?: WorkbenchCandidatePrepareSpec;
|
|
68
|
+
defaultRun: string;
|
|
69
|
+
selectedRunId: string;
|
|
70
|
+
selectedRunName: string;
|
|
71
|
+
runs: Record<string, WorkbenchCandidateRunSpec>;
|
|
72
|
+
improve?: {
|
|
73
|
+
edits: string[];
|
|
74
|
+
};
|
|
71
75
|
};
|
|
72
76
|
environment: WorkbenchRuntimeSpec;
|
|
73
77
|
adapters: string[];
|
|
@@ -91,16 +95,16 @@ export declare function engineResolveBindingForSourceYaml(source: string): Engin
|
|
|
91
95
|
export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
|
|
92
96
|
export declare function resolveWorkbenchSourceFiles(args: {
|
|
93
97
|
benchmarkSource: string;
|
|
94
|
-
|
|
95
|
-
|
|
98
|
+
candidateSource: string;
|
|
99
|
+
runId?: string | null;
|
|
96
100
|
}): GenericRunSpec;
|
|
97
101
|
export declare function parseWorkbenchSourceFiles(args: {
|
|
98
102
|
benchmarkSource: string;
|
|
99
|
-
|
|
100
|
-
|
|
103
|
+
candidateSource?: string;
|
|
104
|
+
runId?: string | null;
|
|
101
105
|
}): WorkbenchResolvedSource;
|
|
102
106
|
export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
|
|
103
|
-
export declare function
|
|
107
|
+
export declare function isWorkbenchCandidateManifestPath(filePath: string): boolean;
|
|
104
108
|
export declare function resolveEngineCaseExecutionConfig(args: {
|
|
105
109
|
spec: GenericRunSpec;
|
|
106
110
|
engineCase: GenericEngineCaseSpec;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AACpD,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA6B;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,yBAA0B,SAAQ,0BAA0B;IAC3E,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA8B,SAAQ,0BAA0B;IAC/E,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;IACxC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;IAChD,OAAO,CAAC,EAAE,6BAA6B,CAAC;CACzC;AAED,MAAM,WAAW,qBAAsB,SAAQ,8BAA8B;IAC3E,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;CAClC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;QACxC,UAAU,EAAE,MAAM,CAAC;QACnB,aAAa,EAAE,MAAM,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;SACjB,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CA6BhB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,uBAAuB,CAqB1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,gCAAgC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAI1E;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|
package/dist/generic-spec.js
CHANGED
|
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
|
|
3
3
|
import YAML from "yaml";
|
|
4
4
|
export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
5
|
+
export const CANDIDATE_SPEC_FILE = "candidate.yaml";
|
|
5
6
|
export const DEFAULT_EXECUTION_RESOURCES = {
|
|
6
7
|
cpu: 2,
|
|
7
8
|
memoryGb: 4,
|
|
@@ -35,25 +36,20 @@ export function resolveWorkbenchResolvedSourceYaml(source) {
|
|
|
35
36
|
rejectUnknownKeys(parsed, "resolved Workbench source", [
|
|
36
37
|
"version",
|
|
37
38
|
"benchmark",
|
|
38
|
-
"
|
|
39
|
-
"optimizer",
|
|
39
|
+
"candidate",
|
|
40
40
|
], errors);
|
|
41
|
-
if (parsed.version !==
|
|
42
|
-
throw new Error("Resolved Workbench source version must be
|
|
41
|
+
if (parsed.version !== 4) {
|
|
42
|
+
throw new Error("Resolved Workbench source version must be 4.");
|
|
43
43
|
}
|
|
44
44
|
const benchmark = normalizeBenchmarkRecord(readRequiredRecord(parsed.benchmark, "resolved Workbench source.benchmark", errors), "benchmark.yaml", errors);
|
|
45
|
-
const
|
|
46
|
-
const optimizer = parsed.optimizer === undefined
|
|
47
|
-
? undefined
|
|
48
|
-
: normalizeOptimizerRecord(readRequiredRecord(parsed.optimizer, "resolved Workbench source.optimizer", errors), "optimizer YAML", errors);
|
|
45
|
+
const candidate = normalizeCandidateRecord(readRequiredRecord(parsed.candidate, "resolved Workbench source.candidate", errors), "resolved Workbench source.candidate", errors);
|
|
49
46
|
if (errors.length > 0) {
|
|
50
47
|
throw new Error(errors.join("\n"));
|
|
51
48
|
}
|
|
52
49
|
return genericSpecFromAuthoredBundle({
|
|
53
|
-
version:
|
|
50
|
+
version: 4,
|
|
54
51
|
benchmark: benchmark,
|
|
55
|
-
|
|
56
|
-
...(optimizer ? { optimizer } : {}),
|
|
52
|
+
candidate: candidate,
|
|
57
53
|
});
|
|
58
54
|
}
|
|
59
55
|
export function engineResolveBindingForSourceYaml(source) {
|
|
@@ -72,32 +68,28 @@ export function engineResolveBindingForSpec(spec) {
|
|
|
72
68
|
export function resolveWorkbenchSourceFiles(args) {
|
|
73
69
|
return genericSpecFromAuthoredBundle(parseWorkbenchSourceFiles({
|
|
74
70
|
benchmarkSource: args.benchmarkSource,
|
|
75
|
-
|
|
76
|
-
|
|
71
|
+
candidateSource: args.candidateSource,
|
|
72
|
+
runId: args.runId,
|
|
77
73
|
}));
|
|
78
74
|
}
|
|
79
75
|
export function parseWorkbenchSourceFiles(args) {
|
|
80
76
|
const errors = [];
|
|
81
77
|
const benchmark = normalizeBenchmarkRecord(parseYamlRecord(args.benchmarkSource, BENCHMARK_SPEC_FILE), BENCHMARK_SPEC_FILE, errors);
|
|
82
|
-
const
|
|
83
|
-
const optimizer = args.optimizerSource?.trim()
|
|
84
|
-
? normalizeOptimizerRecord(parseYamlRecord(args.optimizerSource, "optimizer YAML"), "optimizer YAML", errors)
|
|
85
|
-
: undefined;
|
|
78
|
+
const candidate = normalizeCandidateRecord(parseYamlRecord(args.candidateSource ?? "", "candidate YAML"), "candidate YAML", errors, args.runId ?? undefined);
|
|
86
79
|
if (errors.length > 0) {
|
|
87
80
|
throw new Error(errors.join("\n"));
|
|
88
81
|
}
|
|
89
82
|
return {
|
|
90
|
-
version:
|
|
83
|
+
version: 4,
|
|
91
84
|
benchmark: benchmark,
|
|
92
|
-
|
|
93
|
-
...(optimizer ? { optimizer } : {}),
|
|
85
|
+
candidate: candidate,
|
|
94
86
|
};
|
|
95
87
|
}
|
|
96
88
|
export function serializeWorkbenchResolvedSourceYaml(source) {
|
|
97
89
|
return YAML.stringify(source).trimEnd() + "\n";
|
|
98
90
|
}
|
|
99
|
-
export function
|
|
100
|
-
return /^
|
|
91
|
+
export function isWorkbenchCandidateManifestPath(filePath) {
|
|
92
|
+
return /^candidates\/[^/]+\/candidate\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
|
|
101
93
|
}
|
|
102
94
|
export function resolveEngineCaseExecutionConfig(args) {
|
|
103
95
|
return {
|
|
@@ -142,8 +134,13 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
142
134
|
const engineRuntime = engineRuntimeFromConfig(source.benchmark.engine);
|
|
143
135
|
const engineRun = cloneEngineInvocation(source.benchmark.engine);
|
|
144
136
|
const engineResolve = cloneEngineInvocation(source.benchmark.engine);
|
|
137
|
+
const candidate = source.candidate;
|
|
138
|
+
const selectedRun = candidate.runs[candidate.selectedRunId];
|
|
139
|
+
if (!selectedRun) {
|
|
140
|
+
throw new Error(`Candidate run not found: ${candidate.selectedRunId}`);
|
|
141
|
+
}
|
|
145
142
|
return {
|
|
146
|
-
version:
|
|
143
|
+
version: 4,
|
|
147
144
|
name: source.benchmark.name,
|
|
148
145
|
description: source.benchmark.description,
|
|
149
146
|
benchmark: {
|
|
@@ -151,33 +148,34 @@ function genericSpecFromAuthoredBundle(source) {
|
|
|
151
148
|
description: source.benchmark.description,
|
|
152
149
|
engine: cloneJson(source.benchmark.engine),
|
|
153
150
|
},
|
|
154
|
-
|
|
155
|
-
name:
|
|
156
|
-
...(
|
|
157
|
-
files: cloneJson(
|
|
158
|
-
...(
|
|
151
|
+
candidate: {
|
|
152
|
+
name: candidate.name,
|
|
153
|
+
...(candidate.description ? { description: candidate.description } : {}),
|
|
154
|
+
files: cloneJson(candidate.files),
|
|
155
|
+
...(candidate.prepare ? { prepare: cloneJson(candidate.prepare) } : {}),
|
|
156
|
+
defaultRun: candidate.defaultRun ?? candidate.selectedRunId,
|
|
157
|
+
selectedRunId: candidate.selectedRunId,
|
|
158
|
+
selectedRunName: selectedRun.name,
|
|
159
|
+
runs: cloneJson(candidate.runs),
|
|
160
|
+
...(candidate.improve
|
|
161
|
+
? {
|
|
162
|
+
improve: {
|
|
163
|
+
edits: [...candidate.improve.edits],
|
|
164
|
+
},
|
|
165
|
+
}
|
|
166
|
+
: {}),
|
|
159
167
|
},
|
|
160
|
-
...(source.optimizer
|
|
161
|
-
? {
|
|
162
|
-
optimizer: {
|
|
163
|
-
name: source.optimizer.name,
|
|
164
|
-
...(source.optimizer.description ? { description: source.optimizer.description } : {}),
|
|
165
|
-
edits: [...source.optimizer.edits],
|
|
166
|
-
},
|
|
167
|
-
}
|
|
168
|
-
: {}),
|
|
169
168
|
environment: cloneJson(engineRuntime),
|
|
170
169
|
adapters: [
|
|
171
170
|
...new Set([
|
|
172
171
|
...source.benchmark.adapters,
|
|
173
|
-
...
|
|
174
|
-
...(source.optimizer?.adapters ?? []),
|
|
172
|
+
...candidate.adapters,
|
|
175
173
|
]),
|
|
176
174
|
],
|
|
177
175
|
engine: cloneJson(source.benchmark.engine),
|
|
178
176
|
engineResolve: cloneJson(engineResolve),
|
|
179
|
-
...(
|
|
180
|
-
run:
|
|
177
|
+
...(candidate.improve ? { improve: clonePhaseAdapter(candidate.improve) } : {}),
|
|
178
|
+
run: clonePhaseAdapter(selectedRun),
|
|
181
179
|
engineRun: cloneJson(engineRun),
|
|
182
180
|
};
|
|
183
181
|
}
|
|
@@ -192,7 +190,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
|
|
|
192
190
|
"adapters",
|
|
193
191
|
"engine",
|
|
194
192
|
], errors);
|
|
195
|
-
|
|
193
|
+
requireVersionFour(record.version, label, errors);
|
|
196
194
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
197
195
|
const description = readRequiredString(record.description, `${label}.description`, errors);
|
|
198
196
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
@@ -202,7 +200,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
|
|
|
202
200
|
}
|
|
203
201
|
return name && description && engine
|
|
204
202
|
? {
|
|
205
|
-
version:
|
|
203
|
+
version: 4,
|
|
206
204
|
name,
|
|
207
205
|
description,
|
|
208
206
|
adapters,
|
|
@@ -222,7 +220,7 @@ function normalizeEngineRuntimeConfig(engine, label, errors) {
|
|
|
222
220
|
}
|
|
223
221
|
}
|
|
224
222
|
}
|
|
225
|
-
function
|
|
223
|
+
function normalizeCandidateRecord(record, label, errors, selectedRunId) {
|
|
226
224
|
if (!record) {
|
|
227
225
|
return null;
|
|
228
226
|
}
|
|
@@ -233,28 +231,41 @@ function normalizeSubjectRecord(record, label, errors) {
|
|
|
233
231
|
"files",
|
|
234
232
|
"prepare",
|
|
235
233
|
"adapters",
|
|
236
|
-
"
|
|
234
|
+
"defaultRun",
|
|
235
|
+
"runs",
|
|
236
|
+
"improve",
|
|
237
|
+
"selectedRunId",
|
|
237
238
|
], errors);
|
|
238
|
-
|
|
239
|
+
requireVersionFour(record.version, label, errors);
|
|
239
240
|
const name = readRequiredString(record.name, `${label}.name`, errors);
|
|
240
241
|
const description = readOptionalString(record.description, `${label}.description`, errors);
|
|
241
242
|
const files = normalizePathRef(record.files, `${label}.files`, errors);
|
|
242
|
-
const prepare =
|
|
243
|
+
const prepare = normalizeCandidatePrepare(record.prepare, `${label}.prepare`, errors);
|
|
243
244
|
const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
|
|
244
|
-
const
|
|
245
|
-
|
|
245
|
+
const runs = normalizeCandidateRuns(record.runs, `${label}.runs`, errors);
|
|
246
|
+
const defaultRun = readOptionalString(record.defaultRun, `${label}.defaultRun`, errors);
|
|
247
|
+
const embeddedSelectedRun = readOptionalString(record.selectedRunId, `${label}.selectedRunId`, errors);
|
|
248
|
+
const selected = selectedRunId ?? embeddedSelectedRun ?? defaultRun ?? Object.keys(runs).sort()[0];
|
|
249
|
+
if (selected && !runs[selected]) {
|
|
250
|
+
errors.push(`${label}.selectedRunId references unknown run ${selected}.`);
|
|
251
|
+
}
|
|
252
|
+
const improve = normalizeCandidateImprove(record.improve, `${label}.improve`, errors);
|
|
253
|
+
return name && files && selected && Object.keys(runs).length > 0
|
|
246
254
|
? {
|
|
247
|
-
version:
|
|
255
|
+
version: 4,
|
|
248
256
|
name,
|
|
249
257
|
...(description ? { description } : {}),
|
|
250
258
|
files,
|
|
251
259
|
...(prepare ? { prepare } : {}),
|
|
252
260
|
adapters,
|
|
253
|
-
|
|
261
|
+
...(defaultRun ? { defaultRun } : {}),
|
|
262
|
+
runs,
|
|
263
|
+
...(improve ? { improve } : {}),
|
|
264
|
+
selectedRunId: selected,
|
|
254
265
|
}
|
|
255
266
|
: null;
|
|
256
267
|
}
|
|
257
|
-
function
|
|
268
|
+
function normalizeCandidatePrepare(value, label, errors) {
|
|
258
269
|
if (value === undefined) {
|
|
259
270
|
return undefined;
|
|
260
271
|
}
|
|
@@ -266,38 +277,64 @@ function normalizeSubjectPrepare(value, label, errors) {
|
|
|
266
277
|
const command = readRequiredString(record.command, `${label}.command`, errors);
|
|
267
278
|
return command ? { command } : undefined;
|
|
268
279
|
}
|
|
269
|
-
function
|
|
280
|
+
function normalizeCandidateRuns(value, label, errors) {
|
|
281
|
+
const record = readRequiredRecord(value, label, errors);
|
|
270
282
|
if (!record) {
|
|
271
|
-
return
|
|
283
|
+
return {};
|
|
272
284
|
}
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
285
|
+
const runs = {};
|
|
286
|
+
for (const [runId, runValue] of Object.entries(record).sort(([left], [right]) => left.localeCompare(right))) {
|
|
287
|
+
if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(runId)) {
|
|
288
|
+
errors.push(`${label}.${runId} must use letters, numbers, dots, underscores, or dashes.`);
|
|
289
|
+
continue;
|
|
290
|
+
}
|
|
291
|
+
const runRecord = readRequiredRecord(runValue, `${label}.${runId}`, errors);
|
|
292
|
+
if (!runRecord) {
|
|
293
|
+
continue;
|
|
294
|
+
}
|
|
295
|
+
rejectUnknownKeys(runRecord, `${label}.${runId}`, ["name", "use", "with", "auth"], errors);
|
|
296
|
+
const name = readRequiredString(runRecord.name, `${label}.${runId}.name`, errors);
|
|
297
|
+
const invocation = normalizePhaseAdapter(adapterRecordFrom(runRecord), `${label}.${runId}`, errors);
|
|
298
|
+
if (name && invocation) {
|
|
299
|
+
runs[runId] = {
|
|
300
|
+
name,
|
|
301
|
+
...invocation,
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
if (Object.keys(runs).length === 0) {
|
|
306
|
+
errors.push(`${label} must declare at least one run.`);
|
|
307
|
+
}
|
|
308
|
+
return runs;
|
|
309
|
+
}
|
|
310
|
+
function normalizeCandidateImprove(value, label, errors) {
|
|
311
|
+
if (value === undefined) {
|
|
312
|
+
return undefined;
|
|
313
|
+
}
|
|
314
|
+
const record = readRequiredRecord(value, label, errors);
|
|
315
|
+
if (!record) {
|
|
316
|
+
return undefined;
|
|
317
|
+
}
|
|
318
|
+
rejectUnknownKeys(record, label, ["edits", "use", "with", "auth"], errors);
|
|
284
319
|
const edits = normalizeRelativePathList(record.edits, `${label}.edits`, errors);
|
|
285
|
-
const
|
|
286
|
-
|
|
287
|
-
return name && edits.length > 0 && improve
|
|
320
|
+
const invocation = normalizePhaseAdapter(adapterRecordFrom(record), label, errors);
|
|
321
|
+
return edits.length > 0 && invocation
|
|
288
322
|
? {
|
|
289
|
-
|
|
290
|
-
name,
|
|
291
|
-
...(description ? { description } : {}),
|
|
323
|
+
...invocation,
|
|
292
324
|
edits,
|
|
293
|
-
adapters,
|
|
294
|
-
improve,
|
|
295
325
|
}
|
|
296
|
-
:
|
|
326
|
+
: undefined;
|
|
297
327
|
}
|
|
298
|
-
function
|
|
299
|
-
|
|
300
|
-
|
|
328
|
+
function adapterRecordFrom(record) {
|
|
329
|
+
return {
|
|
330
|
+
use: record.use,
|
|
331
|
+
...(record.with !== undefined ? { with: record.with } : {}),
|
|
332
|
+
...(record.auth !== undefined ? { auth: record.auth } : {}),
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
function requireVersionFour(value, label, errors) {
|
|
336
|
+
if (value !== 4) {
|
|
337
|
+
errors.push(`${label}.version must be 4.`);
|
|
301
338
|
}
|
|
302
339
|
}
|
|
303
340
|
function normalizeRuntime(value, label, errors) {
|
|
@@ -417,10 +454,13 @@ function engineRuntimeFromConfig(engine) {
|
|
|
417
454
|
};
|
|
418
455
|
}
|
|
419
456
|
function cloneEngineInvocation(engine) {
|
|
457
|
+
return clonePhaseAdapter(engine);
|
|
458
|
+
}
|
|
459
|
+
function clonePhaseAdapter(adapter) {
|
|
420
460
|
return {
|
|
421
|
-
use:
|
|
422
|
-
with: cloneJson(
|
|
423
|
-
...(
|
|
461
|
+
use: adapter.use,
|
|
462
|
+
with: cloneJson(adapter.with ?? {}),
|
|
463
|
+
...(adapter.auth !== undefined ? { auth: cloneJson(adapter.auth) } : {}),
|
|
424
464
|
};
|
|
425
465
|
}
|
|
426
466
|
function mergeRuntime(base, override) {
|