@workbench-ai/workbench-core 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ const NUMERIC_USAGE_FIELDS = [
12
12
  "costUsd",
13
13
  ];
14
14
  const USAGE_ROLES = [
15
- "optimizer",
15
+ "improver",
16
16
  "runner",
17
17
  "engine",
18
18
  ];
@@ -44,18 +44,18 @@ export function completeUsageSummary(usage) {
44
44
  if (!usage) {
45
45
  return undefined;
46
46
  }
47
- const optimizer = usage.optimizer ? normalizeExecutionUsage(usage.optimizer) : undefined;
47
+ const improver = usage.improver ? normalizeExecutionUsage(usage.improver) : undefined;
48
48
  const runner = usage.runner ? normalizeExecutionUsage(usage.runner) : undefined;
49
49
  const engine = usage.engine ? normalizeExecutionUsage(usage.engine) : undefined;
50
50
  const roleTotal = mergeExecutionUsage([
51
- optimizer,
51
+ improver,
52
52
  runner,
53
53
  engine,
54
54
  ]);
55
55
  const total = roleTotal ?? normalizeExecutionUsage(usage.total);
56
56
  return compactUsageSummary({
57
57
  ...(total ? { total } : {}),
58
- ...(optimizer ? { optimizer } : {}),
58
+ ...(improver ? { improver } : {}),
59
59
  ...(runner ? { runner } : {}),
60
60
  ...(engine ? { engine } : {}),
61
61
  });
@@ -63,12 +63,12 @@ export function completeUsageSummary(usage) {
63
63
  export function normalizeUsageSummary(value) {
64
64
  const record = jsonRecord(value);
65
65
  const total = normalizeExecutionUsage(record.total);
66
- const optimizer = normalizeExecutionUsage(record.optimizer);
66
+ const improver = normalizeExecutionUsage(record.improver);
67
67
  const runner = normalizeExecutionUsage(record.runner);
68
68
  const engine = normalizeExecutionUsage(record.engine);
69
69
  return completeUsageSummary({
70
70
  ...(total ? { total } : {}),
71
- ...(optimizer ? { optimizer } : {}),
71
+ ...(improver ? { improver } : {}),
72
72
  ...(runner ? { runner } : {}),
73
73
  ...(engine ? { engine } : {}),
74
74
  });
@@ -83,17 +83,17 @@ export function mergeUsageSummaries(summaries) {
83
83
  }
84
84
  return compactUsageSummary({
85
85
  total: mergeExecutionUsage(entries.map((entry) => entry.total)),
86
- optimizer: mergeExecutionUsage(entries.map((entry) => entry.optimizer)),
86
+ improver: mergeExecutionUsage(entries.map((entry) => entry.improver)),
87
87
  runner: mergeExecutionUsage(entries.map((entry) => entry.runner)),
88
88
  engine: mergeExecutionUsage(entries.map((entry) => entry.engine)),
89
89
  });
90
90
  }
91
91
  export function mergeUsageRoles(roles) {
92
- const optimizer = completeUsageSummary(roles.optimizer);
92
+ const improver = completeUsageSummary(roles.improver);
93
93
  const runner = completeUsageSummary(roles.runner);
94
94
  const engine = completeUsageSummary(roles.engine);
95
95
  return completeUsageSummary({
96
- optimizer: optimizer?.optimizer ?? optimizer?.total,
96
+ improver: improver?.improver ?? improver?.total,
97
97
  runner: runner?.runner ?? runner?.total,
98
98
  engine: engine?.engine ?? engine?.total,
99
99
  });
@@ -1,6 +1,7 @@
1
1
  import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
2
2
  import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
3
3
  export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
4
+ export declare const CANDIDATE_SPEC_FILE = "candidate.yaml";
4
5
  export interface WorkbenchRuntimeSpec {
5
6
  dockerfile: string;
6
7
  workdir?: string;
@@ -15,42 +16,53 @@ export interface WorkbenchRuntimeSpec {
15
16
  export interface WorkbenchPathRef {
16
17
  path: string;
17
18
  }
18
- export interface WorkbenchSubjectPrepareSpec {
19
+ export interface WorkbenchCandidatePrepareSpec {
19
20
  command: string;
20
21
  }
21
22
  export interface AuthoredBenchmarkSpec {
22
- version: 3;
23
+ version: 4;
23
24
  name: string;
24
25
  description: string;
25
26
  adapters: string[];
26
27
  engine: WorkbenchAdapterInvocation;
27
28
  }
28
- export interface WorkbenchSubjectManifestSpec {
29
- version: 3;
29
+ export interface WorkbenchCandidateRunSpec extends WorkbenchAdapterInvocation {
30
30
  name: string;
31
- description?: string;
32
- files: WorkbenchPathRef;
33
- prepare?: WorkbenchSubjectPrepareSpec;
34
- adapters: string[];
35
- run: WorkbenchAdapterInvocation;
36
31
  }
37
- export type ResolvedSubjectSpec = WorkbenchSubjectManifestSpec;
38
- export interface AuthoredOptimizerSpec {
39
- version: 3;
32
+ export interface WorkbenchCaseSelector {
33
+ all?: true;
34
+ split?: string;
35
+ }
36
+ export interface WorkbenchSelectionSpec {
37
+ metric: string;
38
+ cases?: WorkbenchCaseSelector;
39
+ }
40
+ export interface WorkbenchCandidateImproveSpec extends WorkbenchAdapterInvocation {
41
+ edits: string[];
42
+ optimizeOn?: WorkbenchCaseSelector;
43
+ selectBy?: WorkbenchSelectionSpec;
44
+ }
45
+ export interface WorkbenchCandidateManifestSpec {
46
+ version: 4;
40
47
  name: string;
41
48
  description?: string;
42
- edits: string[];
49
+ files: WorkbenchPathRef;
50
+ prepare?: WorkbenchCandidatePrepareSpec;
43
51
  adapters: string[];
44
- improve: WorkbenchAdapterInvocation;
52
+ defaultRun?: string;
53
+ runs: Record<string, WorkbenchCandidateRunSpec>;
54
+ improve?: WorkbenchCandidateImproveSpec;
55
+ }
56
+ export interface ResolvedCandidateSpec extends WorkbenchCandidateManifestSpec {
57
+ selectedRunId: string;
45
58
  }
46
59
  export interface WorkbenchResolvedSource {
47
- version: 3;
60
+ version: 4;
48
61
  benchmark: AuthoredBenchmarkSpec;
49
- subject: ResolvedSubjectSpec;
50
- optimizer?: AuthoredOptimizerSpec;
62
+ candidate: ResolvedCandidateSpec;
51
63
  }
52
64
  export interface GenericRunSpec {
53
- version: 3;
65
+ version: 4;
54
66
  name: string;
55
67
  description: string;
56
68
  benchmark: {
@@ -58,16 +70,20 @@ export interface GenericRunSpec {
58
70
  description: string;
59
71
  engine: WorkbenchAdapterInvocation;
60
72
  };
61
- subject: {
73
+ candidate: {
62
74
  name: string;
63
75
  description?: string;
64
76
  files: WorkbenchPathRef;
65
- prepare?: WorkbenchSubjectPrepareSpec;
66
- };
67
- optimizer?: {
68
- name: string;
69
- description?: string;
70
- edits: string[];
77
+ prepare?: WorkbenchCandidatePrepareSpec;
78
+ defaultRun: string;
79
+ selectedRunId: string;
80
+ selectedRunName: string;
81
+ runs: Record<string, WorkbenchCandidateRunSpec>;
82
+ improve?: {
83
+ edits: string[];
84
+ optimizeOn?: WorkbenchCaseSelector;
85
+ selectBy?: WorkbenchSelectionSpec;
86
+ };
71
87
  };
72
88
  environment: WorkbenchRuntimeSpec;
73
89
  adapters: string[];
@@ -91,16 +107,16 @@ export declare function engineResolveBindingForSourceYaml(source: string): Engin
91
107
  export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
92
108
  export declare function resolveWorkbenchSourceFiles(args: {
93
109
  benchmarkSource: string;
94
- subjectSource: string;
95
- optimizerSource?: string | null;
110
+ candidateSource: string;
111
+ runId?: string | null;
96
112
  }): GenericRunSpec;
97
113
  export declare function parseWorkbenchSourceFiles(args: {
98
114
  benchmarkSource: string;
99
- subjectSource?: string;
100
- optimizerSource?: string | null;
115
+ candidateSource?: string;
116
+ runId?: string | null;
101
117
  }): WorkbenchResolvedSource;
102
118
  export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
103
- export declare function isWorkbenchSubjectManifestPath(filePath: string): boolean;
119
+ export declare function isWorkbenchCandidateManifestPath(filePath: string): boolean;
104
120
  export declare function resolveEngineCaseExecutionConfig(args: {
105
121
  spec: GenericRunSpec;
106
122
  engineCase: GenericEngineCaseSpec;
@@ -1 +1 @@
1
- {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,2BAA2B;IAC1C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;IACtC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;KACvC,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
1
+ {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AACpD,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA6B;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,yBAA0B,SAAQ,0BAA0B;IAC3E,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,qBAAqB,CAAC;CAC/B;AAED,MAAM,WAAW,6BAA8B,SAAQ,0BAA0B;IAC/E,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;CACnC;AAED,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;IACxC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;IAChD,OAAO,CAAC,EAAE,6BAA6B,CAAC;CACzC;AAED,MAAM,WAAW,qBAAsB,SAAQ,8BAA8B;IAC3E,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;CAClC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;QACxC,UAAU,EAAE,MAAM,CAAC;QACnB,aAAa,EAAE,MAAM,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;YAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;YACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;SACnC,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CA6BhB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,uBAAuB,CAqB1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,gCAAgC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAI1E;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
2
2
  import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
3
3
  import YAML from "yaml";
4
4
  export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
5
+ export const CANDIDATE_SPEC_FILE = "candidate.yaml";
5
6
  export const DEFAULT_EXECUTION_RESOURCES = {
6
7
  cpu: 2,
7
8
  memoryGb: 4,
@@ -35,25 +36,20 @@ export function resolveWorkbenchResolvedSourceYaml(source) {
35
36
  rejectUnknownKeys(parsed, "resolved Workbench source", [
36
37
  "version",
37
38
  "benchmark",
38
- "subject",
39
- "optimizer",
39
+ "candidate",
40
40
  ], errors);
41
- if (parsed.version !== 3) {
42
- throw new Error("Resolved Workbench source version must be 3.");
41
+ if (parsed.version !== 4) {
42
+ throw new Error("Resolved Workbench source version must be 4.");
43
43
  }
44
44
  const benchmark = normalizeBenchmarkRecord(readRequiredRecord(parsed.benchmark, "resolved Workbench source.benchmark", errors), "benchmark.yaml", errors);
45
- const subject = normalizeSubjectRecord(readRequiredRecord(parsed.subject, "resolved Workbench source.subject", errors), "resolved Workbench source.subject", errors);
46
- const optimizer = parsed.optimizer === undefined
47
- ? undefined
48
- : normalizeOptimizerRecord(readRequiredRecord(parsed.optimizer, "resolved Workbench source.optimizer", errors), "optimizer YAML", errors);
45
+ const candidate = normalizeCandidateRecord(readRequiredRecord(parsed.candidate, "resolved Workbench source.candidate", errors), "resolved Workbench source.candidate", errors);
49
46
  if (errors.length > 0) {
50
47
  throw new Error(errors.join("\n"));
51
48
  }
52
49
  return genericSpecFromAuthoredBundle({
53
- version: 3,
50
+ version: 4,
54
51
  benchmark: benchmark,
55
- subject: subject,
56
- ...(optimizer ? { optimizer } : {}),
52
+ candidate: candidate,
57
53
  });
58
54
  }
59
55
  export function engineResolveBindingForSourceYaml(source) {
@@ -72,32 +68,28 @@ export function engineResolveBindingForSpec(spec) {
72
68
  export function resolveWorkbenchSourceFiles(args) {
73
69
  return genericSpecFromAuthoredBundle(parseWorkbenchSourceFiles({
74
70
  benchmarkSource: args.benchmarkSource,
75
- subjectSource: args.subjectSource,
76
- optimizerSource: args.optimizerSource,
71
+ candidateSource: args.candidateSource,
72
+ runId: args.runId,
77
73
  }));
78
74
  }
79
75
  export function parseWorkbenchSourceFiles(args) {
80
76
  const errors = [];
81
77
  const benchmark = normalizeBenchmarkRecord(parseYamlRecord(args.benchmarkSource, BENCHMARK_SPEC_FILE), BENCHMARK_SPEC_FILE, errors);
82
- const subject = normalizeSubjectRecord(parseYamlRecord(args.subjectSource ?? "", "subject YAML"), "subject YAML", errors);
83
- const optimizer = args.optimizerSource?.trim()
84
- ? normalizeOptimizerRecord(parseYamlRecord(args.optimizerSource, "optimizer YAML"), "optimizer YAML", errors)
85
- : undefined;
78
+ const candidate = normalizeCandidateRecord(parseYamlRecord(args.candidateSource ?? "", "candidate YAML"), "candidate YAML", errors, args.runId ?? undefined);
86
79
  if (errors.length > 0) {
87
80
  throw new Error(errors.join("\n"));
88
81
  }
89
82
  return {
90
- version: 3,
83
+ version: 4,
91
84
  benchmark: benchmark,
92
- subject: subject,
93
- ...(optimizer ? { optimizer } : {}),
85
+ candidate: candidate,
94
86
  };
95
87
  }
96
88
  export function serializeWorkbenchResolvedSourceYaml(source) {
97
89
  return YAML.stringify(source).trimEnd() + "\n";
98
90
  }
99
- export function isWorkbenchSubjectManifestPath(filePath) {
100
- return /^subjects\/[^/]+\/subject\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
91
+ export function isWorkbenchCandidateManifestPath(filePath) {
92
+ return /^candidates\/[^/]+\/candidate\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
101
93
  }
102
94
  export function resolveEngineCaseExecutionConfig(args) {
103
95
  return {
@@ -142,8 +134,13 @@ function genericSpecFromAuthoredBundle(source) {
142
134
  const engineRuntime = engineRuntimeFromConfig(source.benchmark.engine);
143
135
  const engineRun = cloneEngineInvocation(source.benchmark.engine);
144
136
  const engineResolve = cloneEngineInvocation(source.benchmark.engine);
137
+ const candidate = source.candidate;
138
+ const selectedRun = candidate.runs[candidate.selectedRunId];
139
+ if (!selectedRun) {
140
+ throw new Error(`Candidate run not found: ${candidate.selectedRunId}`);
141
+ }
145
142
  return {
146
- version: 3,
143
+ version: 4,
147
144
  name: source.benchmark.name,
148
145
  description: source.benchmark.description,
149
146
  benchmark: {
@@ -151,33 +148,36 @@ function genericSpecFromAuthoredBundle(source) {
151
148
  description: source.benchmark.description,
152
149
  engine: cloneJson(source.benchmark.engine),
153
150
  },
154
- subject: {
155
- name: source.subject.name,
156
- ...(source.subject.description ? { description: source.subject.description } : {}),
157
- files: cloneJson(source.subject.files),
158
- ...(source.subject.prepare ? { prepare: cloneJson(source.subject.prepare) } : {}),
151
+ candidate: {
152
+ name: candidate.name,
153
+ ...(candidate.description ? { description: candidate.description } : {}),
154
+ files: cloneJson(candidate.files),
155
+ ...(candidate.prepare ? { prepare: cloneJson(candidate.prepare) } : {}),
156
+ defaultRun: candidate.defaultRun ?? candidate.selectedRunId,
157
+ selectedRunId: candidate.selectedRunId,
158
+ selectedRunName: selectedRun.name,
159
+ runs: cloneJson(candidate.runs),
160
+ ...(candidate.improve
161
+ ? {
162
+ improve: {
163
+ edits: [...candidate.improve.edits],
164
+ ...(candidate.improve.optimizeOn ? { optimizeOn: cloneJson(candidate.improve.optimizeOn) } : {}),
165
+ ...(candidate.improve.selectBy ? { selectBy: cloneJson(candidate.improve.selectBy) } : {}),
166
+ },
167
+ }
168
+ : {}),
159
169
  },
160
- ...(source.optimizer
161
- ? {
162
- optimizer: {
163
- name: source.optimizer.name,
164
- ...(source.optimizer.description ? { description: source.optimizer.description } : {}),
165
- edits: [...source.optimizer.edits],
166
- },
167
- }
168
- : {}),
169
170
  environment: cloneJson(engineRuntime),
170
171
  adapters: [
171
172
  ...new Set([
172
173
  ...source.benchmark.adapters,
173
- ...source.subject.adapters,
174
- ...(source.optimizer?.adapters ?? []),
174
+ ...candidate.adapters,
175
175
  ]),
176
176
  ],
177
177
  engine: cloneJson(source.benchmark.engine),
178
178
  engineResolve: cloneJson(engineResolve),
179
- ...(source.optimizer ? { improve: cloneJson(source.optimizer.improve) } : {}),
180
- run: cloneJson(source.subject.run),
179
+ ...(candidate.improve ? { improve: clonePhaseAdapter(candidate.improve) } : {}),
180
+ run: clonePhaseAdapter(selectedRun),
181
181
  engineRun: cloneJson(engineRun),
182
182
  };
183
183
  }
@@ -192,7 +192,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
192
192
  "adapters",
193
193
  "engine",
194
194
  ], errors);
195
- requireVersionThree(record.version, label, errors);
195
+ requireVersionFour(record.version, label, errors);
196
196
  const name = readRequiredString(record.name, `${label}.name`, errors);
197
197
  const description = readRequiredString(record.description, `${label}.description`, errors);
198
198
  const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
@@ -202,7 +202,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
202
202
  }
203
203
  return name && description && engine
204
204
  ? {
205
- version: 3,
205
+ version: 4,
206
206
  name,
207
207
  description,
208
208
  adapters,
@@ -222,7 +222,7 @@ function normalizeEngineRuntimeConfig(engine, label, errors) {
222
222
  }
223
223
  }
224
224
  }
225
- function normalizeSubjectRecord(record, label, errors) {
225
+ function normalizeCandidateRecord(record, label, errors, selectedRunId) {
226
226
  if (!record) {
227
227
  return null;
228
228
  }
@@ -233,28 +233,41 @@ function normalizeSubjectRecord(record, label, errors) {
233
233
  "files",
234
234
  "prepare",
235
235
  "adapters",
236
- "run",
236
+ "defaultRun",
237
+ "runs",
238
+ "improve",
239
+ "selectedRunId",
237
240
  ], errors);
238
- requireVersionThree(record.version, label, errors);
241
+ requireVersionFour(record.version, label, errors);
239
242
  const name = readRequiredString(record.name, `${label}.name`, errors);
240
243
  const description = readOptionalString(record.description, `${label}.description`, errors);
241
244
  const files = normalizePathRef(record.files, `${label}.files`, errors);
242
- const prepare = normalizeSubjectPrepare(record.prepare, `${label}.prepare`, errors);
245
+ const prepare = normalizeCandidatePrepare(record.prepare, `${label}.prepare`, errors);
243
246
  const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
244
- const run = normalizePhaseAdapter(record.run, `${label}.run`, errors);
245
- return name && files && run
247
+ const runs = normalizeCandidateRuns(record.runs, `${label}.runs`, errors);
248
+ const defaultRun = readOptionalString(record.defaultRun, `${label}.defaultRun`, errors);
249
+ const embeddedSelectedRun = readOptionalString(record.selectedRunId, `${label}.selectedRunId`, errors);
250
+ const selected = selectedRunId ?? embeddedSelectedRun ?? defaultRun ?? Object.keys(runs).sort()[0];
251
+ if (selected && !runs[selected]) {
252
+ errors.push(`${label}.selectedRunId references unknown run ${selected}.`);
253
+ }
254
+ const improve = normalizeCandidateImprove(record.improve, `${label}.improve`, errors);
255
+ return name && files && selected && Object.keys(runs).length > 0
246
256
  ? {
247
- version: 3,
257
+ version: 4,
248
258
  name,
249
259
  ...(description ? { description } : {}),
250
260
  files,
251
261
  ...(prepare ? { prepare } : {}),
252
262
  adapters,
253
- run,
263
+ ...(defaultRun ? { defaultRun } : {}),
264
+ runs,
265
+ ...(improve ? { improve } : {}),
266
+ selectedRunId: selected,
254
267
  }
255
268
  : null;
256
269
  }
257
- function normalizeSubjectPrepare(value, label, errors) {
270
+ function normalizeCandidatePrepare(value, label, errors) {
258
271
  if (value === undefined) {
259
272
  return undefined;
260
273
  }
@@ -266,38 +279,115 @@ function normalizeSubjectPrepare(value, label, errors) {
266
279
  const command = readRequiredString(record.command, `${label}.command`, errors);
267
280
  return command ? { command } : undefined;
268
281
  }
269
- function normalizeOptimizerRecord(record, label, errors) {
282
+ function normalizeCandidateRuns(value, label, errors) {
283
+ const record = readRequiredRecord(value, label, errors);
270
284
  if (!record) {
271
- return null;
285
+ return {};
272
286
  }
273
- rejectUnknownKeys(record, label, [
274
- "version",
275
- "name",
276
- "description",
277
- "edits",
278
- "adapters",
279
- "improve",
280
- ], errors);
281
- requireVersionThree(record.version, label, errors);
282
- const name = readRequiredString(record.name, `${label}.name`, errors);
283
- const description = readOptionalString(record.description, `${label}.description`, errors);
287
+ const runs = {};
288
+ for (const [runId, runValue] of Object.entries(record).sort(([left], [right]) => left.localeCompare(right))) {
289
+ if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(runId)) {
290
+ errors.push(`${label}.${runId} must use letters, numbers, dots, underscores, or dashes.`);
291
+ continue;
292
+ }
293
+ const runRecord = readRequiredRecord(runValue, `${label}.${runId}`, errors);
294
+ if (!runRecord) {
295
+ continue;
296
+ }
297
+ rejectUnknownKeys(runRecord, `${label}.${runId}`, ["name", "use", "with", "auth"], errors);
298
+ const name = readRequiredString(runRecord.name, `${label}.${runId}.name`, errors);
299
+ const invocation = normalizePhaseAdapter(adapterRecordFrom(runRecord), `${label}.${runId}`, errors);
300
+ if (name && invocation) {
301
+ runs[runId] = {
302
+ name,
303
+ ...invocation,
304
+ };
305
+ }
306
+ }
307
+ if (Object.keys(runs).length === 0) {
308
+ errors.push(`${label} must declare at least one run.`);
309
+ }
310
+ return runs;
311
+ }
312
+ function normalizeCandidateImprove(value, label, errors) {
313
+ if (value === undefined) {
314
+ return undefined;
315
+ }
316
+ const record = readRequiredRecord(value, label, errors);
317
+ if (!record) {
318
+ return undefined;
319
+ }
320
+ rejectUnknownKeys(record, label, ["edits", "use", "with", "auth", "optimizeOn", "selectBy"], errors);
284
321
  const edits = normalizeRelativePathList(record.edits, `${label}.edits`, errors);
285
- const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
286
- const improve = normalizePhaseAdapter(record.improve, `${label}.improve`, errors);
287
- return name && edits.length > 0 && improve
322
+ const invocation = normalizePhaseAdapter(adapterRecordFrom(record), label, errors);
323
+ const optimizeOn = normalizeCaseSelector(record.optimizeOn, `${label}.optimizeOn`, errors);
324
+ const selectBy = normalizeSelectionSpec(record.selectBy, `${label}.selectBy`, errors);
325
+ return edits.length > 0 && invocation
288
326
  ? {
289
- version: 3,
290
- name,
291
- ...(description ? { description } : {}),
327
+ ...invocation,
292
328
  edits,
293
- adapters,
294
- improve,
329
+ ...(optimizeOn ? { optimizeOn } : {}),
330
+ ...(selectBy ? { selectBy } : {}),
295
331
  }
296
- : null;
332
+ : undefined;
333
+ }
334
+ function normalizeSelectionSpec(value, label, errors) {
335
+ if (value === undefined) {
336
+ return undefined;
337
+ }
338
+ const record = readRequiredRecord(value, label, errors);
339
+ if (!record) {
340
+ return undefined;
341
+ }
342
+ rejectUnknownKeys(record, label, ["metric", "cases"], errors);
343
+ const metric = readRequiredString(record.metric, `${label}.metric`, errors);
344
+ const cases = normalizeCaseSelector(record.cases, `${label}.cases`, errors);
345
+ return metric
346
+ ? {
347
+ metric,
348
+ ...(cases ? { cases } : {}),
349
+ }
350
+ : undefined;
351
+ }
352
+ function normalizeCaseSelector(value, label, errors) {
353
+ if (value === undefined) {
354
+ return undefined;
355
+ }
356
+ const record = readRequiredRecord(value, label, errors);
357
+ if (!record) {
358
+ return undefined;
359
+ }
360
+ rejectUnknownKeys(record, label, ["all", "split"], errors);
361
+ const hasAll = Object.prototype.hasOwnProperty.call(record, "all");
362
+ const hasSplit = Object.prototype.hasOwnProperty.call(record, "split");
363
+ if (hasAll && hasSplit) {
364
+ errors.push(`${label} must specify either all or split, not both.`);
365
+ return undefined;
366
+ }
367
+ if (!hasAll && !hasSplit) {
368
+ errors.push(`${label} must specify all: true or split.`);
369
+ return undefined;
370
+ }
371
+ if (hasAll) {
372
+ if (record.all !== true) {
373
+ errors.push(`${label}.all must be true when provided.`);
374
+ return undefined;
375
+ }
376
+ return { all: true };
377
+ }
378
+ const split = readRequiredString(record.split, `${label}.split`, errors);
379
+ return split ? { split } : undefined;
297
380
  }
298
- function requireVersionThree(value, label, errors) {
299
- if (value !== 3) {
300
- errors.push(`${label}.version must be 3.`);
381
+ function adapterRecordFrom(record) {
382
+ return {
383
+ use: record.use,
384
+ ...(record.with !== undefined ? { with: record.with } : {}),
385
+ ...(record.auth !== undefined ? { auth: record.auth } : {}),
386
+ };
387
+ }
388
+ function requireVersionFour(value, label, errors) {
389
+ if (value !== 4) {
390
+ errors.push(`${label}.version must be 4.`);
301
391
  }
302
392
  }
303
393
  function normalizeRuntime(value, label, errors) {
@@ -417,10 +507,13 @@ function engineRuntimeFromConfig(engine) {
417
507
  };
418
508
  }
419
509
  function cloneEngineInvocation(engine) {
510
+ return clonePhaseAdapter(engine);
511
+ }
512
+ function clonePhaseAdapter(adapter) {
420
513
  return {
421
- use: engine.use,
422
- with: cloneJson(engine.with ?? {}),
423
- ...(engine.auth !== undefined ? { auth: cloneJson(engine.auth) } : {}),
514
+ use: adapter.use,
515
+ with: cloneJson(adapter.with ?? {}),
516
+ ...(adapter.auth !== undefined ? { auth: cloneJson(adapter.auth) } : {}),
424
517
  };
425
518
  }
426
519
  function mergeRuntime(base, override) {