@workbench-ai/workbench-core 0.0.49 → 0.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ const NUMERIC_USAGE_FIELDS = [
12
12
  "costUsd",
13
13
  ];
14
14
  const USAGE_ROLES = [
15
- "optimizer",
15
+ "improver",
16
16
  "runner",
17
17
  "engine",
18
18
  ];
@@ -44,18 +44,18 @@ export function completeUsageSummary(usage) {
44
44
  if (!usage) {
45
45
  return undefined;
46
46
  }
47
- const optimizer = usage.optimizer ? normalizeExecutionUsage(usage.optimizer) : undefined;
47
+ const improver = usage.improver ? normalizeExecutionUsage(usage.improver) : undefined;
48
48
  const runner = usage.runner ? normalizeExecutionUsage(usage.runner) : undefined;
49
49
  const engine = usage.engine ? normalizeExecutionUsage(usage.engine) : undefined;
50
50
  const roleTotal = mergeExecutionUsage([
51
- optimizer,
51
+ improver,
52
52
  runner,
53
53
  engine,
54
54
  ]);
55
55
  const total = roleTotal ?? normalizeExecutionUsage(usage.total);
56
56
  return compactUsageSummary({
57
57
  ...(total ? { total } : {}),
58
- ...(optimizer ? { optimizer } : {}),
58
+ ...(improver ? { improver } : {}),
59
59
  ...(runner ? { runner } : {}),
60
60
  ...(engine ? { engine } : {}),
61
61
  });
@@ -63,12 +63,12 @@ export function completeUsageSummary(usage) {
63
63
  export function normalizeUsageSummary(value) {
64
64
  const record = jsonRecord(value);
65
65
  const total = normalizeExecutionUsage(record.total);
66
- const optimizer = normalizeExecutionUsage(record.optimizer);
66
+ const improver = normalizeExecutionUsage(record.improver);
67
67
  const runner = normalizeExecutionUsage(record.runner);
68
68
  const engine = normalizeExecutionUsage(record.engine);
69
69
  return completeUsageSummary({
70
70
  ...(total ? { total } : {}),
71
- ...(optimizer ? { optimizer } : {}),
71
+ ...(improver ? { improver } : {}),
72
72
  ...(runner ? { runner } : {}),
73
73
  ...(engine ? { engine } : {}),
74
74
  });
@@ -83,17 +83,17 @@ export function mergeUsageSummaries(summaries) {
83
83
  }
84
84
  return compactUsageSummary({
85
85
  total: mergeExecutionUsage(entries.map((entry) => entry.total)),
86
- optimizer: mergeExecutionUsage(entries.map((entry) => entry.optimizer)),
86
+ improver: mergeExecutionUsage(entries.map((entry) => entry.improver)),
87
87
  runner: mergeExecutionUsage(entries.map((entry) => entry.runner)),
88
88
  engine: mergeExecutionUsage(entries.map((entry) => entry.engine)),
89
89
  });
90
90
  }
91
91
  export function mergeUsageRoles(roles) {
92
- const optimizer = completeUsageSummary(roles.optimizer);
92
+ const improver = completeUsageSummary(roles.improver);
93
93
  const runner = completeUsageSummary(roles.runner);
94
94
  const engine = completeUsageSummary(roles.engine);
95
95
  return completeUsageSummary({
96
- optimizer: optimizer?.optimizer ?? optimizer?.total,
96
+ improver: improver?.improver ?? improver?.total,
97
97
  runner: runner?.runner ?? runner?.total,
98
98
  engine: engine?.engine ?? engine?.total,
99
99
  });
@@ -1,6 +1,7 @@
1
1
  import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
2
2
  import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
3
3
  export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
4
+ export declare const CANDIDATE_SPEC_FILE = "candidate.yaml";
4
5
  export interface WorkbenchRuntimeSpec {
5
6
  dockerfile: string;
6
7
  workdir?: string;
@@ -15,42 +16,43 @@ export interface WorkbenchRuntimeSpec {
15
16
  export interface WorkbenchPathRef {
16
17
  path: string;
17
18
  }
18
- export interface WorkbenchSubjectPrepareSpec {
19
+ export interface WorkbenchCandidatePrepareSpec {
19
20
  command: string;
20
21
  }
21
22
  export interface AuthoredBenchmarkSpec {
22
- version: 3;
23
+ version: 4;
23
24
  name: string;
24
25
  description: string;
25
26
  adapters: string[];
26
27
  engine: WorkbenchAdapterInvocation;
27
28
  }
28
- export interface WorkbenchSubjectManifestSpec {
29
- version: 3;
29
+ export interface WorkbenchCandidateRunSpec extends WorkbenchAdapterInvocation {
30
30
  name: string;
31
- description?: string;
32
- files: WorkbenchPathRef;
33
- prepare?: WorkbenchSubjectPrepareSpec;
34
- adapters: string[];
35
- run: WorkbenchAdapterInvocation;
36
31
  }
37
- export type ResolvedSubjectSpec = WorkbenchSubjectManifestSpec;
38
- export interface AuthoredOptimizerSpec {
39
- version: 3;
32
+ export interface WorkbenchCandidateImproveSpec extends WorkbenchAdapterInvocation {
33
+ edits: string[];
34
+ }
35
+ export interface WorkbenchCandidateManifestSpec {
36
+ version: 4;
40
37
  name: string;
41
38
  description?: string;
42
- edits: string[];
39
+ files: WorkbenchPathRef;
40
+ prepare?: WorkbenchCandidatePrepareSpec;
43
41
  adapters: string[];
44
- improve: WorkbenchAdapterInvocation;
42
+ defaultRun?: string;
43
+ runs: Record<string, WorkbenchCandidateRunSpec>;
44
+ improve?: WorkbenchCandidateImproveSpec;
45
+ }
46
+ export interface ResolvedCandidateSpec extends WorkbenchCandidateManifestSpec {
47
+ selectedRunId: string;
45
48
  }
46
49
  export interface WorkbenchResolvedSource {
47
- version: 3;
50
+ version: 4;
48
51
  benchmark: AuthoredBenchmarkSpec;
49
- subject: ResolvedSubjectSpec;
50
- optimizer?: AuthoredOptimizerSpec;
52
+ candidate: ResolvedCandidateSpec;
51
53
  }
52
54
  export interface GenericRunSpec {
53
- version: 3;
55
+ version: 4;
54
56
  name: string;
55
57
  description: string;
56
58
  benchmark: {
@@ -58,16 +60,18 @@ export interface GenericRunSpec {
58
60
  description: string;
59
61
  engine: WorkbenchAdapterInvocation;
60
62
  };
61
- subject: {
63
+ candidate: {
62
64
  name: string;
63
65
  description?: string;
64
66
  files: WorkbenchPathRef;
65
- prepare?: WorkbenchSubjectPrepareSpec;
66
- };
67
- optimizer?: {
68
- name: string;
69
- description?: string;
70
- edits: string[];
67
+ prepare?: WorkbenchCandidatePrepareSpec;
68
+ defaultRun: string;
69
+ selectedRunId: string;
70
+ selectedRunName: string;
71
+ runs: Record<string, WorkbenchCandidateRunSpec>;
72
+ improve?: {
73
+ edits: string[];
74
+ };
71
75
  };
72
76
  environment: WorkbenchRuntimeSpec;
73
77
  adapters: string[];
@@ -91,16 +95,16 @@ export declare function engineResolveBindingForSourceYaml(source: string): Engin
91
95
  export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
92
96
  export declare function resolveWorkbenchSourceFiles(args: {
93
97
  benchmarkSource: string;
94
- subjectSource: string;
95
- optimizerSource?: string | null;
98
+ candidateSource: string;
99
+ runId?: string | null;
96
100
  }): GenericRunSpec;
97
101
  export declare function parseWorkbenchSourceFiles(args: {
98
102
  benchmarkSource: string;
99
- subjectSource?: string;
100
- optimizerSource?: string | null;
103
+ candidateSource?: string;
104
+ runId?: string | null;
101
105
  }): WorkbenchResolvedSource;
102
106
  export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
103
- export declare function isWorkbenchSubjectManifestPath(filePath: string): boolean;
107
+ export declare function isWorkbenchCandidateManifestPath(filePath: string): boolean;
104
108
  export declare function resolveEngineCaseExecutionConfig(args: {
105
109
  spec: GenericRunSpec;
106
110
  engineCase: GenericEngineCaseSpec;
@@ -1 +1 @@
1
- {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,2BAA2B;IAC1C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;IACtC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,2BAA2B,CAAC;KACvC,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
1
+ {"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AACpD,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA6B;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,yBAA0B,SAAQ,0BAA0B;IAC3E,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,6BAA8B,SAAQ,0BAA0B;IAC/E,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;IACxC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;IAChD,OAAO,CAAC,EAAE,6BAA6B,CAAC;CACzC;AAED,MAAM,WAAW,qBAAsB,SAAQ,8BAA8B;IAC3E,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;CAClC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,6BAA6B,CAAC;QACxC,UAAU,EAAE,MAAM,CAAC;QACnB,aAAa,EAAE,MAAM,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;SACjB,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CA6BhB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB,GAAG,uBAAuB,CAqB1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,gCAAgC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAI1E;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
2
2
  import { isWorkbenchExecutionNetworkEgress, } from "@workbench-ai/workbench-contract";
3
3
  import YAML from "yaml";
4
4
  export const BENCHMARK_SPEC_FILE = "benchmark.yaml";
5
+ export const CANDIDATE_SPEC_FILE = "candidate.yaml";
5
6
  export const DEFAULT_EXECUTION_RESOURCES = {
6
7
  cpu: 2,
7
8
  memoryGb: 4,
@@ -35,25 +36,20 @@ export function resolveWorkbenchResolvedSourceYaml(source) {
35
36
  rejectUnknownKeys(parsed, "resolved Workbench source", [
36
37
  "version",
37
38
  "benchmark",
38
- "subject",
39
- "optimizer",
39
+ "candidate",
40
40
  ], errors);
41
- if (parsed.version !== 3) {
42
- throw new Error("Resolved Workbench source version must be 3.");
41
+ if (parsed.version !== 4) {
42
+ throw new Error("Resolved Workbench source version must be 4.");
43
43
  }
44
44
  const benchmark = normalizeBenchmarkRecord(readRequiredRecord(parsed.benchmark, "resolved Workbench source.benchmark", errors), "benchmark.yaml", errors);
45
- const subject = normalizeSubjectRecord(readRequiredRecord(parsed.subject, "resolved Workbench source.subject", errors), "resolved Workbench source.subject", errors);
46
- const optimizer = parsed.optimizer === undefined
47
- ? undefined
48
- : normalizeOptimizerRecord(readRequiredRecord(parsed.optimizer, "resolved Workbench source.optimizer", errors), "optimizer YAML", errors);
45
+ const candidate = normalizeCandidateRecord(readRequiredRecord(parsed.candidate, "resolved Workbench source.candidate", errors), "resolved Workbench source.candidate", errors);
49
46
  if (errors.length > 0) {
50
47
  throw new Error(errors.join("\n"));
51
48
  }
52
49
  return genericSpecFromAuthoredBundle({
53
- version: 3,
50
+ version: 4,
54
51
  benchmark: benchmark,
55
- subject: subject,
56
- ...(optimizer ? { optimizer } : {}),
52
+ candidate: candidate,
57
53
  });
58
54
  }
59
55
  export function engineResolveBindingForSourceYaml(source) {
@@ -72,32 +68,28 @@ export function engineResolveBindingForSpec(spec) {
72
68
  export function resolveWorkbenchSourceFiles(args) {
73
69
  return genericSpecFromAuthoredBundle(parseWorkbenchSourceFiles({
74
70
  benchmarkSource: args.benchmarkSource,
75
- subjectSource: args.subjectSource,
76
- optimizerSource: args.optimizerSource,
71
+ candidateSource: args.candidateSource,
72
+ runId: args.runId,
77
73
  }));
78
74
  }
79
75
  export function parseWorkbenchSourceFiles(args) {
80
76
  const errors = [];
81
77
  const benchmark = normalizeBenchmarkRecord(parseYamlRecord(args.benchmarkSource, BENCHMARK_SPEC_FILE), BENCHMARK_SPEC_FILE, errors);
82
- const subject = normalizeSubjectRecord(parseYamlRecord(args.subjectSource ?? "", "subject YAML"), "subject YAML", errors);
83
- const optimizer = args.optimizerSource?.trim()
84
- ? normalizeOptimizerRecord(parseYamlRecord(args.optimizerSource, "optimizer YAML"), "optimizer YAML", errors)
85
- : undefined;
78
+ const candidate = normalizeCandidateRecord(parseYamlRecord(args.candidateSource ?? "", "candidate YAML"), "candidate YAML", errors, args.runId ?? undefined);
86
79
  if (errors.length > 0) {
87
80
  throw new Error(errors.join("\n"));
88
81
  }
89
82
  return {
90
- version: 3,
83
+ version: 4,
91
84
  benchmark: benchmark,
92
- subject: subject,
93
- ...(optimizer ? { optimizer } : {}),
85
+ candidate: candidate,
94
86
  };
95
87
  }
96
88
  export function serializeWorkbenchResolvedSourceYaml(source) {
97
89
  return YAML.stringify(source).trimEnd() + "\n";
98
90
  }
99
- export function isWorkbenchSubjectManifestPath(filePath) {
100
- return /^subjects\/[^/]+\/subject\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
91
+ export function isWorkbenchCandidateManifestPath(filePath) {
92
+ return /^candidates\/[^/]+\/candidate\.ya?ml$/iu.test(filePath.replace(/\\/gu, "/").replace(/^\/+/u, "").replace(/^(?:\.\/)+/u, ""));
101
93
  }
102
94
  export function resolveEngineCaseExecutionConfig(args) {
103
95
  return {
@@ -142,8 +134,13 @@ function genericSpecFromAuthoredBundle(source) {
142
134
  const engineRuntime = engineRuntimeFromConfig(source.benchmark.engine);
143
135
  const engineRun = cloneEngineInvocation(source.benchmark.engine);
144
136
  const engineResolve = cloneEngineInvocation(source.benchmark.engine);
137
+ const candidate = source.candidate;
138
+ const selectedRun = candidate.runs[candidate.selectedRunId];
139
+ if (!selectedRun) {
140
+ throw new Error(`Candidate run not found: ${candidate.selectedRunId}`);
141
+ }
145
142
  return {
146
- version: 3,
143
+ version: 4,
147
144
  name: source.benchmark.name,
148
145
  description: source.benchmark.description,
149
146
  benchmark: {
@@ -151,33 +148,34 @@ function genericSpecFromAuthoredBundle(source) {
151
148
  description: source.benchmark.description,
152
149
  engine: cloneJson(source.benchmark.engine),
153
150
  },
154
- subject: {
155
- name: source.subject.name,
156
- ...(source.subject.description ? { description: source.subject.description } : {}),
157
- files: cloneJson(source.subject.files),
158
- ...(source.subject.prepare ? { prepare: cloneJson(source.subject.prepare) } : {}),
151
+ candidate: {
152
+ name: candidate.name,
153
+ ...(candidate.description ? { description: candidate.description } : {}),
154
+ files: cloneJson(candidate.files),
155
+ ...(candidate.prepare ? { prepare: cloneJson(candidate.prepare) } : {}),
156
+ defaultRun: candidate.defaultRun ?? candidate.selectedRunId,
157
+ selectedRunId: candidate.selectedRunId,
158
+ selectedRunName: selectedRun.name,
159
+ runs: cloneJson(candidate.runs),
160
+ ...(candidate.improve
161
+ ? {
162
+ improve: {
163
+ edits: [...candidate.improve.edits],
164
+ },
165
+ }
166
+ : {}),
159
167
  },
160
- ...(source.optimizer
161
- ? {
162
- optimizer: {
163
- name: source.optimizer.name,
164
- ...(source.optimizer.description ? { description: source.optimizer.description } : {}),
165
- edits: [...source.optimizer.edits],
166
- },
167
- }
168
- : {}),
169
168
  environment: cloneJson(engineRuntime),
170
169
  adapters: [
171
170
  ...new Set([
172
171
  ...source.benchmark.adapters,
173
- ...source.subject.adapters,
174
- ...(source.optimizer?.adapters ?? []),
172
+ ...candidate.adapters,
175
173
  ]),
176
174
  ],
177
175
  engine: cloneJson(source.benchmark.engine),
178
176
  engineResolve: cloneJson(engineResolve),
179
- ...(source.optimizer ? { improve: cloneJson(source.optimizer.improve) } : {}),
180
- run: cloneJson(source.subject.run),
177
+ ...(candidate.improve ? { improve: clonePhaseAdapter(candidate.improve) } : {}),
178
+ run: clonePhaseAdapter(selectedRun),
181
179
  engineRun: cloneJson(engineRun),
182
180
  };
183
181
  }
@@ -192,7 +190,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
192
190
  "adapters",
193
191
  "engine",
194
192
  ], errors);
195
- requireVersionThree(record.version, label, errors);
193
+ requireVersionFour(record.version, label, errors);
196
194
  const name = readRequiredString(record.name, `${label}.name`, errors);
197
195
  const description = readRequiredString(record.description, `${label}.description`, errors);
198
196
  const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
@@ -202,7 +200,7 @@ function normalizeBenchmarkRecord(record, label, errors) {
202
200
  }
203
201
  return name && description && engine
204
202
  ? {
205
- version: 3,
203
+ version: 4,
206
204
  name,
207
205
  description,
208
206
  adapters,
@@ -222,7 +220,7 @@ function normalizeEngineRuntimeConfig(engine, label, errors) {
222
220
  }
223
221
  }
224
222
  }
225
- function normalizeSubjectRecord(record, label, errors) {
223
+ function normalizeCandidateRecord(record, label, errors, selectedRunId) {
226
224
  if (!record) {
227
225
  return null;
228
226
  }
@@ -233,28 +231,41 @@ function normalizeSubjectRecord(record, label, errors) {
233
231
  "files",
234
232
  "prepare",
235
233
  "adapters",
236
- "run",
234
+ "defaultRun",
235
+ "runs",
236
+ "improve",
237
+ "selectedRunId",
237
238
  ], errors);
238
- requireVersionThree(record.version, label, errors);
239
+ requireVersionFour(record.version, label, errors);
239
240
  const name = readRequiredString(record.name, `${label}.name`, errors);
240
241
  const description = readOptionalString(record.description, `${label}.description`, errors);
241
242
  const files = normalizePathRef(record.files, `${label}.files`, errors);
242
- const prepare = normalizeSubjectPrepare(record.prepare, `${label}.prepare`, errors);
243
+ const prepare = normalizeCandidatePrepare(record.prepare, `${label}.prepare`, errors);
243
244
  const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
244
- const run = normalizePhaseAdapter(record.run, `${label}.run`, errors);
245
- return name && files && run
245
+ const runs = normalizeCandidateRuns(record.runs, `${label}.runs`, errors);
246
+ const defaultRun = readOptionalString(record.defaultRun, `${label}.defaultRun`, errors);
247
+ const embeddedSelectedRun = readOptionalString(record.selectedRunId, `${label}.selectedRunId`, errors);
248
+ const selected = selectedRunId ?? embeddedSelectedRun ?? defaultRun ?? Object.keys(runs).sort()[0];
249
+ if (selected && !runs[selected]) {
250
+ errors.push(`${label}.selectedRunId references unknown run ${selected}.`);
251
+ }
252
+ const improve = normalizeCandidateImprove(record.improve, `${label}.improve`, errors);
253
+ return name && files && selected && Object.keys(runs).length > 0
246
254
  ? {
247
- version: 3,
255
+ version: 4,
248
256
  name,
249
257
  ...(description ? { description } : {}),
250
258
  files,
251
259
  ...(prepare ? { prepare } : {}),
252
260
  adapters,
253
- run,
261
+ ...(defaultRun ? { defaultRun } : {}),
262
+ runs,
263
+ ...(improve ? { improve } : {}),
264
+ selectedRunId: selected,
254
265
  }
255
266
  : null;
256
267
  }
257
- function normalizeSubjectPrepare(value, label, errors) {
268
+ function normalizeCandidatePrepare(value, label, errors) {
258
269
  if (value === undefined) {
259
270
  return undefined;
260
271
  }
@@ -266,38 +277,64 @@ function normalizeSubjectPrepare(value, label, errors) {
266
277
  const command = readRequiredString(record.command, `${label}.command`, errors);
267
278
  return command ? { command } : undefined;
268
279
  }
269
- function normalizeOptimizerRecord(record, label, errors) {
280
+ function normalizeCandidateRuns(value, label, errors) {
281
+ const record = readRequiredRecord(value, label, errors);
270
282
  if (!record) {
271
- return null;
283
+ return {};
272
284
  }
273
- rejectUnknownKeys(record, label, [
274
- "version",
275
- "name",
276
- "description",
277
- "edits",
278
- "adapters",
279
- "improve",
280
- ], errors);
281
- requireVersionThree(record.version, label, errors);
282
- const name = readRequiredString(record.name, `${label}.name`, errors);
283
- const description = readOptionalString(record.description, `${label}.description`, errors);
285
+ const runs = {};
286
+ for (const [runId, runValue] of Object.entries(record).sort(([left], [right]) => left.localeCompare(right))) {
287
+ if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/u.test(runId)) {
288
+ errors.push(`${label}.${runId} must use letters, numbers, dots, underscores, or dashes.`);
289
+ continue;
290
+ }
291
+ const runRecord = readRequiredRecord(runValue, `${label}.${runId}`, errors);
292
+ if (!runRecord) {
293
+ continue;
294
+ }
295
+ rejectUnknownKeys(runRecord, `${label}.${runId}`, ["name", "use", "with", "auth"], errors);
296
+ const name = readRequiredString(runRecord.name, `${label}.${runId}.name`, errors);
297
+ const invocation = normalizePhaseAdapter(adapterRecordFrom(runRecord), `${label}.${runId}`, errors);
298
+ if (name && invocation) {
299
+ runs[runId] = {
300
+ name,
301
+ ...invocation,
302
+ };
303
+ }
304
+ }
305
+ if (Object.keys(runs).length === 0) {
306
+ errors.push(`${label} must declare at least one run.`);
307
+ }
308
+ return runs;
309
+ }
310
+ function normalizeCandidateImprove(value, label, errors) {
311
+ if (value === undefined) {
312
+ return undefined;
313
+ }
314
+ const record = readRequiredRecord(value, label, errors);
315
+ if (!record) {
316
+ return undefined;
317
+ }
318
+ rejectUnknownKeys(record, label, ["edits", "use", "with", "auth"], errors);
284
319
  const edits = normalizeRelativePathList(record.edits, `${label}.edits`, errors);
285
- const adapters = normalizeAdapterSources(record.adapters, `${label}.adapters`, errors);
286
- const improve = normalizePhaseAdapter(record.improve, `${label}.improve`, errors);
287
- return name && edits.length > 0 && improve
320
+ const invocation = normalizePhaseAdapter(adapterRecordFrom(record), label, errors);
321
+ return edits.length > 0 && invocation
288
322
  ? {
289
- version: 3,
290
- name,
291
- ...(description ? { description } : {}),
323
+ ...invocation,
292
324
  edits,
293
- adapters,
294
- improve,
295
325
  }
296
- : null;
326
+ : undefined;
297
327
  }
298
- function requireVersionThree(value, label, errors) {
299
- if (value !== 3) {
300
- errors.push(`${label}.version must be 3.`);
328
+ function adapterRecordFrom(record) {
329
+ return {
330
+ use: record.use,
331
+ ...(record.with !== undefined ? { with: record.with } : {}),
332
+ ...(record.auth !== undefined ? { auth: record.auth } : {}),
333
+ };
334
+ }
335
+ function requireVersionFour(value, label, errors) {
336
+ if (value !== 4) {
337
+ errors.push(`${label}.version must be 4.`);
301
338
  }
302
339
  }
303
340
  function normalizeRuntime(value, label, errors) {
@@ -417,10 +454,13 @@ function engineRuntimeFromConfig(engine) {
417
454
  };
418
455
  }
419
456
  function cloneEngineInvocation(engine) {
457
+ return clonePhaseAdapter(engine);
458
+ }
459
+ function clonePhaseAdapter(adapter) {
420
460
  return {
421
- use: engine.use,
422
- with: cloneJson(engine.with ?? {}),
423
- ...(engine.auth !== undefined ? { auth: cloneJson(engine.auth) } : {}),
461
+ use: adapter.use,
462
+ with: cloneJson(adapter.with ?? {}),
463
+ ...(adapter.auth !== undefined ? { auth: cloneJson(adapter.auth) } : {}),
424
464
  };
425
465
  }
426
466
  function mergeRuntime(base, override) {