elasticdash-test 0.1.20-alpha-21 → 0.1.20-alpha-22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,4 +20,19 @@ export declare function submitTestRun(serverUrl: string, apiKey: string, testGro
20
20
  export declare function createBatch(serverUrl: string, apiKey: string, payload: Record<string, unknown>): Promise<{
21
21
  id: number;
22
22
  }>;
23
+ /**
24
+ * Resolved evaluator configuration from the backend.
25
+ * Provider/model/apiKey may be null if the user has not configured an evaluator.
26
+ */
27
+ export interface EvaluatorConfig {
28
+ provider: string | null;
29
+ model: string | null;
30
+ apiKey: string | null;
31
+ }
32
+ /**
33
+ * Fetch the project's evaluator config (with user-level fallback).
34
+ * Used by ed-test llm_judge benchmarks when judge_provider/judge_model
35
+ * are not specified in the test definition.
36
+ */
37
+ export declare function fetchEvaluatorConfig(serverUrl: string, apiKey: string): Promise<EvaluatorConfig>;
23
38
  //# sourceMappingURL=api-client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AA0C9C;;;GAGG;AACH,wBAAsB,eAAe,CACnC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACpE,OAAO,CAAC,YAAY,EAAE,CAAC,CAUzB;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB"}
1
+ {"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AA0C9C;;;GAGG;AACH,wBAAsB,eAAe,CACnC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACpE,OAAO,CAAC,YAAY,EAAE,CAAC,CAUzB;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAA;IACpB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;CACtB;AAED;;;;GAIG;AACH,wBAAsB,oBAAoB,CACxC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,eAAe,CAAC,CAI1B"}
@@ -68,4 +68,14 @@ export async function createBatch(serverUrl, apiKey, payload) {
68
68
  body: JSON.stringify(payload),
69
69
  });
70
70
  }
71
+ /**
72
+ * Fetch the project's evaluator config (with user-level fallback).
73
+ * Used by ed-test llm_judge benchmarks when judge_provider/judge_model
74
+ * are not specified in the test definition.
75
+ */
76
+ export async function fetchEvaluatorConfig(serverUrl, apiKey) {
77
+ const base = normalizeBase(serverUrl);
78
+ const url = `${base}/api/test-runs/evaluator-config`;
79
+ return apiRequest(url, apiKey);
80
+ }
71
81
  //# sourceMappingURL=api-client.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAG1D,gEAAgE;AAChE,sDAAsD;AAEtD,iGAAiG;AACjG,SAAS,aAAa,CAAC,SAAiB;IACtC,OAAO,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;AAC5D,CAAC;AAED,SAAS,OAAO,CAAC,MAAc;IAC7B,OAAO;QACL,cAAc,EAAE,kBAAkB;QAClC,SAAS,EAAE,MAAM,IAAI,EAAE;QACvB,kBAAkB,EAAE,UAAU,EAAE;KACjC,CAAA;AACH,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,GAAW,EACX,MAAc,EACd,UAAuB,EAAE;IAEzB,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,EAAE,CAAC,CAAA;IAEhD,MAAM,GAAG,GAAG,MAAM,gBAAgB,EAAE,CAAC,GAAG,EAAE;QACxC,GAAG,OAAO;QACV,OAAO,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,OAAiC,IAAI,EAAE,CAAC,EAAE;KACtF,CAAC,CAAA;IAEF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;QAC7C,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;QAC1F,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;IACjE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA8B,CAAA;IACzD,kFAAkF;IAClF,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAM,CAAA;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,SAAiB,EACjB,MAAc,EACd,OAAqE;IAErE,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAA;IACpC,IAAI,OAAO,EAAE,YAAY;QAAE,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,YAAY,CAAC,CAAA;IAC3E,IAAI,OAAO,EAAE,IAAI,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;IACrE,IAAI,OAAO,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzD,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;IAC5B,MAAM,GAAG,GAAG,GAAG,IAAI,6BAA6B,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;IACpE,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,MAAc,EACd,WAAmB,EACnB,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,mBAAmB,WAAW,OAAO,CAAA;IACxD,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,MAAc,EACd,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,yBAAyB,CAAA;IAC5C,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC"}
1
+ {"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAG1D,gEAAgE;AAChE,sDAAsD;AAEtD,iGAAiG;AACjG,SAAS,aAAa,CAAC,SAAiB;IACtC,OAAO,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;AAC5D,CAAC;AAED,SAAS,OAAO,CAAC,MAAc;IAC7B,OAAO;QACL,cAAc,EAAE,kBAAkB;QAClC,SAAS,EAAE,MAAM,IAAI,EAAE;QACvB,kBAAkB,EAAE,UAAU,EAAE;KACjC,CAAA;AACH,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,GAAW,EACX,MAAc,EACd,UAAuB,EAAE;IAEzB,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,EAAE,CAAC,CAAA;IAEhD,MAAM,GAAG,GAAG,MAAM,gBAAgB,EAAE,CAAC,GAAG,EAAE;QACxC,GAAG,OAAO;QACV,OAAO,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,OAAiC,IAAI,EAAE,CAAC,EAAE;KACtF,CAAC,CAAA;IAEF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;QAC7C,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;QAC1F,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;IACjE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA8B,CAAA;IACzD,kFAAkF;IAClF,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAM,CAAA;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,SAAiB,EACjB,MAAc,EACd,OAAqE;IAErE,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAA;IACpC,IAAI,OAAO,EAAE,YAAY;QAAE,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,YAAY,CAAC,CAAA;IAC3E,IAAI,OAAO,EAAE,IAAI,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;IACrE,IAAI,OAAO,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzD,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;IAC5B,MAAM,GAAG,GAAG,GAAG,IAAI,6BAA6B,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;IACpE,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,MAAc,EACd,WAAmB,EACnB,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,mBAAmB,WAAW,OAAO,CAAA;IACxD,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,MAAc,EACd,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,yBAAyB,CAAA;IAC5C,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAYD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,SAAiB,EACjB,MAAc;IAEd,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,iCAAiC,CAAA;IACpD,OAAO,UAAU,CAAkB,GAAG,EAAE,MAAM,CAAC,CAAA;AACjD,CAAC"}
@@ -8,6 +8,7 @@
8
8
  */
9
9
  import type { TestMeasurement } from './measurement.js';
10
10
  import type { TestBenchmarks } from './test-registry.js';
11
+ import type { EvaluatorConfig } from './api-client.js';
11
12
  export type MetricName = 'duration_ms' | 'tokens_total' | 'output_contains' | 'output_not_contains' | 'llm_judge';
12
13
  export interface MetricResult {
13
14
  name: MetricName;
@@ -24,6 +25,9 @@ export interface BenchmarkResult {
24
25
  /**
25
26
  * Compare a measurement against benchmarks. Async because llm_judge requires
26
27
  * an LLM call. The step's output is needed for output_contains/llm_judge checks.
28
+ *
29
+ * @param evaluatorConfig - Optional backend evaluator config used as fallback
30
+ * when the test does not specify judge_provider/judge_model.
27
31
  */
28
- export declare function compareBenchmarks(measurement: TestMeasurement, benchmarks: TestBenchmarks, stepOutput?: unknown): Promise<BenchmarkResult>;
32
+ export declare function compareBenchmarks(measurement: TestMeasurement, benchmarks: TestBenchmarks, stepOutput?: unknown, evaluatorConfig?: EvaluatorConfig | null): Promise<BenchmarkResult>;
29
33
  //# sourceMappingURL=benchmark.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AAExD,MAAM,MAAM,UAAU,GAAG,aAAa,GAAG,cAAc,GAAG,iBAAiB,GAAG,qBAAqB,GAAG,WAAW,CAAA;AAEjH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,UAAU,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAA;IACf,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,OAAO,EAAE,YAAY,EAAE,CAAA;CACxB;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,WAAW,EAAE,eAAe,EAC5B,UAAU,EAAE,cAAc,EAC1B,UAAU,CAAC,EAAE,OAAO,GACnB,OAAO,CAAC,eAAe,CAAC,CA+H1B"}
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AACxD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAA;AAEtD,MAAM,MAAM,UAAU,GAAG,aAAa,GAAG,cAAc,GAAG,iBAAiB,GAAG,qBAAqB,GAAG,WAAW,CAAA;AAEjH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,UAAU,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAA;IACf,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,OAAO,EAAE,YAAY,EAAE,CAAA;CACxB;AAaD;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACrC,WAAW,EAAE,eAAe,EAC5B,UAAU,EAAE,cAAc,EAC1B,UAAU,CAAC,EAAE,OAAO,EACpB,eAAe,CAAC,EAAE,eAAe,GAAG,IAAI,GACvC,OAAO,CAAC,eAAe,CAAC,CA4J1B"}
@@ -7,11 +7,23 @@
7
7
  * Generated/updated on 2026-04-20.
8
8
  */
9
9
  import { callProviderLLM } from '../matchers/index.js';
10
+ /** Maps backend provider names to SDK provider names used by callProviderLLM. */
11
+ const PROVIDER_NAME_MAP = {
12
+ anthropic: 'claude',
13
+ moonshot: 'kimi',
14
+ };
15
+ /** Normalize provider name from backend format to SDK format. */
16
+ function normalizeSdkProvider(provider) {
17
+ return PROVIDER_NAME_MAP[provider] ?? provider;
18
+ }
10
19
  /**
11
20
  * Compare a measurement against benchmarks. Async because llm_judge requires
12
21
  * an LLM call. The step's output is needed for output_contains/llm_judge checks.
22
+ *
23
+ * @param evaluatorConfig - Optional backend evaluator config used as fallback
24
+ * when the test does not specify judge_provider/judge_model.
13
25
  */
14
- export async function compareBenchmarks(measurement, benchmarks, stepOutput) {
26
+ export async function compareBenchmarks(measurement, benchmarks, stepOutput, evaluatorConfig) {
15
27
  const metrics = [];
16
28
  let firstFailure;
17
29
  if (benchmarks.max_duration_ms !== undefined) {
@@ -74,10 +86,35 @@ export async function compareBenchmarks(measurement, benchmarks, stepOutput) {
74
86
  const judge = benchmarks.llm_judge;
75
87
  const outputStr = stringifyOutput(stepOutput);
76
88
  const threshold = judge.judge_score_threshold ?? 7;
89
+ // Resolve provider/model: test definition takes priority, then backend
90
+ // evaluator config, then fall back to 'openai' default.
91
+ const resolvedProvider = normalizeSdkProvider(judge.judge_provider ?? evaluatorConfig?.provider ?? 'openai');
92
+ const resolvedModel = judge.judge_model ?? evaluatorConfig?.model ?? undefined;
93
+ // If the backend provided an API key and we're using its provider,
94
+ // set it in the environment so callProviderLLM can pick it up.
95
+ const envKeyMap = {
96
+ openai: 'OPENAI_API_KEY',
97
+ claude: 'ANTHROPIC_API_KEY',
98
+ gemini: 'GEMINI_API_KEY',
99
+ grok: 'GROK_API_KEY',
100
+ kimi: 'KIMI_API_KEY',
101
+ };
102
+ const envKey = envKeyMap[resolvedProvider];
103
+ let restoreEnv;
104
+ if (evaluatorConfig?.apiKey && envKey && !judge.judge_provider && !process.env[envKey]) {
105
+ const prev = process.env[envKey];
106
+ process.env[envKey] = evaluatorConfig.apiKey;
107
+ restoreEnv = () => {
108
+ if (prev === undefined)
109
+ delete process.env[envKey];
110
+ else
111
+ process.env[envKey] = prev;
112
+ };
113
+ }
77
114
  try {
78
- const provider = judge.judge_provider ?? 'openai';
79
115
  const evalPrompt = `${judge.judge_prompt}\n\nOutput to evaluate:\n${outputStr}\n\nScore this output on a scale of 0-10. Respond with only the number.`;
80
- const result = await callProviderLLM(evalPrompt, { provider, model: judge.judge_model ?? undefined }, 'You are an expert test judge. Return only a number between 0 and 10.', 16, 0);
116
+ const result = await callProviderLLM(evalPrompt, { provider: resolvedProvider, model: resolvedModel }, 'You are an expert test judge. Return only a number between 0 and 10.', 16, 0);
117
+ restoreEnv?.();
81
118
  const score = parseFloat(result.content.match(/-?\d+(?:\.\d+)?/)?.[0] ?? '');
82
119
  if (isNaN(score)) {
83
120
  metrics.push({
@@ -106,6 +143,7 @@ export async function compareBenchmarks(measurement, benchmarks, stepOutput) {
106
143
  }
107
144
  }
108
145
  catch (err) {
146
+ restoreEnv?.();
109
147
  const errMsg = err instanceof Error ? err.message : String(err);
110
148
  metrics.push({
111
149
  name: 'llm_judge',
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAoBtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,WAA4B,EAC5B,UAA0B,EAC1B,UAAoB;IAEpB,MAAM,OAAO,GAAmB,EAAE,CAAA;IAClC,IAAI,YAAgC,CAAA;IAEpC,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,IAAI,UAAU,CAAC,eAAe,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,aAAa;YACnB,KAAK,EAAE,WAAW,CAAC,WAAW;YAC9B,SAAS,EAAE,UAAU,CAAC,eAAe;YACrC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,gBAAgB,WAAW,CAAC,WAAW,6BAA6B,UAAU,CAAC,eAAe,GAAG,CAAA;QAClH,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,WAAW,CAAC,YAAY,IAAI,CAAC,CAAA;QAC3C,MAAM,MAAM,GAAG,KAAK,IAAI,UAAU,CAAC,gBAAgB,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,cAAc;YACpB,KAAK;YACL,SAAS,EAAE,UAAU,CAAC,gBAAgB;YACtC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iBAAiB,KAAK,6BAA6B,UAAU,CAAC,gBAAgB,GAAG,CAAA;QAClG,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC,CAAA;QACzF,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,iBAAiB;YACvB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,4BAA4B,UAAU,CAAC,eAAe,GAAG;SACvF,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,4BAA4B,UAAU,CAAC,eAAe,GAAG,CAAA;QAC1E,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,CAAC,CAAA;QAC9F,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,qBAAqB;YAC3B,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,iCAAiC,UAAU,CAAC,mBAAmB,GAAG;SAChG,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iCAAiC,UAAU,CAAC,mBAAmB,GAAG,CAAA;QACnF,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,SAAS,CAAA;QAClC,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,qBAAqB,IAAI,CAAC,CAAA;QAElD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,IAAI,QAAQ,CAAA;YACjD,MAAM,UAAU,GAAG,GAAG,KAAK,CAAC,YAAY,4BAA4B,SAAS,yEAAyE,CAAA;YAEtJ,MAAM,MAAM,GAAG,MAAM,eAAe,CAClC,UAAU,EACV,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,CAAC,WAAW,IAAI,SAAS,EAAE,EACnD,sEAAsE,EACtE,EAAE,EACF,CAAC,CACF,CAAA;YAED,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;YAC5E,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,CAAC;oBACR,SAAS;oBACT,MAAM,EAAE,KAAK;oBACb,MAAM,EAAE,6CAA6C,MAAM,CAAC,OAAO,GAAG;iBACvE,CAAC,CAAA;gBACF,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,YAAY,GAAG,gDAAgD,CAAA;gBACjE,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,GAAG,KAAK,IAAI,SAAS,CAAA;gBACjC,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,KAAK;oBACZ,SAAS;oBACT,MAAM;oBACN,MAAM,EAAE,UAAU,KAAK,IAAI,SAAS,EAAE;iBACvC,CAAC,CAAA;gBACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;oBAC7B,YAAY,GAAG,oBAAoB,KAAK,sBAAsB,SAAS,GAAG,CAAA;gBAC5E,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC/D,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,CAAC;gBACR,SAAS;gBACT,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,oBAAoB,MAAM,EAAE;aACrC,CAAC,CAAA;YACF,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,YAAY,GAAG,oBAAoB,MAAM,EAAE,CAAA;YAC7C,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAC9C,OAAO;QACL,MAAM,EAAE,SAAS;QACjB,cAAc,EAAE,YAAY;QAC5B,OAAO;KACR,CAAA;AACH,CAAC;AAED,kEAAkE;AAClE,SAAS,eAAe,CAAC,MAAe;IACtC,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,CAAA;IACtD,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAA;IAC7C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;AAC/B,CAAC"}
1
+ {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAqBtD,iFAAiF;AACjF,MAAM,iBAAiB,GAA2B;IAChD,SAAS,EAAE,QAAQ;IACnB,QAAQ,EAAE,MAAM;CACjB,CAAA;AAED,iEAAiE;AACjE,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,OAAO,iBAAiB,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAA;AAChD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,WAA4B,EAC5B,UAA0B,EAC1B,UAAoB,EACpB,eAAwC;IAExC,MAAM,OAAO,GAAmB,EAAE,CAAA;IAClC,IAAI,YAAgC,CAAA;IAEpC,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,IAAI,UAAU,CAAC,eAAe,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,aAAa;YACnB,KAAK,EAAE,WAAW,CAAC,WAAW;YAC9B,SAAS,EAAE,UAAU,CAAC,eAAe;YACrC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,gBAAgB,WAAW,CAAC,WAAW,6BAA6B,UAAU,CAAC,eAAe,GAAG,CAAA;QAClH,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,WAAW,CAAC,YAAY,IAAI,CAAC,CAAA;QAC3C,MAAM,MAAM,GAAG,KAAK,IAAI,UAAU,CAAC,gBAAgB,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,cAAc;YACpB,KAAK;YACL,SAAS,EAAE,UAAU,CAAC,gBAAgB;YACtC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iBAAiB,KAAK,6BAA6B,UAAU,CAAC,gBAAgB,GAAG,CAAA;QAClG,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC,CAAA;QACzF,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,iBAAiB;YACvB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,4BAA4B,UAAU,CAAC,eAAe,GAAG;SACvF,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,4BAA4B,UAAU,CAAC,eAAe,GAAG,CAAA;QAC1E,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,CAAC,CAAA;QAC9F,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,qBAAqB;YAC3B,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,iCAAiC,UAAU,CAAC,mBAAmB,GAAG;SAChG,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iCAAiC,UAAU,CAAC,mBAAmB,GAAG,CAAA;QACnF,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,SAAS,CAAA;QAClC,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,qBAAqB,IAAI,CAAC,CAAA;QAElD,uEAAuE;QACvE,wDAAwD;QACxD,MAAM,gBAAgB,GAAG,oBAAoB,CAC3C,KAAK,CAAC,cAAc,IAAI,eAAe,EAAE,QAAQ,IAAI,QAAQ,CAC9D,CAAA;QACD,MAAM,aAAa,GAAG,KAAK,CAAC,WAAW,IAAI,eAAe,EAAE,KAAK,IAAI,SAAS,CAAA;QAE9E,mEAAmE;QACnE,+DAA+D;QAC/D,MAAM,SAAS,GAA2B;YACxC,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,mBAAmB;YAC3B,MAAM,EAAE,gBAAgB;YACxB,IAAI,EAAE,cAAc;YACpB,IAAI,EAAE,cAAc;SACrB,CAAA;QACD,MAAM,MAAM,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;QAC1C,IAAI,UAAoC,CAAA;QACxC,IAAI,eAAe,EAAE,MAAM,IAAI,MAAM,IAAI,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACvF,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YAChC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,eAAe,CAAC,MAAM,CAAA;YAC5C,UAAU,GAAG,GAAG,EAAE;gBAChB,IAAI,IAAI,KAAK,SAAS;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;;oBAC7C,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,CAAA;YACjC,CAAC,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,GAAG,KAAK,CAAC,YAAY,4BAA4B,SAAS,yEAAyE,CAAA;YAEtJ,MAAM,MAAM,GAAG,MAAM,eAAe,CAClC,UAAU,EACV,EAAE,QAAQ,EAAE,gBAAoE,EAAE,KAAK,EAAE,aAAa,EAAE,EACxG,sEAAsE,EACtE,EAAE,EACF,CAAC,CACF,CAAA;YAED,UAAU,EAAE,EAAE,CAAA;YAEd,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;YAC5E,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,CAAC;oBACR,SAAS;oBACT,MAAM,EAAE,KAAK;oBACb,MAAM,EAAE,6CAA6C,MAAM,CAAC,OAAO,GAAG;iBACvE,CAAC,CAAA;gBACF,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,YAAY,GAAG,gDAAgD,CAAA;gBACjE,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,GAAG,KAAK,IAAI,SAAS,CAAA;gBACjC,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,KAAK;oBACZ,SAAS;oBACT,MAAM;oBACN,MAAM,EAAE,UAAU,KAAK,IAAI,SAAS,EAAE;iBACvC,CAAC,CAAA;gBACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;oBAC7B,YAAY,GAAG,oBAAoB,KAAK,sBAAsB,SAAS,GAAG,CAAA;gBAC5E,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,EAAE,EAAE,CAAA;YACd,MAAM,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC/D,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,CAAC;gBACR,SAAS;gBACT,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,oBAAoB,MAAM,EAAE;aACrC,CAAC,CAAA;YACF,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,YAAY,GAAG,oBAAoB,MAAM,EAAE,CAAA;YAC7C,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAC9C,OAAO;QACL,MAAM,EAAE,SAAS;QACjB,cAAc,EAAE,YAAY;QAC5B,OAAO;KACR,CAAA;AACH,CAAC;AAED,kEAAkE;AAClE,SAAS,eAAe,CAAC,MAAe;IACtC,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,CAAA;IACtD,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAA;IAC7C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;AAC/B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"ed-runner.d.ts","sourceRoot":"","sources":["../../src/ci/ed-runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAOrD,MAAM,WAAW,gBAAgB;IAC/B,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAA;IACvC,mFAAmF;IACnF,IAAI,CAAC,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,MAAM,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAA;IAC1C,KAAK,CAAC,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,iDAAiD;IACjD,UAAU,CAAC,EAAE,iBAAiB,EAAE,CAAA;CACjC;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;IAClB,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,UAAU,EAAE,MAAM,CAAA;CACnB;AAID,wBAAsB,UAAU,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAsFrF"}
1
+ {"version":3,"file":"ed-runner.d.ts","sourceRoot":"","sources":["../../src/ci/ed-runner.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAOrD,MAAM,WAAW,gBAAgB;IAC/B,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAA;IACvC,mFAAmF;IACnF,IAAI,CAAC,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,MAAM,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAA;IAC1C,KAAK,CAAC,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,iDAAiD;IACjD,UAAU,CAAC,EAAE,iBAAiB,EAAE,CAAA;CACjC;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;IAClB,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,UAAU,EAAE,MAAM,CAAA;CACnB;AAID,wBAAsB,UAAU,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAyGrF"}
@@ -4,6 +4,7 @@ import { createReplayContext, installReplay, uninstallReplay, ReplayMissError }
4
4
  import { collectMeasurement } from './measurement.js';
5
5
  import { SDK_VERSION } from './trace-schema.js';
6
6
  import { compareBenchmarks } from './benchmark.js';
7
+ import { fetchEvaluatorConfig } from './api-client.js';
7
8
  // ─── Runner ─────────────────────────────────────────────────
8
9
  export async function runEdTests(options) {
9
10
  const cwd = options?.cwd ?? process.cwd();
@@ -29,12 +30,29 @@ export async function runEdTests(options) {
29
30
  testsToRun = tests.filter(t => matchGlob(t.name, pattern));
30
31
  }
31
32
  const maxRuns = Math.max(1, options?.runs ?? 1);
33
+ // Fetch evaluator config from backend if any test uses llm_judge without
34
+ // explicit provider/model. Cached for the entire run to avoid repeated calls.
35
+ let evaluatorConfig = null;
36
+ const needsEvaluatorConfig = testsToRun.some(t => t.benchmarks.llm_judge && (!t.benchmarks.llm_judge.judge_provider || !t.benchmarks.llm_judge.judge_model));
37
+ if (needsEvaluatorConfig) {
38
+ const serverUrl = process.env.ELASTICDASH_API_URL ?? process.env.ELASTICDASH_SERVER ?? '';
39
+ const apiKey = process.env.ELASTICDASH_API_KEY ?? '';
40
+ if (serverUrl && apiKey) {
41
+ try {
42
+ evaluatorConfig = await fetchEvaluatorConfig(serverUrl, apiKey);
43
+ console.log(`[ed-test] Evaluator config: provider=${evaluatorConfig.provider}, model=${evaluatorConfig.model}, hasKey=${!!evaluatorConfig.apiKey}`);
44
+ }
45
+ catch (err) {
46
+ console.warn(`[ed-test] Could not fetch evaluator config: ${err instanceof Error ? err.message : String(err)}`);
47
+ }
48
+ }
49
+ }
32
50
  for (const test of testsToRun) {
33
51
  const allRuns = [];
34
52
  let bestResult = null;
35
53
  for (let attempt = 1; attempt <= maxRuns; attempt++) {
36
54
  const runStartedAt = new Date().toISOString();
37
- const result = await runSingleTest(test);
55
+ const result = await runSingleTest(test, evaluatorConfig);
38
56
  const runFinishedAt = new Date().toISOString();
39
57
  if (attempt > 1) {
40
58
  console.log(` [ed-test] ${test.name}: run ${attempt}/${maxRuns} — ${result.status}`);
@@ -81,7 +99,7 @@ export async function runEdTests(options) {
81
99
  async function resolveCustomInput(input) {
82
100
  return typeof input === 'function' ? await input() : input;
83
101
  }
84
- async function runSingleTest(test) {
102
+ async function runSingleTest(test, evaluatorConfig) {
85
103
  const startMs = Date.now();
86
104
  const targetStep = test.traceData.steps.find(s => s.step_id === test.target.step_id);
87
105
  const resolvedInput = test.input !== undefined
@@ -139,7 +157,7 @@ async function runSingleTest(test) {
139
157
  };
140
158
  }
141
159
  // Compare against benchmarks (async to support llm_judge)
142
- const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output);
160
+ const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output, evaluatorConfig);
143
161
  return {
144
162
  ...base,
145
163
  testId: test.name,
@@ -158,7 +176,7 @@ async function runSingleTest(test) {
158
176
  const traceMeasurement = extractMeasurementFromTrace(test);
159
177
  if (traceMeasurement) {
160
178
  console.log(` [ed-test] ${test.name}: run() failed (${err instanceof Error ? err.message : String(err)}), using trace measurement fallback`);
161
- const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output);
179
+ const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output, evaluatorConfig);
162
180
  return {
163
181
  ...base,
164
182
  testId: test.name,
@@ -1 +1 @@
1
- {"version":3,"file":"ed-runner.js","sourceRoot":"","sources":["../../src/ci/ed-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC5C,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAClG,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAsDlD,+DAA+D;AAE/D,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,OAA0B;IACzD,MAAM,GAAG,GAAG,OAAO,EAAE,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAA;IACzC,MAAM,KAAK,GAAG,UAAU,EAAE,CAAA;IAC1B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAC1C,MAAM,OAAO,GAAmB,EAAE,CAAA;IAElC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;IAElD,2CAA2C;IAC3C,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;YACjC,QAAQ,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;YACnC,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,qBAAqB,GAAG,CAAC,OAAO,EAAE;YACjD,UAAU,EAAE,CAAC;YACb,UAAU,EAAE,EAAE;SACf,CAAC,CAAA;IACJ,CAAC;IAED,mCAAmC;IACnC,IAAI,UAAU,GAAoB,KAAK,CAAA;IACvC,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAA;QAC9B,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAA;IAC5D,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,IAAI,CAAC,CAAC,CAAA;IAE/C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAwB,EAAE,CAAA;QACvC,IAAI,UAAU,GAAwB,IAAI,CAAA;QAE1C,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;YACpD,MAAM,YAAY,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAC7C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,CAAA;YACxC,MAAM,aAAa,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAE9C,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,SAAS,OAAO,IAAI,OAAO,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;YACvF,CAAC;YAED,+BAA+B;YAC/B,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,aAAa,EAAE,MAAM,CAAC,aAAa;gBACnC,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,eAAe,EAAE,MAAM,CAAC,eAAe;gBACvC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,SAAS,EAAE,YAAY;gBACvB,UAAU,EAAE,aAAa;aAC1B,CAAC,CAAA;YAEF,4EAA4E;YAC5E,IAAI,CAAC,UAAU,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBAC5C,UAAU,GAAG,MAAM,CAAA;YACrB,CAAC;QAEP,CAAC;QAEG,oCAAoC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAA;QACxD,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAA;QAExD,OAAO,CAAC,IAAI,CAAC;YACX,GAAG,UAAW;YACd,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YACnC,aAAa,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,aAAa,IAAI,UAAW,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,SAAS;YAC9F,UAAU,EAAE,OAAO;SACpB,CAAC,CAAA;QAEF,IAAI,OAAO,EAAE,QAAQ,IAAI,SAAS,EAAE,CAAC;YACnC,MAAK;QACP,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAE3C,OAAO;QACL,KAAK;QACL,SAAS;QACT,UAAU;QACV,OAAO;QACP,UAAU,EAAE,WAAW;KACxB,CAAA;AACH,CAAC;AAED,+DAA+D;AAE/D,KAAK,UAAU,kBAAkB,CAAC,KAAmD;IACnF,OAAO,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,MAAO,KAA0C,EAAE,CAAC,CAAC,CAAC,KAAK,CAAA;AAClG,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,IAAmB;IAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAEpF,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,KAAK,SAAS;QAC5C,CAAC,CAAC,MAAM,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QACtC,CAAC,CAAC,UAAU,EAAE,KAAK,CAAA;IAErB,MAAM,IAAI,GAA0B;QAClC,MAAM,EAAE,IAAI,CAAC,IAAI;QACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;QACnB,QAAQ,EAAE,IAAI,CAAC,KAAK;QACpB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;QAChE,KAAK,EAAE,aAAa;QACpB,MAAM,EAAE,UAAU,EAAE,MAAM;KAC3B,CAAA;IAED,4BAA4B;IAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,OAAO,IAAI,CAAC,GAAG,KAAK,UAAU,EAAE,CAAC;QAChD,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,0BAA0B;YACzC,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;IAED,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAC1E,aAAa,CAAC,SAAS,CAAC,CAAA;IAExB,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,IAAI,KAAK,CAAA;QAE1C,MAAM,OAAO,CAAC,IAAI,CAAC;YACjB,IAAI,CAAC,GAAG,EAAE;YACV,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAC/B,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,CACjE;SACF,CAAC,CAAA;QAEF,4CAA4C;QAC5C,gEAAgE;QAChE,sEAAsE;QACtE,qEAAqE;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,2CAA2C;QAC3C,IAAI,WAAW,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAA;QAC/C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,WAAW,GAAG,2BAA2B,CAAC,IAAI,CAAC,IAAI,IAAI,CAAA;YACvD,IAAI,WAAW,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,yDAAyD,CAAC,CAAA;YAChG,CAAC;QACH,CAAC;QACD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,gBAAgB,IAAI,CAAC,MAAM,CAAC,OAAO,qCAAqC;gBACvF,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QAED,0DAA0D;QAC1D,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAAC,WAAW,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;QAEjG,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAChD,aAAa,EAAE,eAAe,CAAC,cAAc;YAC7C,WAAW;YACX,eAAe;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,4EAA4E;QAC5E,4EAA4E;QAC5E,iEAAiE;QACjE,MAAM,gBAAgB,GAAG,2BAA2B,CAAC,IAAI,CAAC,CAAA;QAC1D,IAAI,gBAAgB,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,mBAAmB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAA;YAC7I,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAAC,gBAAgB,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;YACtG,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBAChD,aAAa,EAAE,eAAe,CAAC,cAAc;gBAC7C,WAAW,EAAE,gBAAgB;gBAC7B,eAAe;gBACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QAED,IAAI,GAAG,YAAY,eAAe,EAAE,CAAC;YACnC,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,gBAAgB,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,QAAQ,EAAE;gBAC9D,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QACD,IAAI,GAAG,YAAY,YAAY,EAAE,CAAC;YAChC,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,wBAAwB,GAAG,CAAC,SAAS,IAAI;gBACxD,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QACD,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,oBAAoB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;YACrF,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;YAAS,CAAC;QACT,eAAe,EAAE,CAAA;IACnB,CAAC;AACH,CAAC;AAED,+DAA+D;AAE/D;;;;;;GAMG;AACH,SAAS,2BAA2B,CAAC,IAAmB;IACtD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAC9E,IAAI,CAAC,IAAI;QAAE,OAAO,SAAS,CAAA;IAE3B,MAAM,MAAM,GAAoB;QAC9B,WAAW,EAAE,IAAI,CAAC,WAAW;KAC9B,CAAA;IAED,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAA;QACvC,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAA;QACzC,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAA;IACzC,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+DAA+D;AAE/D,MAAM,YAAa,SAAQ,KAAK;IACX;IAAnB,YAAmB,SAAiB;QAClC,KAAK,CAAC,wBAAwB,SAAS,IAAI,CAAC,CAAA;QAD3B,cAAS,GAAT,SAAS,CAAQ;QAElC,IAAI,CAAC,IAAI,GAAG,cAAc,CAAA;IAC5B,CAAC;CACF;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAe;IAC9C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,GAAG,CAClG,CAAA;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACzB,CAAC"}
1
+ {"version":3,"file":"ed-runner.js","sourceRoot":"","sources":["../../src/ci/ed-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC5C,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAClG,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAClD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAA;AAuDtD,+DAA+D;AAE/D,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,OAA0B;IACzD,MAAM,GAAG,GAAG,OAAO,EAAE,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAA;IACzC,MAAM,KAAK,GAAG,UAAU,EAAE,CAAA;IAC1B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAC1C,MAAM,OAAO,GAAmB,EAAE,CAAA;IAElC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;IAElD,2CAA2C;IAC3C,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;YACjC,QAAQ,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;YACnC,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,qBAAqB,GAAG,CAAC,OAAO,EAAE;YACjD,UAAU,EAAE,CAAC;YACb,UAAU,EAAE,EAAE;SACf,CAAC,CAAA;IACJ,CAAC;IAED,mCAAmC;IACnC,IAAI,UAAU,GAAoB,KAAK,CAAA;IACvC,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAA;QAC9B,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAA;IAC5D,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,IAAI,CAAC,CAAC,CAAA;IAE/C,yEAAyE;IACzE,8EAA8E;IAC9E,IAAI,eAAe,GAA2B,IAAI,CAAA;IAClD,MAAM,oBAAoB,GAAG,UAAU,CAAC,IAAI,CAC1C,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,WAAW,CAAC,CAC/G,CAAA;IACD,IAAI,oBAAoB,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,EAAE,CAAA;QACzF,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAA;QACpD,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;YACxB,IAAI,CAAC;gBACH,eAAe,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;gBAC/D,OAAO,CAAC,GAAG,CAAC,wCAAwC,eAAe,CAAC,QAAQ,WAAW,eAAe,CAAC,KAAK,YAAY,CAAC,CAAC,eAAe,CAAC,MAAM,EAAE,CAAC,CAAA;YACrJ,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,+CAA+C,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;YACjH,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAwB,EAAE,CAAA;QACvC,IAAI,UAAU,GAAwB,IAAI,CAAA;QAE1C,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;YACpD,MAAM,YAAY,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAC7C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,eAAe,CAAC,CAAA;YACzD,MAAM,aAAa,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAE9C,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,SAAS,OAAO,IAAI,OAAO,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;YACvF,CAAC;YAED,+BAA+B;YAC/B,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,aAAa,EAAE,MAAM,CAAC,aAAa;gBACnC,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,eAAe,EAAE,MAAM,CAAC,eAAe;gBACvC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,SAAS,EAAE,YAAY;gBACvB,UAAU,EAAE,aAAa;aAC1B,CAAC,CAAA;YAEF,4EAA4E;YAC5E,IAAI,CAAC,UAAU,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBAC5C,UAAU,GAAG,MAAM,CAAA;YACrB,CAAC;QAEP,CAAC;QAEG,oCAAoC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAA;QACxD,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAA;QAExD,OAAO,CAAC,IAAI,CAAC;YACX,GAAG,UAAW;YACd,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YACnC,aAAa,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,aAAa,IAAI,UAAW,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,SAAS;YAC9F,UAAU,EAAE,OAAO;SACpB,CAAC,CAAA;QAEF,IAAI,OAAO,EAAE,QAAQ,IAAI,SAAS,EAAE,CAAC;YACnC,MAAK;QACP,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAE3C,OAAO;QACL,KAAK;QACL,SAAS;QACT,UAAU;QACV,OAAO;QACP,UAAU,EAAE,WAAW;KACxB,CAAA;AACH,CAAC;AAED,+DAA+D;AAE/D,KAAK,UAAU,kBAAkB,CAAC,KAAmD;IACnF,OAAO,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,MAAO,KAA0C,EAAE,CAAC,CAAC,CAAC,KAAK,CAAA;AAClG,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,IAAmB,EAAE,eAAwC;IACxF,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAEpF,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,KAAK,SAAS;QAC5C,CAAC,CAAC,MAAM,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QACtC,CAAC,CAAC,UAAU,EAAE,KAAK,CAAA;IAErB,MAAM,IAAI,GAA0B;QAClC,MAAM,EAAE,IAAI,CAAC,IAAI;QACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;QACnB,QAAQ,EAAE,IAAI,CAAC,KAAK;QACpB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;QAChE,KAAK,EAAE,aAAa;QACpB,MAAM,EAAE,UAAU,EAAE,MAAM;KAC3B,CAAA;IAED,4BAA4B;IAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,OAAO,IAAI,CAAC,GAAG,KAAK,UAAU,EAAE,CAAC;QAChD,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,0BAA0B;YACzC,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;IAED,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAC1E,aAAa,CAAC,SAAS,CAAC,CAAA;IAExB,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,IAAI,KAAK,CAAA;QAE1C,MAAM,OAAO,CAAC,IAAI,CAAC;YACjB,IAAI,CAAC,GAAG,EAAE;YACV,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAC/B,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,CACjE;SACF,CAAC,CAAA;QAEF,4CAA4C;QAC5C,gEAAgE;QAChE,sEAAsE;QACtE,qEAAqE;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,2CAA2C;QAC3C,IAAI,WAAW,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAA;QAC/C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,WAAW,GAAG,2BAA2B,CAAC,IAAI,CAAC,IAAI,IAAI,CAAA;YACvD,IAAI,WAAW,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,yDAAyD,CAAC,CAAA;YAChG,CAAC;QACH,CAAC;QACD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,gBAAgB,IAAI,CAAC,MAAM,CAAC,OAAO,qCAAqC;gBACvF,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QAED,0DAA0D;QAC1D,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAAC,WAAW,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,eAAe,CAAC,CAAA;QAElH,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAChD,aAAa,EAAE,eAAe,CAAC,cAAc;YAC7C,WAAW;YACX,eAAe;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,4EAA4E;QAC5E,4EAA4E;QAC5E,iEAAiE;QACjE,MAAM,gBAAgB,GAAG,2BAA2B,CAAC,IAAI,CAAC,CAAA;QAC1D,IAAI,gBAAgB,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,IAAI,mBAAmB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAA;YAC7I,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAAC,gBAAgB,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,eAAe,CAAC,CAAA;YACvH,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBAChD,aAAa,EAAE,eAAe,CAAC,cAAc;gBAC7C,WAAW,EAAE,gBAAgB;gBAC7B,eAAe;gBACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QAED,IAAI,GAAG,YAAY,eAAe,EAAE,CAAC;YACnC,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,gBAAgB,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,QAAQ,EAAE;gBAC9D,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QACD,IAAI,GAAG,YAAY,YAAY,EAAE,CAAC;YAChC,OAAO;gBACL,GAAG,IAAI;gBACP,MAAM,EAAE,IAAI,CAAC,IAAI;gBACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,wBAAwB,GAAG,CAAC,SAAS,IAAI;gBACxD,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;aACjC,CAAA;QACH,CAAC;QACD,OAAO;YACL,GAAG,IAAI;YACP,MAAM,EAAE,IAAI,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,oBAAoB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;YACrF,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;SACjC,CAAA;IACH,CAAC;YAAS,CAAC;QACT,eAAe,EAAE,CAAA;IACnB,CAAC;AACH,CAAC;AAED,+DAA+D;AAE/D;;;;;;GAMG;AACH,SAAS,2BAA2B,CAAC,IAAmB;IACtD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IAC9E,IAAI,CAAC,IAAI;QAAE,OAAO,SAAS,CAAA;IAE3B,MAAM,MAAM,GAAoB;QAC9B,WAAW,EAAE,IAAI,CAAC,WAAW;KAC9B,CAAA;IAED,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAA;QACvC,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAA;QACzC,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAA;IACzC,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+DAA+D;AAE/D,MAAM,YAAa,SAAQ,KAAK;IACX;IAAnB,YAAmB,SAAiB;QAClC,KAAK,CAAC,wBAAwB,SAAS,IAAI,CAAC,CAAA;QAD3B,cAAS,GAAT,SAAS,CAAQ;QAElC,IAAI,CAAC,IAAI,GAAG,cAAc,CAAA;IAC5B,CAAC;CACF;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAe;IAC9C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,GAAG,CAClG,CAAA;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACzB,CAAC"}
package/dist/index.cjs CHANGED
@@ -4392,6 +4392,7 @@ __export(index_exports, {
4392
4392
  expect: () => import_expect.expect,
4393
4393
  extractTaskOutputs: () => extractTaskOutputs,
4394
4394
  fetchCapturedTrace: () => fetchCapturedTrace,
4395
+ fetchEvaluatorConfig: () => fetchEvaluatorConfig,
4395
4396
  fetchTestGroups: () => fetchTestGroups,
4396
4397
  getCaptureContext: () => getCaptureContext,
4397
4398
  getCurrentTrace: () => getCurrentTrace,
@@ -6291,7 +6292,7 @@ async function loadTests(options) {
6291
6292
  }
6292
6293
 
6293
6294
  // src/ci/ed-runner.ts
6294
- var import_node_crypto8 = require("node:crypto");
6295
+ var import_node_crypto9 = require("node:crypto");
6295
6296
 
6296
6297
  // src/ci/measurement.ts
6297
6298
  function collectMeasurement(ctx) {
@@ -6313,7 +6314,14 @@ init_trace_schema();
6313
6314
 
6314
6315
  // src/ci/benchmark.ts
6315
6316
  init_matchers();
6316
- async function compareBenchmarks(measurement, benchmarks, stepOutput) {
6317
+ var PROVIDER_NAME_MAP = {
6318
+ anthropic: "claude",
6319
+ moonshot: "kimi"
6320
+ };
6321
+ function normalizeSdkProvider(provider) {
6322
+ return PROVIDER_NAME_MAP[provider] ?? provider;
6323
+ }
6324
+ async function compareBenchmarks(measurement, benchmarks, stepOutput, evaluatorConfig) {
6317
6325
  const metrics = [];
6318
6326
  let firstFailure;
6319
6327
  if (benchmarks.max_duration_ms !== void 0) {
@@ -6373,8 +6381,28 @@ async function compareBenchmarks(measurement, benchmarks, stepOutput) {
6373
6381
  const judge = benchmarks.llm_judge;
6374
6382
  const outputStr = stringifyOutput(stepOutput);
6375
6383
  const threshold = judge.judge_score_threshold ?? 7;
6384
+ const resolvedProvider = normalizeSdkProvider(
6385
+ judge.judge_provider ?? evaluatorConfig?.provider ?? "openai"
6386
+ );
6387
+ const resolvedModel = judge.judge_model ?? evaluatorConfig?.model ?? void 0;
6388
+ const envKeyMap = {
6389
+ openai: "OPENAI_API_KEY",
6390
+ claude: "ANTHROPIC_API_KEY",
6391
+ gemini: "GEMINI_API_KEY",
6392
+ grok: "GROK_API_KEY",
6393
+ kimi: "KIMI_API_KEY"
6394
+ };
6395
+ const envKey = envKeyMap[resolvedProvider];
6396
+ let restoreEnv;
6397
+ if (evaluatorConfig?.apiKey && envKey && !judge.judge_provider && !process.env[envKey]) {
6398
+ const prev = process.env[envKey];
6399
+ process.env[envKey] = evaluatorConfig.apiKey;
6400
+ restoreEnv = () => {
6401
+ if (prev === void 0) delete process.env[envKey];
6402
+ else process.env[envKey] = prev;
6403
+ };
6404
+ }
6376
6405
  try {
6377
- const provider = judge.judge_provider ?? "openai";
6378
6406
  const evalPrompt = `${judge.judge_prompt}
6379
6407
 
6380
6408
  Output to evaluate:
@@ -6383,11 +6411,12 @@ ${outputStr}
6383
6411
  Score this output on a scale of 0-10. Respond with only the number.`;
6384
6412
  const result = await callProviderLLM(
6385
6413
  evalPrompt,
6386
- { provider, model: judge.judge_model ?? void 0 },
6414
+ { provider: resolvedProvider, model: resolvedModel },
6387
6415
  "You are an expert test judge. Return only a number between 0 and 10.",
6388
6416
  16,
6389
6417
  0
6390
6418
  );
6419
+ restoreEnv?.();
6391
6420
  const score = parseFloat(result.content.match(/-?\d+(?:\.\d+)?/)?.[0] ?? "");
6392
6421
  if (isNaN(score)) {
6393
6422
  metrics.push({
@@ -6414,6 +6443,7 @@ Score this output on a scale of 0-10. Respond with only the number.`;
6414
6443
  }
6415
6444
  }
6416
6445
  } catch (err) {
6446
+ restoreEnv?.();
6417
6447
  const errMsg = err instanceof Error ? err.message : String(err);
6418
6448
  metrics.push({
6419
6449
  name: "llm_judge",
@@ -6440,10 +6470,70 @@ function stringifyOutput(output) {
6440
6470
  return JSON.stringify(output);
6441
6471
  }
6442
6472
 
6473
+ // src/ci/api-client.ts
6474
+ var import_node_crypto8 = require("node:crypto");
6475
+ init_http();
6476
+ function normalizeBase(serverUrl) {
6477
+ return serverUrl.replace(/\/+$/, "").replace(/\/api$/, "");
6478
+ }
6479
+ function headers(apiKey) {
6480
+ return {
6481
+ "Content-Type": "application/json",
6482
+ "api-key": apiKey || "",
6483
+ "X-Correlation-ID": (0, import_node_crypto8.randomUUID)()
6484
+ };
6485
+ }
6486
+ async function apiRequest(url, apiKey, options = {}) {
6487
+ const method = (options.method || "GET").toUpperCase();
6488
+ console.log(`[elasticdash ci] ${method} ${url}`);
6489
+ const res = await getOriginalFetch()(url, {
6490
+ ...options,
6491
+ headers: { ...headers(apiKey), ...options.headers ?? {} }
6492
+ });
6493
+ if (!res.ok) {
6494
+ const text = await res.text().catch(() => "");
6495
+ console.log(`[elasticdash ci] ${method} ${url} \u2192 ${res.status} ${text.substring(0, 200)}`);
6496
+ throw new Error(`API ${res.status}: ${text || res.statusText}`);
6497
+ }
6498
+ const json = await res.json();
6499
+ return json.result ?? json.data ?? json;
6500
+ }
6501
+ async function fetchTestGroups(serverUrl, apiKey, filters) {
6502
+ const base = normalizeBase(serverUrl);
6503
+ const params = new URLSearchParams();
6504
+ if (filters?.workflowName) params.set("workflowName", filters.workflowName);
6505
+ if (filters?.tags?.length) params.set("tags", filters.tags.join(","));
6506
+ if (filters?.status) params.set("status", filters.status);
6507
+ const qs = params.toString();
6508
+ const url = `${base}/api/testgroups/by-project${qs ? `?${qs}` : ""}`;
6509
+ return apiRequest(url, apiKey);
6510
+ }
6511
+ async function submitTestRun(serverUrl, apiKey, testGroupId, payload) {
6512
+ const base = normalizeBase(serverUrl);
6513
+ const url = `${base}/api/testgroups/${testGroupId}/runs`;
6514
+ return apiRequest(url, apiKey, {
6515
+ method: "POST",
6516
+ body: JSON.stringify(payload)
6517
+ });
6518
+ }
6519
+ async function createBatch(serverUrl, apiKey, payload) {
6520
+ const base = normalizeBase(serverUrl);
6521
+ const url = `${base}/api/testgroups/batches`;
6522
+ return apiRequest(url, apiKey, {
6523
+ method: "POST",
6524
+ body: JSON.stringify(payload)
6525
+ });
6526
+ }
6527
+ async function fetchEvaluatorConfig(serverUrl, apiKey) {
6528
+ const base = normalizeBase(serverUrl);
6529
+ const url = `${base}/api/test-runs/evaluator-config`;
6530
+ return apiRequest(url, apiKey);
6531
+ }
6532
+
6443
6533
  // src/ci/ed-runner.ts
6444
6534
  async function runEdTests(options) {
6445
6535
  const cwd = options?.cwd ?? process.cwd();
6446
- const runId = (0, import_node_crypto8.randomUUID)();
6536
+ const runId = (0, import_node_crypto9.randomUUID)();
6447
6537
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
6448
6538
  const results = [];
6449
6539
  const { tests, errors } = await loadTests({ cwd });
@@ -6463,12 +6553,28 @@ async function runEdTests(options) {
6463
6553
  testsToRun = tests.filter((t) => matchGlob(t.name, pattern));
6464
6554
  }
6465
6555
  const maxRuns = Math.max(1, options?.runs ?? 1);
6556
+ let evaluatorConfig = null;
6557
+ const needsEvaluatorConfig = testsToRun.some(
6558
+ (t) => t.benchmarks.llm_judge && (!t.benchmarks.llm_judge.judge_provider || !t.benchmarks.llm_judge.judge_model)
6559
+ );
6560
+ if (needsEvaluatorConfig) {
6561
+ const serverUrl = process.env.ELASTICDASH_API_URL ?? process.env.ELASTICDASH_SERVER ?? "";
6562
+ const apiKey = process.env.ELASTICDASH_API_KEY ?? "";
6563
+ if (serverUrl && apiKey) {
6564
+ try {
6565
+ evaluatorConfig = await fetchEvaluatorConfig(serverUrl, apiKey);
6566
+ console.log(`[ed-test] Evaluator config: provider=${evaluatorConfig.provider}, model=${evaluatorConfig.model}, hasKey=${!!evaluatorConfig.apiKey}`);
6567
+ } catch (err) {
6568
+ console.warn(`[ed-test] Could not fetch evaluator config: ${err instanceof Error ? err.message : String(err)}`);
6569
+ }
6570
+ }
6571
+ }
6466
6572
  for (const test of testsToRun) {
6467
6573
  const allRuns = [];
6468
6574
  let bestResult = null;
6469
6575
  for (let attempt = 1; attempt <= maxRuns; attempt++) {
6470
6576
  const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
6471
- const result = await runSingleTest(test);
6577
+ const result = await runSingleTest(test, evaluatorConfig);
6472
6578
  const runFinishedAt = (/* @__PURE__ */ new Date()).toISOString();
6473
6579
  if (attempt > 1) {
6474
6580
  console.log(` [ed-test] ${test.name}: run ${attempt}/${maxRuns} \u2014 ${result.status}`);
@@ -6511,7 +6617,7 @@ async function runEdTests(options) {
6511
6617
  async function resolveCustomInput(input) {
6512
6618
  return typeof input === "function" ? await input() : input;
6513
6619
  }
6514
- async function runSingleTest(test) {
6620
+ async function runSingleTest(test, evaluatorConfig) {
6515
6621
  const startMs = Date.now();
6516
6622
  const targetStep = test.traceData.steps.find((s) => s.step_id === test.target.step_id);
6517
6623
  const resolvedInput = test.input !== void 0 ? await resolveCustomInput(test.input) : targetStep?.input;
@@ -6560,7 +6666,7 @@ async function runSingleTest(test) {
6560
6666
  durationMs: Date.now() - startMs
6561
6667
  };
6562
6668
  }
6563
- const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output);
6669
+ const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output, evaluatorConfig);
6564
6670
  return {
6565
6671
  ...base,
6566
6672
  testId: test.name,
@@ -6575,7 +6681,7 @@ async function runSingleTest(test) {
6575
6681
  const traceMeasurement = extractMeasurementFromTrace(test);
6576
6682
  if (traceMeasurement) {
6577
6683
  console.log(` [ed-test] ${test.name}: run() failed (${err instanceof Error ? err.message : String(err)}), using trace measurement fallback`);
6578
- const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output);
6684
+ const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output, evaluatorConfig);
6579
6685
  return {
6580
6686
  ...base,
6581
6687
  testId: test.name,
@@ -6647,7 +6753,7 @@ function matchGlob(name, pattern) {
6647
6753
  }
6648
6754
 
6649
6755
  // src/ci/upload-client.ts
6650
- var import_node_crypto9 = require("node:crypto");
6756
+ var import_node_crypto10 = require("node:crypto");
6651
6757
  init_http();
6652
6758
  init_git_info();
6653
6759
  function buildUploadPayload(runResult) {
@@ -6775,7 +6881,7 @@ async function uploadResults(payload, options) {
6775
6881
  const headers2 = {
6776
6882
  "Content-Type": "application/json",
6777
6883
  "api-key": options.apiKey,
6778
- "X-Correlation-ID": (0, import_node_crypto9.randomUUID)()
6884
+ "X-Correlation-ID": (0, import_node_crypto10.randomUUID)()
6779
6885
  };
6780
6886
  const body = JSON.stringify(payload);
6781
6887
  console.log(`[elasticdash] Uploading to ${url}, api-key=${options.apiKey ? options.apiKey.slice(0, 10) + "..." : "(none)"}`);
@@ -6811,61 +6917,6 @@ async function uploadResults(payload, options) {
6811
6917
  // src/ci/runner.ts
6812
6918
  var import_chalk2 = __toESM(require("chalk"), 1);
6813
6919
 
6814
- // src/ci/api-client.ts
6815
- var import_node_crypto10 = require("node:crypto");
6816
- init_http();
6817
- function normalizeBase(serverUrl) {
6818
- return serverUrl.replace(/\/+$/, "").replace(/\/api$/, "");
6819
- }
6820
- function headers(apiKey) {
6821
- return {
6822
- "Content-Type": "application/json",
6823
- "api-key": apiKey || "",
6824
- "X-Correlation-ID": (0, import_node_crypto10.randomUUID)()
6825
- };
6826
- }
6827
- async function apiRequest(url, apiKey, options = {}) {
6828
- const method = (options.method || "GET").toUpperCase();
6829
- console.log(`[elasticdash ci] ${method} ${url}`);
6830
- const res = await getOriginalFetch()(url, {
6831
- ...options,
6832
- headers: { ...headers(apiKey), ...options.headers ?? {} }
6833
- });
6834
- if (!res.ok) {
6835
- const text = await res.text().catch(() => "");
6836
- console.log(`[elasticdash ci] ${method} ${url} \u2192 ${res.status} ${text.substring(0, 200)}`);
6837
- throw new Error(`API ${res.status}: ${text || res.statusText}`);
6838
- }
6839
- const json = await res.json();
6840
- return json.result ?? json.data ?? json;
6841
- }
6842
- async function fetchTestGroups(serverUrl, apiKey, filters) {
6843
- const base = normalizeBase(serverUrl);
6844
- const params = new URLSearchParams();
6845
- if (filters?.workflowName) params.set("workflowName", filters.workflowName);
6846
- if (filters?.tags?.length) params.set("tags", filters.tags.join(","));
6847
- if (filters?.status) params.set("status", filters.status);
6848
- const qs = params.toString();
6849
- const url = `${base}/api/testgroups/by-project${qs ? `?${qs}` : ""}`;
6850
- return apiRequest(url, apiKey);
6851
- }
6852
- async function submitTestRun(serverUrl, apiKey, testGroupId, payload) {
6853
- const base = normalizeBase(serverUrl);
6854
- const url = `${base}/api/testgroups/${testGroupId}/runs`;
6855
- return apiRequest(url, apiKey, {
6856
- method: "POST",
6857
- body: JSON.stringify(payload)
6858
- });
6859
- }
6860
- async function createBatch(serverUrl, apiKey, payload) {
6861
- const base = normalizeBase(serverUrl);
6862
- const url = `${base}/api/testgroups/batches`;
6863
- return apiRequest(url, apiKey, {
6864
- method: "POST",
6865
- body: JSON.stringify(payload)
6866
- });
6867
- }
6868
-
6869
6920
  // src/ci/executor.ts
6870
6921
  init_portal_executor();
6871
6922
  init_tool_runner();
@@ -7593,6 +7644,7 @@ tryAutoInitHttpContext().catch(() => {
7593
7644
  expect,
7594
7645
  extractTaskOutputs,
7595
7646
  fetchCapturedTrace,
7647
+ fetchEvaluatorConfig,
7596
7648
  fetchTestGroups,
7597
7649
  getCaptureContext,
7598
7650
  getCurrentTrace,
package/dist/index.d.ts CHANGED
@@ -52,7 +52,7 @@ export type { EdTestRunOptions, EdTestResult, EdTestRunResult } from './ci/ed-ru
52
52
  export { uploadResults, buildUploadPayload } from './ci/upload-client.js';
53
53
  export type { UploadPayload, UploadTestResult } from './ci/upload-client.js';
54
54
  export { runCI } from './ci/runner.js';
55
- export { fetchTestGroups, submitTestRun, createBatch } from './ci/api-client.js';
55
+ export { fetchTestGroups, submitTestRun, createBatch, fetchEvaluatorConfig } from './ci/api-client.js';
56
56
  export { detectGitInfo } from './ci/git-info.js';
57
57
  export type { CIRunConfig, CIRunSummary, CITestResult, CISingleRunResult, CIExpectationResult } from './ci/types.js';
58
58
  export type { GitInfo } from './ci/git-info.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACnH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAA;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAA;AAC5C,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAGxE,OAAO,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAA;AACnH,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAA;AAGnI,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAC3F,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,YAAY,EAAE,aAAa,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACzF,YAAY,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAA;AAG3D,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAA;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,+BAA+B,CAAA;AAGtD,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,6BAA6B,EAC7B,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,kCAAkC,CAAA;AACzC,YAAY,EAAE,cAAc,EAAE,MAAM,kCAAkC,CAAA;AAGtE,OAAO,EACL,WAAW,EACX,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,eAAe,EACf,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AACrC,YAAY,EACV,WAAW,EACX,YAAY,EACZ,mBAAmB,EACnB,oBAAoB,EACpB,qBAAqB,EACrB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AAGrC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAC3G,OAAO,EAAE,wBAAwB,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAA;AAGhG,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AACzF,YAAY,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAA;AAGlE,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAA;AAGjH,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AAG/F,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjH,YAAY,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AACnF,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,YAAY,EAAE,uBAAuB,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAA;AACjG,YAAY,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA;AAC5E,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAA;AACjF,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAA;AAG9D,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAC/E,YAAY,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAA;AAGnE,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAG1E,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,qCAAqC,CAAA;AAGlF,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,YAAY,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAGjF,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AACxH,YAAY,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAG1G,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AACxD,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAA;AAG5H,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAA;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAA;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAA;AAC/C,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAA;AACvF,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACzE,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAGpE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AAC9C,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AACxF,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AACzE,YAAY,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AAG5E,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAA;AACtC,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAA;AAChF,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAChD,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAA;AACpH,YAAY,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACnH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAA;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAA;AAC5C,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAGxE,OAAO,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAA;AACnH,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAA;AAGnI,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAC3F,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,YAAY,EAAE,aAAa,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACzF,YAAY,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAA;AAG3D,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAA;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,+BAA+B,CAAA;AAGtD,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,6BAA6B,EAC7B,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,kCAAkC,CAAA;AACzC,YAAY,EAAE,cAAc,EAAE,MAAM,kCAAkC,CAAA;AAGtE,OAAO,EACL,WAAW,EACX,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,eAAe,EACf,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AACrC,YAAY,EACV,WAAW,EACX,YAAY,EACZ,mBAAmB,EACnB,oBAAoB,EACpB,qBAAqB,EACrB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AAGrC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAC3G,OAAO,EAAE,wBAAwB,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAA;AAGhG,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AACzF,YAAY,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAA;AAGlE,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAA;AAGjH,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AAG/F,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjH,YAAY,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AACnF,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,YAAY,EAAE,uBAAuB,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAA;AACjG,YAAY,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA;AAC5E,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAA;AACjF,YAAY,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAA;AAG9D,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAC/E,YAAY,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAA;AAGnE,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAG1E,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,qCAAqC,CAAA;AAGlF,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,YAAY,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAGjF,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AACxH,YAAY,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAG1G,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AACxD,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAA;AAG5H,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAA;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAA;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAA;AAC/C,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAA;AACvF,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACzE,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAGpE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AAC9C,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AACxF,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AACzE,YAAY,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AAG5E,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAA;AACtC,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACtG,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAChD,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAA;AACpH,YAAY,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAA"}
package/dist/index.js CHANGED
@@ -53,7 +53,7 @@ export { runEdTests } from './ci/ed-runner.js';
53
53
  export { uploadResults, buildUploadPayload } from './ci/upload-client.js';
54
54
  // CI runner (programmatic API)
55
55
  export { runCI } from './ci/runner.js';
56
- export { fetchTestGroups, submitTestRun, createBatch } from './ci/api-client.js';
56
+ export { fetchTestGroups, submitTestRun, createBatch, fetchEvaluatorConfig } from './ci/api-client.js';
57
57
  export { detectGitInfo } from './ci/git-info.js';
58
58
  // ─── Eager auto-init ────────────────────────────────────────
59
59
  // When ELASTICDASH_API_KEY is set, automatically initialise observability mode
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAE1C,iBAAiB;AACjB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACnH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAA;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAA;AAG5C,gBAAgB;AAChB,OAAO,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAA;AAGnH,4BAA4B;AAC5B,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAC3F,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AAItD,yBAAyB;AACzB,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAA;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,+BAA+B,CAAA;AAEtD,oEAAoE;AACpE,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,6BAA6B,EAC7B,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,kCAAkC,CAAA;AAGzC,4DAA4D;AAC5D,OAAO,EACL,WAAW,EACX,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,eAAe,EACf,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AAUrC,wBAAwB;AACxB,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAC3G,OAAO,EAAE,wBAAwB,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAA;AAEhG,4BAA4B;AAC5B,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAGzF,2BAA2B;AAC3B,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAA;AAEjH,sCAAsC;AACtC,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AAE/F,gBAAgB;AAChB,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAEjH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AAGzD,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAA;AAGjF,qDAAqD;AACrD,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAG/E,YAAY;AACZ,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAE1E,UAAU;AACV,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,qCAAqC,CAAA;AAElF,kBAAkB;AAClB,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,yBAAyB;AACzB,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AAGxH,8BAA8B;AAC9B,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAGxD,kDAAkD;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAA;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAA;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAA;AAK/C,iCAAiC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AAE9C,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAGzE,+BAA+B;AAC/B,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAA;AACtC,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAA;AAChF,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAIhD,+DAA+D;AAC/D,+EAA+E;AAC/E,+EAA+E;AAC/E,+EAA+E;AAC/E,iFAAiF;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AACzE,sBAAsB,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAE1C,iBAAiB;AACjB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AACnH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAA;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAA;AAG5C,gBAAgB;AAChB,OAAO,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAA;AAGnH,4BAA4B;AAC5B,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAC3F,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AAItD,yBAAyB;AACzB,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAA;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,+BAA+B,CAAA;AAEtD,oEAAoE;AACpE,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,6BAA6B,EAC7B,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,kCAAkC,CAAA;AAGzC,4DAA4D;AAC5D,OAAO,EACL,WAAW,EACX,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,eAAe,EACf,oBAAoB,EACpB,sBAAsB,GACvB,MAAM,8BAA8B,CAAA;AAUrC,wBAAwB;AACxB,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAC3G,OAAO,EAAE,wBAAwB,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAA;AAEhG,4BAA4B;AAC5B,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAGzF,2BAA2B;AAC3B,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAA;AAEjH,sCAAsC;AACtC,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AAE/F,gBAAgB;AAChB,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAEjH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AAGzD,OAAO,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAA;AAGjF,qDAAqD;AACrD,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAG/E,YAAY;AACZ,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAE1E,UAAU;AACV,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,qCAAqC,CAAA;AAElF,kBAAkB;AAClB,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,yBAAyB;AACzB,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAA;AAGxH,8BAA8B;AAC9B,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAGxD,kDAAkD;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAA;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAA;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAA;AAK/C,iCAAiC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAA;AAE9C,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAGzE,+BAA+B;AAC/B,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAA;AACtC,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACtG,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAIhD,+DAA+D;AAC/D,+EAA+E;AAC/E,+EAA+E;AAC/E,+EAA+E;AAC/E,iFAAiF;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAA;AACzE,sBAAsB,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "elasticdash-test",
3
- "version": "0.1.20-alpha-21",
3
+ "version": "0.1.20-alpha-22",
4
4
  "description": "AI-native test runner for ElasticDash workflow testing",
5
5
  "type": "module",
6
6
  "bin": {
@@ -94,3 +94,27 @@ export async function createBatch(
94
94
  body: JSON.stringify(payload),
95
95
  })
96
96
  }
97
+
98
+ /**
99
+ * Resolved evaluator configuration from the backend.
100
+ * Provider/model/apiKey may be null if the user has not configured an evaluator.
101
+ */
102
+ export interface EvaluatorConfig {
103
+ provider: string | null
104
+ model: string | null
105
+ apiKey: string | null
106
+ }
107
+
108
+ /**
109
+ * Fetch the project's evaluator config (with user-level fallback).
110
+ * Used by ed-test llm_judge benchmarks when judge_provider/judge_model
111
+ * are not specified in the test definition.
112
+ */
113
+ export async function fetchEvaluatorConfig(
114
+ serverUrl: string,
115
+ apiKey: string,
116
+ ): Promise<EvaluatorConfig> {
117
+ const base = normalizeBase(serverUrl)
118
+ const url = `${base}/api/test-runs/evaluator-config`
119
+ return apiRequest<EvaluatorConfig>(url, apiKey)
120
+ }
@@ -10,6 +10,7 @@
10
10
  import { callProviderLLM } from '../matchers/index.js'
11
11
  import type { TestMeasurement } from './measurement.js'
12
12
  import type { TestBenchmarks } from './test-registry.js'
13
+ import type { EvaluatorConfig } from './api-client.js'
13
14
 
14
15
  export type MetricName = 'duration_ms' | 'tokens_total' | 'output_contains' | 'output_not_contains' | 'llm_judge'
15
16
 
@@ -27,14 +28,29 @@ export interface BenchmarkResult {
27
28
  metrics: MetricResult[]
28
29
  }
29
30
 
31
+ /** Maps backend provider names to SDK provider names used by callProviderLLM. */
32
+ const PROVIDER_NAME_MAP: Record<string, string> = {
33
+ anthropic: 'claude',
34
+ moonshot: 'kimi',
35
+ }
36
+
37
+ /** Normalize provider name from backend format to SDK format. */
38
+ function normalizeSdkProvider(provider: string): string {
39
+ return PROVIDER_NAME_MAP[provider] ?? provider
40
+ }
41
+
30
42
  /**
31
43
  * Compare a measurement against benchmarks. Async because llm_judge requires
32
44
  * an LLM call. The step's output is needed for output_contains/llm_judge checks.
45
+ *
46
+ * @param evaluatorConfig - Optional backend evaluator config used as fallback
47
+ * when the test does not specify judge_provider/judge_model.
33
48
  */
34
49
  export async function compareBenchmarks(
35
50
  measurement: TestMeasurement,
36
51
  benchmarks: TestBenchmarks,
37
52
  stepOutput?: unknown,
53
+ evaluatorConfig?: EvaluatorConfig | null,
38
54
  ): Promise<BenchmarkResult> {
39
55
  const metrics: MetricResult[] = []
40
56
  let firstFailure: string | undefined
@@ -104,18 +120,46 @@ export async function compareBenchmarks(
104
120
  const outputStr = stringifyOutput(stepOutput)
105
121
  const threshold = judge.judge_score_threshold ?? 7
106
122
 
123
+ // Resolve provider/model: test definition takes priority, then backend
124
+ // evaluator config, then fall back to 'openai' default.
125
+ const resolvedProvider = normalizeSdkProvider(
126
+ judge.judge_provider ?? evaluatorConfig?.provider ?? 'openai'
127
+ )
128
+ const resolvedModel = judge.judge_model ?? evaluatorConfig?.model ?? undefined
129
+
130
+ // If the backend provided an API key and we're using its provider,
131
+ // set it in the environment so callProviderLLM can pick it up.
132
+ const envKeyMap: Record<string, string> = {
133
+ openai: 'OPENAI_API_KEY',
134
+ claude: 'ANTHROPIC_API_KEY',
135
+ gemini: 'GEMINI_API_KEY',
136
+ grok: 'GROK_API_KEY',
137
+ kimi: 'KIMI_API_KEY',
138
+ }
139
+ const envKey = envKeyMap[resolvedProvider]
140
+ let restoreEnv: (() => void) | undefined
141
+ if (evaluatorConfig?.apiKey && envKey && !judge.judge_provider && !process.env[envKey]) {
142
+ const prev = process.env[envKey]
143
+ process.env[envKey] = evaluatorConfig.apiKey
144
+ restoreEnv = () => {
145
+ if (prev === undefined) delete process.env[envKey]
146
+ else process.env[envKey] = prev
147
+ }
148
+ }
149
+
107
150
  try {
108
- const provider = judge.judge_provider ?? 'openai'
109
151
  const evalPrompt = `${judge.judge_prompt}\n\nOutput to evaluate:\n${outputStr}\n\nScore this output on a scale of 0-10. Respond with only the number.`
110
152
 
111
153
  const result = await callProviderLLM(
112
154
  evalPrompt,
113
- { provider, model: judge.judge_model ?? undefined },
155
+ { provider: resolvedProvider as 'openai' | 'claude' | 'gemini' | 'grok' | 'kimi', model: resolvedModel },
114
156
  'You are an expert test judge. Return only a number between 0 and 10.',
115
157
  16,
116
158
  0,
117
159
  )
118
160
 
161
+ restoreEnv?.()
162
+
119
163
  const score = parseFloat(result.content.match(/-?\d+(?:\.\d+)?/)?.[0] ?? '')
120
164
  if (isNaN(score)) {
121
165
  metrics.push({
@@ -142,6 +186,7 @@ export async function compareBenchmarks(
142
186
  }
143
187
  }
144
188
  } catch (err) {
189
+ restoreEnv?.()
145
190
  const errMsg = err instanceof Error ? err.message : String(err)
146
191
  metrics.push({
147
192
  name: 'llm_judge',
@@ -4,6 +4,8 @@ import { createReplayContext, installReplay, uninstallReplay, ReplayMissError }
4
4
  import { collectMeasurement } from './measurement.js'
5
5
  import { SDK_VERSION } from './trace-schema.js'
6
6
  import { compareBenchmarks } from './benchmark.js'
7
+ import { fetchEvaluatorConfig } from './api-client.js'
8
+ import type { EvaluatorConfig } from './api-client.js'
7
9
  import type { TestMeasurement } from './measurement.js'
8
10
  import type { BenchmarkResult } from './benchmark.js'
9
11
  import type { ValidatedTest } from './test-loader.js'
@@ -88,13 +90,32 @@ export async function runEdTests(options?: EdTestRunOptions): Promise<EdTestRunR
88
90
 
89
91
  const maxRuns = Math.max(1, options?.runs ?? 1)
90
92
 
93
+ // Fetch evaluator config from backend if any test uses llm_judge without
94
+ // explicit provider/model. Cached for the entire run to avoid repeated calls.
95
+ let evaluatorConfig: EvaluatorConfig | null = null
96
+ const needsEvaluatorConfig = testsToRun.some(
97
+ t => t.benchmarks.llm_judge && (!t.benchmarks.llm_judge.judge_provider || !t.benchmarks.llm_judge.judge_model)
98
+ )
99
+ if (needsEvaluatorConfig) {
100
+ const serverUrl = process.env.ELASTICDASH_API_URL ?? process.env.ELASTICDASH_SERVER ?? ''
101
+ const apiKey = process.env.ELASTICDASH_API_KEY ?? ''
102
+ if (serverUrl && apiKey) {
103
+ try {
104
+ evaluatorConfig = await fetchEvaluatorConfig(serverUrl, apiKey)
105
+ console.log(`[ed-test] Evaluator config: provider=${evaluatorConfig.provider}, model=${evaluatorConfig.model}, hasKey=${!!evaluatorConfig.apiKey}`)
106
+ } catch (err) {
107
+ console.warn(`[ed-test] Could not fetch evaluator config: ${err instanceof Error ? err.message : String(err)}`)
108
+ }
109
+ }
110
+ }
111
+
91
112
  for (const test of testsToRun) {
92
113
  const allRuns: EdSingleRunResult[] = []
93
114
  let bestResult: EdTestResult | null = null
94
115
 
95
116
  for (let attempt = 1; attempt <= maxRuns; attempt++) {
96
117
  const runStartedAt = new Date().toISOString()
97
- const result = await runSingleTest(test)
118
+ const result = await runSingleTest(test, evaluatorConfig)
98
119
  const runFinishedAt = new Date().toISOString()
99
120
 
100
121
  if (attempt > 1) {
@@ -153,7 +174,7 @@ async function resolveCustomInput(input: unknown | (() => Promise<unknown> | unk
153
174
  return typeof input === 'function' ? await (input as () => Promise<unknown> | unknown)() : input
154
175
  }
155
176
 
156
- async function runSingleTest(test: ValidatedTest): Promise<EdTestResult> {
177
+ async function runSingleTest(test: ValidatedTest, evaluatorConfig?: EvaluatorConfig | null): Promise<EdTestResult> {
157
178
  const startMs = Date.now()
158
179
  const targetStep = test.traceData.steps.find(s => s.step_id === test.target.step_id)
159
180
 
@@ -221,7 +242,7 @@ async function runSingleTest(test: ValidatedTest): Promise<EdTestResult> {
221
242
  }
222
243
 
223
244
  // Compare against benchmarks (async to support llm_judge)
224
- const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output)
245
+ const benchmarkResult = await compareBenchmarks(measurement, test.benchmarks, targetStep?.output, evaluatorConfig)
225
246
 
226
247
  return {
227
248
  ...base,
@@ -240,7 +261,7 @@ async function runSingleTest(test: ValidatedTest): Promise<EdTestResult> {
240
261
  const traceMeasurement = extractMeasurementFromTrace(test)
241
262
  if (traceMeasurement) {
242
263
  console.log(` [ed-test] ${test.name}: run() failed (${err instanceof Error ? err.message : String(err)}), using trace measurement fallback`)
243
- const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output)
264
+ const benchmarkResult = await compareBenchmarks(traceMeasurement, test.benchmarks, targetStep?.output, evaluatorConfig)
244
265
  return {
245
266
  ...base,
246
267
  testId: test.name,
package/src/index.ts CHANGED
@@ -120,7 +120,7 @@ export type { UploadPayload, UploadTestResult } from './ci/upload-client.js'
120
120
 
121
121
  // CI runner (programmatic API)
122
122
  export { runCI } from './ci/runner.js'
123
- export { fetchTestGroups, submitTestRun, createBatch } from './ci/api-client.js'
123
+ export { fetchTestGroups, submitTestRun, createBatch, fetchEvaluatorConfig } from './ci/api-client.js'
124
124
  export { detectGitInfo } from './ci/git-info.js'
125
125
  export type { CIRunConfig, CIRunSummary, CITestResult, CISingleRunResult, CIExpectationResult } from './ci/types.js'
126
126
  export type { GitInfo } from './ci/git-info.js'