@modular-prompt/experiment 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +71 -5
  2. package/dist/{src/evaluators/base-module.d.ts → base-evaluation-module.d.ts} +2 -2
  3. package/dist/base-evaluation-module.d.ts.map +1 -0
  4. package/dist/{src/evaluators/base-module.js → base-evaluation-module.js} +1 -1
  5. package/dist/base-evaluation-module.js.map +1 -0
  6. package/dist/cli/args.d.ts.map +1 -0
  7. package/dist/{src/cli → cli}/args.js +6 -0
  8. package/dist/cli/args.js.map +1 -0
  9. package/dist/config/dynamic-loader.d.ts.map +1 -0
  10. package/dist/{src/config → config}/dynamic-loader.js +44 -31
  11. package/dist/config/dynamic-loader.js.map +1 -0
  12. package/dist/config/loader.d.ts.map +1 -0
  13. package/dist/{src/config → config}/loader.js +5 -4
  14. package/dist/config/loader.js.map +1 -0
  15. package/dist/evaluators/index.d.ts +12 -0
  16. package/dist/evaluators/index.d.ts.map +1 -0
  17. package/dist/evaluators/index.js +16 -0
  18. package/dist/evaluators/index.js.map +1 -0
  19. package/dist/{src/evaluators/functional-correctness.d.ts → evaluators/llm-requirement-fulfillment.d.ts} +3 -3
  20. package/dist/evaluators/llm-requirement-fulfillment.d.ts.map +1 -0
  21. package/dist/{src/evaluators/functional-correctness.js → evaluators/llm-requirement-fulfillment.js} +7 -7
  22. package/dist/evaluators/llm-requirement-fulfillment.js.map +1 -0
  23. package/dist/{src/evaluators/json-validator.d.ts → evaluators/structured-output-presence.d.ts} +3 -3
  24. package/dist/evaluators/structured-output-presence.d.ts.map +1 -0
  25. package/dist/{src/evaluators/json-validator.js → evaluators/structured-output-presence.js} +6 -6
  26. package/dist/evaluators/structured-output-presence.js.map +1 -0
  27. package/dist/{src/index.d.ts → index.d.ts} +1 -1
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/{src/index.js → index.js} +1 -1
  30. package/dist/index.js.map +1 -0
  31. package/dist/logger.d.ts +9 -0
  32. package/dist/logger.d.ts.map +1 -0
  33. package/dist/logger.js +9 -0
  34. package/dist/logger.js.map +1 -0
  35. package/dist/reporter/statistics.d.ts.map +1 -0
  36. package/dist/reporter/statistics.js.map +1 -0
  37. package/dist/{src/run-comparison.d.ts → run-comparison.d.ts} +1 -0
  38. package/dist/run-comparison.d.ts.map +1 -0
  39. package/dist/{src/run-comparison.js → run-comparison.js} +27 -0
  40. package/dist/run-comparison.js.map +1 -0
  41. package/dist/runner/driver-manager.d.ts.map +1 -0
  42. package/dist/{src/runner → runner}/driver-manager.js +8 -6
  43. package/dist/runner/driver-manager.js.map +1 -0
  44. package/dist/{src/runner → runner}/evaluator.d.ts +2 -1
  45. package/dist/runner/evaluator.d.ts.map +1 -0
  46. package/dist/{src/runner → runner}/evaluator.js +15 -5
  47. package/dist/runner/evaluator.js.map +1 -0
  48. package/dist/runner/experiment.d.ts.map +1 -0
  49. package/dist/{src/runner → runner}/experiment.js +19 -18
  50. package/dist/runner/experiment.js.map +1 -0
  51. package/dist/{src/types.d.ts → types.d.ts} +26 -0
  52. package/dist/types.d.ts.map +1 -0
  53. package/dist/types.js.map +1 -0
  54. package/examples/experiment.yaml +9 -17
  55. package/package.json +5 -4
  56. package/dist/src/cli/args.d.ts.map +0 -1
  57. package/dist/src/cli/args.js.map +0 -1
  58. package/dist/src/config/dynamic-loader.d.ts.map +0 -1
  59. package/dist/src/config/dynamic-loader.js.map +0 -1
  60. package/dist/src/config/loader.d.ts.map +0 -1
  61. package/dist/src/config/loader.js.map +0 -1
  62. package/dist/src/evaluators/base-module.d.ts.map +0 -1
  63. package/dist/src/evaluators/base-module.js.map +0 -1
  64. package/dist/src/evaluators/functional-correctness.d.ts.map +0 -1
  65. package/dist/src/evaluators/functional-correctness.js.map +0 -1
  66. package/dist/src/evaluators/json-validator.d.ts.map +0 -1
  67. package/dist/src/evaluators/json-validator.js.map +0 -1
  68. package/dist/src/index.d.ts.map +0 -1
  69. package/dist/src/index.js.map +0 -1
  70. package/dist/src/reporter/statistics.d.ts.map +0 -1
  71. package/dist/src/reporter/statistics.js.map +0 -1
  72. package/dist/src/run-comparison.d.ts.map +0 -1
  73. package/dist/src/run-comparison.js.map +0 -1
  74. package/dist/src/runner/driver-manager.d.ts.map +0 -1
  75. package/dist/src/runner/driver-manager.js.map +0 -1
  76. package/dist/src/runner/evaluator.d.ts.map +0 -1
  77. package/dist/src/runner/evaluator.js.map +0 -1
  78. package/dist/src/runner/experiment.d.ts.map +0 -1
  79. package/dist/src/runner/experiment.js.map +0 -1
  80. package/dist/src/types.d.ts.map +0 -1
  81. package/dist/src/types.js.map +0 -1
  82. package/dist/tsconfig.tsbuildinfo +0 -1
  83. /package/dist/{src/cli → cli}/args.d.ts +0 -0
  84. /package/dist/{src/config → config}/dynamic-loader.d.ts +0 -0
  85. /package/dist/{src/config → config}/loader.d.ts +0 -0
  86. /package/dist/{src/reporter → reporter}/statistics.d.ts +0 -0
  87. /package/dist/{src/reporter → reporter}/statistics.js +0 -0
  88. /package/dist/{src/runner → runner}/driver-manager.d.ts +0 -0
  89. /package/dist/{src/runner → runner}/experiment.d.ts +0 -0
  90. /package/dist/{src/types.js → types.js} +0 -0
package/README.md CHANGED
@@ -69,8 +69,12 @@ testCases:
69
69
  - gemini-fast
70
70
 
71
71
  evaluators:
72
- - name: json-validator
73
- path: ./evaluators/json-validator.ts
72
+ # Built-in evaluators (name only)
73
+ - name: structured-output-presence
74
+ - name: llm-requirement-fulfillment
75
+ # Or external evaluator (with path)
76
+ - name: custom-validator
77
+ path: ./evaluators/custom-validator.ts
74
78
  # Or inline prompt evaluator
75
79
  - name: quality-check
76
80
  prompt:
@@ -124,9 +128,13 @@ export default {
124
128
  },
125
129
  ],
126
130
  evaluators: [
131
+ // Built-in evaluators (name only)
132
+ { name: 'structured-output-presence' },
133
+ { name: 'llm-requirement-fulfillment' },
134
+ // Or external evaluator (with path)
127
135
  {
128
- name: 'json-validator',
129
- path: './evaluators/json-validator.ts',
136
+ name: 'custom-validator',
137
+ path: './evaluators/custom-validator.ts',
130
138
  },
131
139
  ],
132
140
  evaluation: {
@@ -143,6 +151,9 @@ export default {
143
151
  ### 2. Run Experiment
144
152
 
145
153
  ```bash
154
+ # Validate configuration and display execution plan (recommended first step)
155
+ npx modular-experiment examples/experiment.yaml --dry-run
156
+
146
157
  # Run with YAML config
147
158
  npx modular-experiment examples/experiment.yaml
148
159
 
@@ -157,6 +168,15 @@ npx modular-experiment examples/experiment.yaml --evaluate
157
168
 
158
169
  # Run multiple times for statistics
159
170
  npx modular-experiment examples/experiment.yaml --repeat 10
171
+
172
+ # Run with detailed logging to JSONL file
173
+ npx modular-experiment examples/experiment.yaml --log-file experiment.jsonl
174
+
175
+ # Run with verbose output (show internal operations)
176
+ npx modular-experiment examples/experiment.yaml --verbose
177
+
178
+ # Combine options
179
+ npx modular-experiment examples/experiment.yaml --evaluate --log-file experiment.jsonl --verbose
160
180
  ```
161
181
 
162
182
  ## Configuration
@@ -249,6 +269,48 @@ export default {
249
269
 
250
270
  All prompt evaluators are automatically merged with the base evaluation module.
251
271
 
272
+ ## Built-in Evaluators
273
+
274
+ The framework includes built-in evaluators that can be referenced by name only (no path required):
275
+
276
+ ### structured-output-presence
277
+
278
+ - **Type**: Code Evaluator
279
+ - **What it measures**: Checks if `structuredOutput` exists and is a valid object
280
+ - **Evaluation logic**:
281
+ - Verifies presence of `structuredOutput` in query result
282
+ - Confirms it's a non-null object type
283
+ - **Score**: `(validCount / totalRuns) * 10`
284
+ - **Use case**: Verify that the model returns structured JSON output (essential for structured output workflows)
285
+ - **Usage**:
286
+ ```yaml
287
+ evaluators:
288
+ - name: "structured-output-presence"
289
+ ```
290
+
291
+ ### llm-requirement-fulfillment
292
+
293
+ - **Type**: Prompt Evaluator (uses LLM for evaluation)
294
+ - **What it measures**: Uses LLM to comprehensively evaluate whether output meets functional requirements
295
+ - **Evaluation criteria**:
296
+ 1. **Requirement Fulfillment**: Does it satisfy the intent described in the prompt?
297
+ 2. **Parameter Correctness**: Are all required parameters present and correct?
298
+ 3. **Parameter Completeness**: Are optional parameters appropriately used or omitted?
299
+ 4. **Logical Consistency**: Is the output logically consistent with the facts?
300
+ - **Score**: 0-10 overall score with detailed sub-scores for each criterion
301
+ - **Use case**: Comprehensive quality assessment of output (requires evaluation model to be configured)
302
+ - **Usage**:
303
+ ```yaml
304
+ evaluators:
305
+ - name: "llm-requirement-fulfillment"
306
+
307
+ evaluation:
308
+ enabled: true
309
+ model: "gemini-fast" # Model used for evaluation
310
+ ```
311
+
312
+ **Note**: `llm-requirement-fulfillment` requires an evaluation model to be configured in the `evaluation` section.
313
+
252
314
  ## Architecture
253
315
 
254
316
  ```
@@ -280,7 +342,8 @@ All prompt evaluators are automatically merged with the base evaluation module.
280
342
  | `runner/evaluator.ts` | Execute evaluations |
281
343
  | `runner/driver-manager.ts` | Cache and manage AI drivers |
282
344
  | `reporter/statistics.ts` | Generate statistical reports |
283
- | `evaluators/base-module.ts` | Base evaluation prompt module |
345
+ | `base-evaluation-module.ts` | Base evaluation prompt module |
346
+ | `evaluators/index.ts` | Built-in evaluator registry |
284
347
 
285
348
  ## Examples
286
349
 
@@ -338,6 +401,9 @@ Options:
338
401
  --repeat <count> Number of repetitions (default: 1)
339
402
  --evaluate Enable evaluation phase
340
403
  --evaluators <names> Comma-separated evaluator names (default: all)
404
+ --dry-run Display execution plan without running the experiment
405
+ --log-file <path> Log file path for JSONL output (detailed logs)
406
+ --verbose Enable verbose output (show detailed internal operations)
341
407
  ```
342
408
 
343
409
  **Note**: All paths specified in the config file are resolved relative to the config file's directory.
@@ -5,6 +5,6 @@
5
5
  * It defines how test data is presented to the evaluator.
6
6
  */
7
7
  import type { PromptModule } from '@modular-prompt/core';
8
- import type { EvaluationContext } from '../types.js';
8
+ import type { EvaluationContext } from './types.js';
9
9
  export declare const baseEvaluationModule: PromptModule<EvaluationContext>;
10
- //# sourceMappingURL=base-module.d.ts.map
10
+ //# sourceMappingURL=base-evaluation-module.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-evaluation-module.d.ts","sourceRoot":"","sources":["../src/base-evaluation-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAA4B,MAAM,sBAAsB,CAAC;AACnF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAEpD,eAAO,MAAM,oBAAoB,EAAE,YAAY,CAAC,iBAAiB,CAuGhE,CAAC"}
@@ -100,4 +100,4 @@ export const baseEvaluationModule = {
100
100
  },
101
101
  ],
102
102
  };
103
- //# sourceMappingURL=base-module.js.map
103
+ //# sourceMappingURL=base-evaluation-module.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-evaluation-module.js","sourceRoot":"","sources":["../src/base-evaluation-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,CAAC,MAAM,oBAAoB,GAAoC;IACnE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,0CAA0C;QAC1C,yDAAyD;KAC1D;IAED,KAAK,EAAE;QACL,2CAA2C;QAC3C,2DAA2D;QAC3D,iDAAiD;QACjD,yCAAyC;KAC1C;IAED,YAAY,EAAE;QACZ;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,eAAe;YACtB,KAAK,EAAE;gBACL,gEAAgE;gBAChE,+BAA+B;gBAC/B,6CAA6C;gBAC7C,sDAAsD;aACvD;SACF;KACF;IAED,SAAS,EAAE;QACT;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,UAAU;aACxB;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;oBACR,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,GAAG,CAAC,MAAM;iBACJ,CAAA;aAClB;SACF;KACF;IAED,MAAM,EAAE;QACN,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACrC,MAAM,MAAM,GAAG,GAAG,CAAC,WAAW,CAAC;YAC/B,MAAM,QAAQ,GAAqC,EAAE,CAAC;YAEtD,aAAa;YACb,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,OAAO,GAAG,GAAG,CAAC,EAAE;aAC1B,CAAC,CAAC;YAEH,gDAAgD;YAChD,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;gBAC5B,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,gBAAgB;iBACjC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;YACL,CAAC;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC;KACH;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,0BAA0B;qBACxC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,+BAA+B;qBAC7C;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;aACjC;SACF;KACF;CACF,CAAC"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAgB,SAAS,IAAI,yBAAyB,CAiCrD"}
@@ -15,6 +15,9 @@ export function parseArgs() {
15
15
  .option('--repeat <count>', 'Number of repetitions', '1')
16
16
  .option('--evaluate', 'Enable AI-based evaluation of outputs', false)
17
17
  .option('--evaluators <names>', 'Comma-separated evaluator names (default: all)')
18
+ .option('--dry-run', 'Display execution plan without running the experiment', false)
19
+ .option('--log-file <path>', 'Log file path for JSONL output (detailed logs)')
20
+ .option('--verbose', 'Enable verbose output (show detailed internal operations)', false)
18
21
  .parse();
19
22
  const config = program.args[0];
20
23
  const options = program.opts();
@@ -26,6 +29,9 @@ export function parseArgs() {
26
29
  repeatCount: parseInt(options.repeat, 10),
27
30
  enableEvaluation: options.evaluate,
28
31
  evaluatorFilter: options.evaluators?.split(',').map((s) => s.trim()),
32
+ dryRun: options.dryRun,
33
+ logFile: options.logFile,
34
+ verbose: options.verbose,
29
35
  };
30
36
  }
31
37
  //# sourceMappingURL=args.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.js","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAG/B,MAAM,UAAU,SAAS;IACvB,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,oBAAoB,CAAC;SAC1B,WAAW,CAAC,2CAA2C,CAAC;SACxD,QAAQ,CAAC,UAAU,EAAE,oDAAoD,CAAC;SAC1E,MAAM,CAAC,oBAAoB,EAAE,uBAAuB,CAAC;SACrD,MAAM,CAAC,oBAAoB,EAAE,oDAAoD,CAAC;SAClF,MAAM,CAAC,mBAAmB,EAAE,qDAAqD,CAAC;SAClF,MAAM,CAAC,kBAAkB,EAAE,uBAAuB,EAAE,GAAG,CAAC;SACxD,MAAM,CAAC,YAAY,EAAE,uCAAuC,EAAE,KAAK,CAAC;SACpE,MAAM,CAAC,sBAAsB,EAAE,gDAAgD,CAAC;SAChF,MAAM,CAAC,WAAW,EAAE,uDAAuD,EAAE,KAAK,CAAC;SACnF,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,CAAC;SAC7E,MAAM,CAAC,WAAW,EAAE,2DAA2D,EAAE,KAAK,CAAC;SACvF,KAAK,EAAE,CAAC;IAEX,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO;QACL,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC;QAC1C,cAAc,EAAE,OAAO,CAAC,QAAQ;QAChC,WAAW,EAAE,OAAO,CAAC,KAAK;QAC1B,YAAY,EAAE,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,gBAAgB,EAAE,OAAO,CAAC,QAAQ;QAClC,eAAe,EAAE,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5E,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC;AACJ,CAAC"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dynamic-loader.d.ts","sourceRoot":"","sources":["../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,KAAK,EACV,kBAAkB,EAClB,aAAa,EACb,eAAe,EACf,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAOrB;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,kBAAkB,EAAE,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,eAAe,EAAE,CAAC,CAqE5B;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEzD;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,eAAe,EAAE,EACvB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAsB7B"}
@@ -6,7 +6,10 @@
6
6
  import { merge } from '@modular-prompt/core';
7
7
  import { pathToFileURL } from 'url';
8
8
  import { resolve } from 'path';
9
- import { baseEvaluationModule } from '../evaluators/base-module.js';
9
+ import { baseEvaluationModule } from '../base-evaluation-module.js';
10
+ import { getBuiltinEvaluator } from '../evaluators/index.js';
11
+ import { logger as baseLogger } from '../logger.js';
12
+ const logger = baseLogger.context('dynamic-loader');
10
13
  /**
11
14
  * Load evaluators from references
12
15
  *
@@ -17,43 +20,17 @@ import { baseEvaluationModule } from '../evaluators/base-module.js';
17
20
  export async function loadEvaluators(refs, basePath) {
18
21
  const evaluators = [];
19
22
  for (const ref of refs) {
23
+ let evaluator;
20
24
  if ('path' in ref) {
21
25
  // External file
22
26
  const filePath = resolve(basePath, ref.path);
23
27
  const fileUrl = pathToFileURL(filePath).href;
24
28
  const imported = await import(fileUrl);
25
- const evaluator = imported.default;
29
+ evaluator = imported.default;
26
30
  if (!evaluator) {
27
- console.warn(`⚠️ No default export in ${ref.path}`);
31
+ logger.warn(`No default export in ${ref.path}`);
28
32
  continue;
29
33
  }
30
- // Detect type by checking properties
31
- if ('evaluate' in evaluator && typeof evaluator.evaluate === 'function') {
32
- // Code evaluator
33
- evaluators.push({
34
- name: ref.name,
35
- description: ref.description || evaluator.description || '',
36
- type: 'code',
37
- codeEvaluator: evaluator,
38
- });
39
- }
40
- else if ('module' in evaluator) {
41
- // Prompt evaluator - merge with base module
42
- const mergedModule = merge(baseEvaluationModule, evaluator.module);
43
- evaluators.push({
44
- name: ref.name,
45
- description: ref.description || evaluator.description || '',
46
- type: 'prompt',
47
- promptEvaluator: {
48
- name: evaluator.name,
49
- description: evaluator.description,
50
- module: mergedModule,
51
- },
52
- });
53
- }
54
- else {
55
- console.warn(`⚠️ Unknown evaluator type in ${ref.path}`);
56
- }
57
34
  }
58
35
  else if ('prompt' in ref) {
59
36
  // Inline prompt definition - merge with base module
@@ -68,6 +45,42 @@ export async function loadEvaluators(refs, basePath) {
68
45
  module: mergedModule,
69
46
  },
70
47
  });
48
+ continue;
49
+ }
50
+ else {
51
+ // Builtin evaluator (name only)
52
+ evaluator = getBuiltinEvaluator(ref.name);
53
+ if (!evaluator) {
54
+ logger.warn(`Builtin evaluator not found: ${ref.name}`);
55
+ continue;
56
+ }
57
+ }
58
+ // Detect type by checking properties
59
+ if ('evaluate' in evaluator && typeof evaluator.evaluate === 'function') {
60
+ // Code evaluator
61
+ evaluators.push({
62
+ name: ref.name,
63
+ description: ref.description || evaluator.description || '',
64
+ type: 'code',
65
+ codeEvaluator: evaluator,
66
+ });
67
+ }
68
+ else if ('module' in evaluator) {
69
+ // Prompt evaluator - merge with base module
70
+ const mergedModule = merge(baseEvaluationModule, evaluator.module);
71
+ evaluators.push({
72
+ name: ref.name,
73
+ description: ref.description || evaluator.description || '',
74
+ type: 'prompt',
75
+ promptEvaluator: {
76
+ name: evaluator.name,
77
+ description: evaluator.description,
78
+ module: mergedModule,
79
+ },
80
+ });
81
+ }
82
+ else {
83
+ logger.warn(`Unknown evaluator type: ${ref.name}`);
71
84
  }
72
85
  }
73
86
  return evaluators;
@@ -87,7 +100,7 @@ export async function loadModules(refs, basePath) {
87
100
  const imported = await import(fileUrl);
88
101
  const module = imported.default;
89
102
  if (!module) {
90
- console.warn(`⚠️ No default export in ${ref.path}`);
103
+ logger.warn(`No default export in ${ref.path}`);
91
104
  continue;
92
105
  }
93
106
  modules.push({
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dynamic-loader.js","sourceRoot":"","sources":["../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAO/B,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAapD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAA0B,EAC1B,QAAgB;IAEhB,MAAM,UAAU,GAAsB,EAAE,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,SAAsD,CAAC;QAE3D,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;YAClB,gBAAgB;YAChB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;YAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;YACvC,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC;YAE7B,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,wBAAwB,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBAChD,SAAS;YACX,CAAC;QACH,CAAC;aAAM,IAAI,QAAQ,IAAI,GAAG,EAAE,CAAC;YAC3B,oDAAoD;YACpD,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;YAC7D,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;gBAClC,IAAI,EAAE,QAAQ;gBACd,eAAe,EAAE;oBACf,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;oBAClC,MAAM,EAAE,YAAY;iBACrB;aACF,CAAC,CAAC;YACH,SAAS;QACX,CAAC;aAAM,CAAC;YACN,gCAAgC;YAChC,SAAS,GAAG,mBAAmB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,gCAAgC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxD,SAAS;YACX,CAAC;QACH,CAAC;QAED,qCAAqC;QACrC,IAAI,UAAU,IAAI,SAAS,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YACxE,iBAAiB;YACjB,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;gBAC3D,IAAI,EAAE,MAAM;gBACZ,aAAa,EAAE,SAA0B;aAC1C,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,4CAA4C;YAC5C,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;YACnE,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;gBAC3D,IAAI,EAAE,QAAQ;gBACd,eAAe,EAAE;oBACf,IAAI,EAAE,SAAS,CAAC,IAAI;oBACpB,WAAW,EAAE,SAAS,CAAC,WAAW;oBAClC,MAAM,EAAE,YAAY;iBACrB;aACF,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAQD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAuB,EACvB,QAAgB;IAEhB,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;QAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,CAAC,IAAI,CAAC,wBAAwB,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YAChD,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,IAAI,EAAE;YACxD,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAC3E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,KAAK,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAKhE,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,GAAG,CAAC;IAClB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,EAAE,kBAAkB,EAAE,CAAC;IACjC,SAAS,EAAE,SAAS,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,MAAM,gBAAgB,GAAG,YAAY,CAAC;AAsB5C;;;;;GAKG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAsGpF"}
@@ -6,6 +6,8 @@ import { parse as parseYaml } from 'yaml';
6
6
  import { resolve, dirname, extname } from 'path';
7
7
  import { createJiti } from 'jiti';
8
8
  import { AIService } from '@modular-prompt/driver';
9
+ import { logger as baseLogger } from '../logger.js';
10
+ const logger = baseLogger.context('config-loader');
9
11
  /**
10
12
  * Resolve path relative to config file directory
11
13
  *
@@ -67,9 +69,6 @@ export async function loadExperimentConfig(configPath) {
67
69
  drivers: config.drivers,
68
70
  evaluation: config.evaluation,
69
71
  credentials: config.credentials,
70
- selection: config.selection,
71
- server: config.server,
72
- logging: config.logging,
73
72
  };
74
73
  // Resolve paths in driver configurations relative to config file
75
74
  if (serverConfig.drivers) {
@@ -85,7 +84,7 @@ export async function loadExperimentConfig(configPath) {
85
84
  if (serverConfig.credentials?.googleApplicationCredentials) {
86
85
  const resolvedPath = resolveConfigPath(configDir, serverConfig.credentials.googleApplicationCredentials);
87
86
  process.env.GOOGLE_APPLICATION_CREDENTIALS = resolvedPath;
88
- console.log(`Setting GOOGLE_APPLICATION_CREDENTIALS=${resolvedPath}`);
87
+ logger.verbose(`Setting GOOGLE_APPLICATION_CREDENTIALS=${resolvedPath}`);
89
88
  }
90
89
  // Validation
91
90
  if (!serverConfig.models || Object.keys(serverConfig.models).length === 0) {
@@ -104,6 +103,8 @@ export async function loadExperimentConfig(configPath) {
104
103
  }
105
104
  }
106
105
  // Initialize AIService
106
+ // Note: AIService is used only as a driver factory.
107
+ // Model selection is explicit in experiment configuration, not capability-based.
107
108
  const aiServiceConfig = {
108
109
  models: serverConfig.models,
109
110
  drivers: serverConfig.drivers || {},
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,MAAM,CAAC;AAClC,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAG3E,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;AAcnD;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAY;IACxD,sDAAsD;IACtD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC;IACpD,CAAC;IACD,oCAAoC;IACpC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,kDAAkD;IAClD,OAAO,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,UAAkB;IAC3D,0CAA0C;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IAEhC,iCAAiC;IACjC,IAAI,MAAW,CAAC;IAEhB,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACtC,cAAc;QACd,MAAM,OAAO,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAClD,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;SAAM,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QAC9E,0DAA0D;QAC1D,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE;YACvC,cAAc,EAAE,IAAI,EAAG,mCAAmC;YAC1D,KAAK,EAAE,IAAI,EAAY,wCAAwC;YAC/D,YAAY,EAAE,KAAK,EAAI,0BAA0B;SAClD,CAAC,CAAC;QAEH,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAEvC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0BAA0B,UAAU,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,4CAA4C,CAAC,CAAC;IACxG,CAAC;IAED,qBAAqB;IACrB,MAAM,OAAO,GAAsB,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;IACxD,MAAM,SAAS,GAAe,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;IACrD,MAAM,UAAU,GAAyB,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC;IAEjE,oDAAoD;IACpD,MAAM,YAAY,GAAG;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC;IAEF,iEAAiE;IACjE,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAEtD,8CAA8C;YAC9C,IAAI,YAAY,CAAC,eAAe,EAAE,CAAC;gBACjC,YAAY,CAAC,eAAe,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,eAAe,CAAC,CAAC;YAC5F,CAAC;QACH,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,IAAI,YAAY,CAAC,WAAW,EAAE,4BAA4B,EAAE,CAAC;QAC3D,MAAM,YAAY,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,4BAA4B,CAAC,CAAC;QACzG,OAAO,CAAC,GAAG,CAAC,8BAA8B,GAAG,YAAY,CAAC;QAC1D,MAAM,CAAC,OAAO,CAAC,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,aAAa;IACb,IAAI,CAAC,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,mCAAmC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;IAErE,qCAAqC;IACrC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YACpB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC/B,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,IAAI,+BAA+B,SAAS,GAAG,CAAC,CAAC;gBAC3F,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,oDAAoD;IACpD,iFAAiF;IACjF,MAAM,eAAe,GAAsB;QACzC,MAAM,EAAE,YAAY,CAAC,MAAM;QAC3B,OAAO,EAAE,YAAY,CAAC,OAAO,IAAI,EAAE;QACnC,cAAc,EAAE;YACd,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;SAChB;KACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,eAAe,CAAC,CAAC;IAEjD,OAAO;QACL,YAAY;QACZ,OAAO;QACP,SAAS;QACT,UAAU;QACV,SAAS;QACT,SAAS;KACV,CAAC;AACJ,CAAC"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Built-in evaluators
3
+ */
4
+ import type { CodeEvaluator, PromptEvaluator } from '../types.js';
5
+ type BuiltinEvaluator = CodeEvaluator | PromptEvaluator;
6
+ export declare const builtinEvaluators: Record<string, BuiltinEvaluator>;
7
+ /**
8
+ * Get builtin evaluator by name
9
+ */
10
+ export declare function getBuiltinEvaluator(name: string): BuiltinEvaluator | undefined;
11
+ export {};
12
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluators/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAElE,KAAK,gBAAgB,GAAG,aAAa,GAAG,eAAe,CAAC;AAExD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAG9D,CAAC;AAEF;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,SAAS,CAE9E"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Built-in evaluators
3
+ */
4
+ import structuredOutputPresence from './structured-output-presence.js';
5
+ import llmRequirementFulfillment from './llm-requirement-fulfillment.js';
6
+ export const builtinEvaluators = {
7
+ 'structured-output-presence': structuredOutputPresence,
8
+ 'llm-requirement-fulfillment': llmRequirementFulfillment,
9
+ };
10
+ /**
11
+ * Get builtin evaluator by name
12
+ */
13
+ export function getBuiltinEvaluator(name) {
14
+ return builtinEvaluators[name];
15
+ }
16
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/evaluators/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,wBAAwB,MAAM,iCAAiC,CAAC;AACvE,OAAO,yBAAyB,MAAM,kCAAkC,CAAC;AAKzE,MAAM,CAAC,MAAM,iBAAiB,GAAqC;IACjE,4BAA4B,EAAE,wBAAwB;IACtD,6BAA6B,EAAE,yBAAyB;CACzD,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC"}
@@ -1,7 +1,7 @@
1
1
  /**
2
- * Functional Correctness Evaluator
2
+ * LLM Requirement Fulfillment Evaluator
3
3
  *
4
- * Evaluates whether the output meets the functional requirements
4
+ * Uses LLM to evaluate whether the output meets the functional requirements
5
5
  */
6
6
  import type { PromptModule } from '@modular-prompt/core';
7
7
  import type { EvaluationContext } from '../types.js';
@@ -11,4 +11,4 @@ declare const _default: {
11
11
  module: PromptModule<EvaluationContext>;
12
12
  };
13
13
  export default _default;
14
- //# sourceMappingURL=functional-correctness.d.ts.map
14
+ //# sourceMappingURL=llm-requirement-fulfillment.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-requirement-fulfillment.d.ts","sourceRoot":"","sources":["../../src/evaluators/llm-requirement-fulfillment.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAmB,iBAAiB,EAAE,MAAM,aAAa,CAAC;;;;;;AA0FtE,wBAI4B"}
@@ -1,9 +1,9 @@
1
1
  /**
2
- * Functional Correctness Evaluator
2
+ * LLM Requirement Fulfillment Evaluator
3
3
  *
4
- * Evaluates whether the output meets the functional requirements
4
+ * Uses LLM to evaluate whether the output meets the functional requirements
5
5
  */
6
- const functionalCorrectnessModule = {
6
+ const llmRequirementFulfillmentModule = {
7
7
  createContext: () => ({
8
8
  moduleName: '',
9
9
  prompt: '',
@@ -88,8 +88,8 @@ const functionalCorrectnessModule = {
88
88
  ],
89
89
  };
90
90
  export default {
91
- name: 'Functional Correctness',
92
- description: 'Evaluates whether the output meets the functional requirements',
93
- module: functionalCorrectnessModule,
91
+ name: 'LLM Requirement Fulfillment',
92
+ description: 'Overall requirement fulfillment score based on LLM evaluation. Compares prompt and output to evaluate requirement fulfillment, parameter correctness, completeness, and logical consistency.',
93
+ module: llmRequirementFulfillmentModule,
94
94
  };
95
- //# sourceMappingURL=functional-correctness.js.map
95
+ //# sourceMappingURL=llm-requirement-fulfillment.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-requirement-fulfillment.js","sourceRoot":"","sources":["../../src/evaluators/llm-requirement-fulfillment.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,MAAM,+BAA+B,GAAoC;IACvE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,uEAAuE;KACxE;IAED,YAAY,EAAE;QACZ,6CAA6C;QAC7C;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,qBAAqB;YAC5B,KAAK,EAAE;gBACL,qFAAqF;gBACrF,gFAAgF;gBAChF,uFAAuF;gBACvF,gFAAgF;aACjF;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE;gBACL,2CAA2C;gBAC3C,iDAAiD;gBACjD,0CAA0C;aAC3C;SACF;KACF;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,uBAAuB;qBACrC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,UAAU,EAAE;4BACV,sBAAsB,EAAE;gCACtB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,oBAAoB,EAAE;gCACpB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,qBAAqB,EAAE;gCACrB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,kBAAkB,EAAE;gCAClB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;yBACF;qBACF;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,SAAS,CAAC;aAC5C;SACF;KACF;CACF,CAAC;AAEF,eAAe;IACb,IAAI,EAAE,6BAA6B;IACnC,WAAW,EAAE,8LAA8L;IAC3M,MAAM,EAAE,+BAA+B;CACd,CAAC"}
@@ -1,7 +1,7 @@
1
1
  /**
2
- * JSON Validator Evaluator
2
+ * Structured Output Presence Evaluator
3
3
  *
4
- * Validates JSON structure in structured output
4
+ * Checks if structuredOutput exists and is a valid object
5
5
  */
6
6
  import type { EvaluationContext, EvaluationResult } from '../types.js';
7
7
  declare const _default: {
@@ -10,4 +10,4 @@ declare const _default: {
10
10
  evaluate(context: EvaluationContext): Promise<EvaluationResult>;
11
11
  };
12
12
  export default _default;
13
- //# sourceMappingURL=json-validator.d.ts.map
13
+ //# sourceMappingURL=structured-output-presence.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-output-presence.d.ts","sourceRoot":"","sources":["../../src/evaluators/structured-output-presence.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAiB,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;;;;sBAM5D,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC;;AAJvE,wBAiD0B"}
@@ -1,11 +1,11 @@
1
1
  /**
2
- * JSON Validator Evaluator
2
+ * Structured Output Presence Evaluator
3
3
  *
4
- * Validates JSON structure in structured output
4
+ * Checks if structuredOutput exists and is a valid object
5
5
  */
6
6
  export default {
7
- name: 'JSON Validator',
8
- description: 'Validates JSON structure in output',
7
+ name: 'Structured Output Presence',
8
+ description: 'Measures structured output presence rate (percentage of runs with valid structuredOutput). Checks if structuredOutput exists and is an object type for each run.',
9
9
  async evaluate(context) {
10
10
  const errors = [];
11
11
  let validCount = 0;
@@ -33,7 +33,7 @@ export default {
33
33
  ? (validCount / context.runs.length) * 10
34
34
  : 0;
35
35
  return {
36
- evaluator: 'json-validator',
36
+ evaluator: 'structured-output-presence',
37
37
  moduleName: context.moduleName,
38
38
  score,
39
39
  reasoning: errors.length > 0
@@ -48,4 +48,4 @@ export default {
48
48
  };
49
49
  },
50
50
  };
51
- //# sourceMappingURL=json-validator.js.map
51
+ //# sourceMappingURL=structured-output-presence.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-output-presence.js","sourceRoot":"","sources":["../../src/evaluators/structured-output-presence.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,eAAe;IACb,IAAI,EAAE,4BAA4B;IAClC,WAAW,EAAE,kKAAkK;IAE/K,KAAK,CAAC,QAAQ,CAAC,OAA0B;QACvC,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,MAAM,UAAU,GAA2D,EAAE,CAAC;QAE9E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,EAAE,gBAAgB,EAAE,GAAG,GAAG,CAAC,WAAW,CAAC;YAE7C,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;gBAClD,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC,CAAC;gBAC7E,SAAS;YACX,CAAC;YAED,wBAAwB;YACxB,IAAI,OAAO,gBAAgB,KAAK,QAAQ,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;gBACtE,UAAU,EAAE,CAAC;gBACb,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAG,wBAAwB,CAAC;gBACvC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;gBACtC,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;YACnC,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;YACzC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,SAAS,EAAE,4BAA4B;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,KAAK;YACL,SAAS,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;gBAC1B,CAAC,CAAC,GAAG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,2BAA2B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBACpF,CAAC,CAAC,OAAO,UAAU,oCAAoC;YACzD,OAAO,EAAE;gBACP,UAAU;gBACV,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM;gBAC/B,MAAM;gBACN,IAAI,EAAE,UAAU;aACjB;SACF,CAAC;IACJ,CAAC;CACsB,CAAC"}
@@ -6,7 +6,7 @@
6
6
  export * from './types.js';
7
7
  export { loadExperimentConfig } from './config/loader.js';
8
8
  export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
9
- export { baseEvaluationModule } from './evaluators/base-module.js';
9
+ export { baseEvaluationModule } from './base-evaluation-module.js';
10
10
  export { DriverManager } from './runner/driver-manager.js';
11
11
  export { ExperimentRunner } from './runner/experiment.js';
12
12
  export { EvaluatorRunner } from './runner/evaluator.js';
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGzE,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAGnE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
@@ -9,7 +9,7 @@ export * from './types.js';
9
9
  export { loadExperimentConfig } from './config/loader.js';
10
10
  export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
11
11
  // Evaluators
12
- export { baseEvaluationModule } from './evaluators/base-module.js';
12
+ export { baseEvaluationModule } from './base-evaluation-module.js';
13
13
  // Runners
14
14
  export { DriverManager } from './runner/driver-manager.js';
15
15
  export { ExperimentRunner } from './runner/experiment.js';
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,QAAQ;AACR,cAAc,YAAY,CAAC;AAE3B,wBAAwB;AACxB,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAEzE,aAAa;AACb,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAEnE,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,YAAY;AACZ,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Experiment package logger
3
+ */
4
+ import { Logger } from '@modular-prompt/utils';
5
+ /**
6
+ * Experiment package logger with 'experiment' prefix
7
+ */
8
+ export declare const logger: Logger;
9
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C;;GAEG;AACH,eAAO,MAAM,MAAM,QAAwD,CAAC"}
package/dist/logger.js ADDED
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Experiment package logger
3
+ */
4
+ import { Logger } from '@modular-prompt/utils';
5
+ /**
6
+ * Experiment package logger with 'experiment' prefix
7
+ */
8
+ export const logger = new Logger({ prefix: 'experiment', context: 'main' });
9
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C;;GAEG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistics.d.ts","sourceRoot":"","sources":["../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAa,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,UAAU,EAAE;IAEzC;;OAEG;IACH,MAAM,IAAI,IAAI;IA2Bd;;OAEG;IACH,OAAO,CAAC,YAAY;IASpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;CAwB1B"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistics.js","sourceRoot":"","sources":["../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,OAAO,kBAAkB;IACT;IAApB,YAAoB,OAAqB;QAArB,YAAO,GAAP,OAAO,CAAc;IAAG,CAAC;IAE7C;;OAEG;IACH,MAAM;QACJ,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,MAAM,MAAM,CAAC,KAAK,OAAO,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;YACvF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvD,MAAM,WAAW,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;YAEpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,WAAW,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAEtG,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;gBAC/B,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAiB;QACpC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QAC5D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAE/B,OAAO,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,GAAG,IAAI,CAAC,CAAC;IACrF,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAiB;QACzC,2BAA2B;QAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAC/B,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE3B,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;QACT,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,IAAI,0BAA0B,WAAW,CAAC,MAAM,SAAS,CAAC,CAAC;QAE5G,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;gBAChD,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;gBAC3D,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,GAAG,CAAC,KAAK,KAAK,OAAO,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
@@ -17,6 +17,7 @@
17
17
  * --repeat <count> Number of repetitions (default: 1)
18
18
  * --evaluate Enable evaluation phase
19
19
  * --evaluators <names> Comma-separated evaluator names (default: all)
20
+ * --dry-run Display execution plan without running the experiment
20
21
  */
21
22
  export {};
22
23
  //# sourceMappingURL=run-comparison.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-comparison.d.ts","sourceRoot":"","sources":["../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;;GAmBG"}