@modular-prompt/experiment 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -5
- package/dist/{src/evaluators/base-module.d.ts → base-evaluation-module.d.ts} +2 -2
- package/dist/base-evaluation-module.d.ts.map +1 -0
- package/dist/{src/evaluators/base-module.js → base-evaluation-module.js} +1 -1
- package/dist/base-evaluation-module.js.map +1 -0
- package/dist/cli/args.d.ts.map +1 -0
- package/dist/{src/cli → cli}/args.js +6 -0
- package/dist/cli/args.js.map +1 -0
- package/dist/config/dynamic-loader.d.ts.map +1 -0
- package/dist/{src/config → config}/dynamic-loader.js +44 -31
- package/dist/config/dynamic-loader.js.map +1 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/{src/config → config}/loader.js +5 -4
- package/dist/config/loader.js.map +1 -0
- package/dist/evaluators/index.d.ts +12 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/index.js +16 -0
- package/dist/evaluators/index.js.map +1 -0
- package/dist/{src/evaluators/functional-correctness.d.ts → evaluators/llm-requirement-fulfillment.d.ts} +3 -3
- package/dist/evaluators/llm-requirement-fulfillment.d.ts.map +1 -0
- package/dist/{src/evaluators/functional-correctness.js → evaluators/llm-requirement-fulfillment.js} +7 -7
- package/dist/evaluators/llm-requirement-fulfillment.js.map +1 -0
- package/dist/{src/evaluators/json-validator.d.ts → evaluators/structured-output-presence.d.ts} +3 -3
- package/dist/evaluators/structured-output-presence.d.ts.map +1 -0
- package/dist/{src/evaluators/json-validator.js → evaluators/structured-output-presence.js} +6 -6
- package/dist/evaluators/structured-output-presence.js.map +1 -0
- package/dist/{src/index.d.ts → index.d.ts} +1 -1
- package/dist/index.d.ts.map +1 -0
- package/dist/{src/index.js → index.js} +1 -1
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +9 -0
- package/dist/logger.js.map +1 -0
- package/dist/reporter/statistics.d.ts.map +1 -0
- package/dist/reporter/statistics.js.map +1 -0
- package/dist/{src/run-comparison.d.ts → run-comparison.d.ts} +1 -0
- package/dist/run-comparison.d.ts.map +1 -0
- package/dist/{src/run-comparison.js → run-comparison.js} +27 -0
- package/dist/run-comparison.js.map +1 -0
- package/dist/runner/driver-manager.d.ts.map +1 -0
- package/dist/{src/runner → runner}/driver-manager.js +8 -6
- package/dist/runner/driver-manager.js.map +1 -0
- package/dist/{src/runner → runner}/evaluator.d.ts +2 -1
- package/dist/runner/evaluator.d.ts.map +1 -0
- package/dist/{src/runner → runner}/evaluator.js +15 -5
- package/dist/runner/evaluator.js.map +1 -0
- package/dist/runner/experiment.d.ts.map +1 -0
- package/dist/{src/runner → runner}/experiment.js +19 -18
- package/dist/runner/experiment.js.map +1 -0
- package/dist/{src/types.d.ts → types.d.ts} +26 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js.map +1 -0
- package/examples/experiment.yaml +9 -17
- package/package.json +5 -4
- package/dist/src/cli/args.d.ts.map +0 -1
- package/dist/src/cli/args.js.map +0 -1
- package/dist/src/config/dynamic-loader.d.ts.map +0 -1
- package/dist/src/config/dynamic-loader.js.map +0 -1
- package/dist/src/config/loader.d.ts.map +0 -1
- package/dist/src/config/loader.js.map +0 -1
- package/dist/src/evaluators/base-module.d.ts.map +0 -1
- package/dist/src/evaluators/base-module.js.map +0 -1
- package/dist/src/evaluators/functional-correctness.d.ts.map +0 -1
- package/dist/src/evaluators/functional-correctness.js.map +0 -1
- package/dist/src/evaluators/json-validator.d.ts.map +0 -1
- package/dist/src/evaluators/json-validator.js.map +0 -1
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js.map +0 -1
- package/dist/src/reporter/statistics.d.ts.map +0 -1
- package/dist/src/reporter/statistics.js.map +0 -1
- package/dist/src/run-comparison.d.ts.map +0 -1
- package/dist/src/run-comparison.js.map +0 -1
- package/dist/src/runner/driver-manager.d.ts.map +0 -1
- package/dist/src/runner/driver-manager.js.map +0 -1
- package/dist/src/runner/evaluator.d.ts.map +0 -1
- package/dist/src/runner/evaluator.js.map +0 -1
- package/dist/src/runner/experiment.d.ts.map +0 -1
- package/dist/src/runner/experiment.js.map +0 -1
- package/dist/src/types.d.ts.map +0 -1
- package/dist/src/types.js.map +0 -1
- package/dist/tsconfig.tsbuildinfo +0 -1
- /package/dist/{src/cli → cli}/args.d.ts +0 -0
- /package/dist/{src/config → config}/dynamic-loader.d.ts +0 -0
- /package/dist/{src/config → config}/loader.d.ts +0 -0
- /package/dist/{src/reporter → reporter}/statistics.d.ts +0 -0
- /package/dist/{src/reporter → reporter}/statistics.js +0 -0
- /package/dist/{src/runner → runner}/driver-manager.d.ts +0 -0
- /package/dist/{src/runner → runner}/experiment.d.ts +0 -0
- /package/dist/{src/types.js → types.js} +0 -0
package/README.md
CHANGED
|
@@ -69,8 +69,12 @@ testCases:
|
|
|
69
69
|
- gemini-fast
|
|
70
70
|
|
|
71
71
|
evaluators:
|
|
72
|
-
- name
|
|
73
|
-
|
|
72
|
+
# Built-in evaluators (name only)
|
|
73
|
+
- name: structured-output-presence
|
|
74
|
+
- name: llm-requirement-fulfillment
|
|
75
|
+
# Or external evaluator (with path)
|
|
76
|
+
- name: custom-validator
|
|
77
|
+
path: ./evaluators/custom-validator.ts
|
|
74
78
|
# Or inline prompt evaluator
|
|
75
79
|
- name: quality-check
|
|
76
80
|
prompt:
|
|
@@ -124,9 +128,13 @@ export default {
|
|
|
124
128
|
},
|
|
125
129
|
],
|
|
126
130
|
evaluators: [
|
|
131
|
+
// Built-in evaluators (name only)
|
|
132
|
+
{ name: 'structured-output-presence' },
|
|
133
|
+
{ name: 'llm-requirement-fulfillment' },
|
|
134
|
+
// Or external evaluator (with path)
|
|
127
135
|
{
|
|
128
|
-
name: '
|
|
129
|
-
path: './evaluators/
|
|
136
|
+
name: 'custom-validator',
|
|
137
|
+
path: './evaluators/custom-validator.ts',
|
|
130
138
|
},
|
|
131
139
|
],
|
|
132
140
|
evaluation: {
|
|
@@ -143,6 +151,9 @@ export default {
|
|
|
143
151
|
### 2. Run Experiment
|
|
144
152
|
|
|
145
153
|
```bash
|
|
154
|
+
# Validate configuration and display execution plan (recommended first step)
|
|
155
|
+
npx modular-experiment examples/experiment.yaml --dry-run
|
|
156
|
+
|
|
146
157
|
# Run with YAML config
|
|
147
158
|
npx modular-experiment examples/experiment.yaml
|
|
148
159
|
|
|
@@ -157,6 +168,15 @@ npx modular-experiment examples/experiment.yaml --evaluate
|
|
|
157
168
|
|
|
158
169
|
# Run multiple times for statistics
|
|
159
170
|
npx modular-experiment examples/experiment.yaml --repeat 10
|
|
171
|
+
|
|
172
|
+
# Run with detailed logging to JSONL file
|
|
173
|
+
npx modular-experiment examples/experiment.yaml --log-file experiment.jsonl
|
|
174
|
+
|
|
175
|
+
# Run with verbose output (show internal operations)
|
|
176
|
+
npx modular-experiment examples/experiment.yaml --verbose
|
|
177
|
+
|
|
178
|
+
# Combine options
|
|
179
|
+
npx modular-experiment examples/experiment.yaml --evaluate --log-file experiment.jsonl --verbose
|
|
160
180
|
```
|
|
161
181
|
|
|
162
182
|
## Configuration
|
|
@@ -249,6 +269,48 @@ export default {
|
|
|
249
269
|
|
|
250
270
|
All prompt evaluators are automatically merged with the base evaluation module.
|
|
251
271
|
|
|
272
|
+
## Built-in Evaluators
|
|
273
|
+
|
|
274
|
+
The framework includes built-in evaluators that can be referenced by name only (no path required):
|
|
275
|
+
|
|
276
|
+
### structured-output-presence
|
|
277
|
+
|
|
278
|
+
- **Type**: Code Evaluator
|
|
279
|
+
- **What it measures**: Checks if `structuredOutput` exists and is a valid object
|
|
280
|
+
- **Evaluation logic**:
|
|
281
|
+
- Verifies presence of `structuredOutput` in query result
|
|
282
|
+
- Confirms it's a non-null object type
|
|
283
|
+
- **Score**: `(validCount / totalRuns) * 10`
|
|
284
|
+
- **Use case**: Verify that the model returns structured JSON output (essential for structured output workflows)
|
|
285
|
+
- **Usage**:
|
|
286
|
+
```yaml
|
|
287
|
+
evaluators:
|
|
288
|
+
- name: "structured-output-presence"
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### llm-requirement-fulfillment
|
|
292
|
+
|
|
293
|
+
- **Type**: Prompt Evaluator (uses LLM for evaluation)
|
|
294
|
+
- **What it measures**: Uses LLM to comprehensively evaluate whether output meets functional requirements
|
|
295
|
+
- **Evaluation criteria**:
|
|
296
|
+
1. **Requirement Fulfillment**: Does it satisfy the intent described in the prompt?
|
|
297
|
+
2. **Parameter Correctness**: Are all required parameters present and correct?
|
|
298
|
+
3. **Parameter Completeness**: Are optional parameters appropriately used or omitted?
|
|
299
|
+
4. **Logical Consistency**: Is the output logically consistent with the facts?
|
|
300
|
+
- **Score**: 0-10 overall score with detailed sub-scores for each criterion
|
|
301
|
+
- **Use case**: Comprehensive quality assessment of output (requires evaluation model to be configured)
|
|
302
|
+
- **Usage**:
|
|
303
|
+
```yaml
|
|
304
|
+
evaluators:
|
|
305
|
+
- name: "llm-requirement-fulfillment"
|
|
306
|
+
|
|
307
|
+
evaluation:
|
|
308
|
+
enabled: true
|
|
309
|
+
model: "gemini-fast" # Model used for evaluation
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Note**: `llm-requirement-fulfillment` requires an evaluation model to be configured in the `evaluation` section.
|
|
313
|
+
|
|
252
314
|
## Architecture
|
|
253
315
|
|
|
254
316
|
```
|
|
@@ -280,7 +342,8 @@ All prompt evaluators are automatically merged with the base evaluation module.
|
|
|
280
342
|
| `runner/evaluator.ts` | Execute evaluations |
|
|
281
343
|
| `runner/driver-manager.ts` | Cache and manage AI drivers |
|
|
282
344
|
| `reporter/statistics.ts` | Generate statistical reports |
|
|
283
|
-
| `
|
|
345
|
+
| `base-evaluation-module.ts` | Base evaluation prompt module |
|
|
346
|
+
| `evaluators/index.ts` | Built-in evaluator registry |
|
|
284
347
|
|
|
285
348
|
## Examples
|
|
286
349
|
|
|
@@ -338,6 +401,9 @@ Options:
|
|
|
338
401
|
--repeat <count> Number of repetitions (default: 1)
|
|
339
402
|
--evaluate Enable evaluation phase
|
|
340
403
|
--evaluators <names> Comma-separated evaluator names (default: all)
|
|
404
|
+
--dry-run Display execution plan without running the experiment
|
|
405
|
+
--log-file <path> Log file path for JSONL output (detailed logs)
|
|
406
|
+
--verbose Enable verbose output (show detailed internal operations)
|
|
341
407
|
```
|
|
342
408
|
|
|
343
409
|
**Note**: All paths specified in the config file are resolved relative to the config file's directory.
|
|
@@ -5,6 +5,6 @@
|
|
|
5
5
|
* It defines how test data is presented to the evaluator.
|
|
6
6
|
*/
|
|
7
7
|
import type { PromptModule } from '@modular-prompt/core';
|
|
8
|
-
import type { EvaluationContext } from '../types.js';
|
|
8
|
+
import type { EvaluationContext } from './types.js';
|
|
9
9
|
export declare const baseEvaluationModule: PromptModule<EvaluationContext>;
|
|
10
|
-
//# sourceMappingURL=base-module.d.ts.map
|
|
10
|
+
//# sourceMappingURL=base-evaluation-module.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-evaluation-module.d.ts","sourceRoot":"","sources":["../src/base-evaluation-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAA4B,MAAM,sBAAsB,CAAC;AACnF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAEpD,eAAO,MAAM,oBAAoB,EAAE,YAAY,CAAC,iBAAiB,CAuGhE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-evaluation-module.js","sourceRoot":"","sources":["../src/base-evaluation-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,CAAC,MAAM,oBAAoB,GAAoC;IACnE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,0CAA0C;QAC1C,yDAAyD;KAC1D;IAED,KAAK,EAAE;QACL,2CAA2C;QAC3C,2DAA2D;QAC3D,iDAAiD;QACjD,yCAAyC;KAC1C;IAED,YAAY,EAAE;QACZ;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,eAAe;YACtB,KAAK,EAAE;gBACL,gEAAgE;gBAChE,+BAA+B;gBAC/B,6CAA6C;gBAC7C,sDAAsD;aACvD;SACF;KACF;IAED,SAAS,EAAE;QACT;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,UAAU;aACxB;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;oBACR,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,GAAG,CAAC,MAAM;iBACJ,CAAA;aAClB;SACF;KACF;IAED,MAAM,EAAE;QACN,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACrC,MAAM,MAAM,GAAG,GAAG,CAAC,WAAW,CAAC;YAC/B,MAAM,QAAQ,GAAqC,EAAE,CAAC;YAEtD,aAAa;YACb,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,OAAO,GAAG,GAAG,CAAC,EAAE;aAC1B,CAAC,CAAC;YAEH,gDAAgD;YAChD,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;gBAC5B,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,gBAAgB;iBACjC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;YACL,CAAC;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC;KACH;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,0BAA0B;qBACxC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,+BAA+B;qBAC7C;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;aACjC;SACF;KACF;CACF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAgB,SAAS,IAAI,yBAAyB,CAiCrD"}
|
|
@@ -15,6 +15,9 @@ export function parseArgs() {
|
|
|
15
15
|
.option('--repeat <count>', 'Number of repetitions', '1')
|
|
16
16
|
.option('--evaluate', 'Enable AI-based evaluation of outputs', false)
|
|
17
17
|
.option('--evaluators <names>', 'Comma-separated evaluator names (default: all)')
|
|
18
|
+
.option('--dry-run', 'Display execution plan without running the experiment', false)
|
|
19
|
+
.option('--log-file <path>', 'Log file path for JSONL output (detailed logs)')
|
|
20
|
+
.option('--verbose', 'Enable verbose output (show detailed internal operations)', false)
|
|
18
21
|
.parse();
|
|
19
22
|
const config = program.args[0];
|
|
20
23
|
const options = program.opts();
|
|
@@ -26,6 +29,9 @@ export function parseArgs() {
|
|
|
26
29
|
repeatCount: parseInt(options.repeat, 10),
|
|
27
30
|
enableEvaluation: options.evaluate,
|
|
28
31
|
evaluatorFilter: options.evaluators?.split(',').map((s) => s.trim()),
|
|
32
|
+
dryRun: options.dryRun,
|
|
33
|
+
logFile: options.logFile,
|
|
34
|
+
verbose: options.verbose,
|
|
29
35
|
};
|
|
30
36
|
}
|
|
31
37
|
//# sourceMappingURL=args.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"args.js","sourceRoot":"","sources":["../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAG/B,MAAM,UAAU,SAAS;IACvB,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,oBAAoB,CAAC;SAC1B,WAAW,CAAC,2CAA2C,CAAC;SACxD,QAAQ,CAAC,UAAU,EAAE,oDAAoD,CAAC;SAC1E,MAAM,CAAC,oBAAoB,EAAE,uBAAuB,CAAC;SACrD,MAAM,CAAC,oBAAoB,EAAE,oDAAoD,CAAC;SAClF,MAAM,CAAC,mBAAmB,EAAE,qDAAqD,CAAC;SAClF,MAAM,CAAC,kBAAkB,EAAE,uBAAuB,EAAE,GAAG,CAAC;SACxD,MAAM,CAAC,YAAY,EAAE,uCAAuC,EAAE,KAAK,CAAC;SACpE,MAAM,CAAC,sBAAsB,EAAE,gDAAgD,CAAC;SAChF,MAAM,CAAC,WAAW,EAAE,uDAAuD,EAAE,KAAK,CAAC;SACnF,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,CAAC;SAC7E,MAAM,CAAC,WAAW,EAAE,2DAA2D,EAAE,KAAK,CAAC;SACvF,KAAK,EAAE,CAAC;IAEX,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO;QACL,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC;QAC1C,cAAc,EAAE,OAAO,CAAC,QAAQ;QAChC,WAAW,EAAE,OAAO,CAAC,KAAK;QAC1B,YAAY,EAAE,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,gBAAgB,EAAE,OAAO,CAAC,QAAQ;QAClC,eAAe,EAAE,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5E,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dynamic-loader.d.ts","sourceRoot":"","sources":["../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,KAAK,EACV,kBAAkB,EAClB,aAAa,EACb,eAAe,EACf,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAOrB;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,kBAAkB,EAAE,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,eAAe,EAAE,CAAC,CAqE5B;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEzD;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,eAAe,EAAE,EACvB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAsB7B"}
|
|
@@ -6,7 +6,10 @@
|
|
|
6
6
|
import { merge } from '@modular-prompt/core';
|
|
7
7
|
import { pathToFileURL } from 'url';
|
|
8
8
|
import { resolve } from 'path';
|
|
9
|
-
import { baseEvaluationModule } from '../evaluators/base-module.js';
|
|
9
|
+
import { baseEvaluationModule } from '../base-evaluation-module.js';
|
|
10
|
+
import { getBuiltinEvaluator } from '../evaluators/index.js';
|
|
11
|
+
import { logger as baseLogger } from '../logger.js';
|
|
12
|
+
const logger = baseLogger.context('dynamic-loader');
|
|
10
13
|
/**
|
|
11
14
|
* Load evaluators from references
|
|
12
15
|
*
|
|
@@ -17,43 +20,17 @@ import { baseEvaluationModule } from '../evaluators/base-module.js';
|
|
|
17
20
|
export async function loadEvaluators(refs, basePath) {
|
|
18
21
|
const evaluators = [];
|
|
19
22
|
for (const ref of refs) {
|
|
23
|
+
let evaluator;
|
|
20
24
|
if ('path' in ref) {
|
|
21
25
|
// External file
|
|
22
26
|
const filePath = resolve(basePath, ref.path);
|
|
23
27
|
const fileUrl = pathToFileURL(filePath).href;
|
|
24
28
|
const imported = await import(fileUrl);
|
|
25
|
-
|
|
29
|
+
evaluator = imported.default;
|
|
26
30
|
if (!evaluator) {
|
|
27
|
-
|
|
31
|
+
logger.warn(`No default export in ${ref.path}`);
|
|
28
32
|
continue;
|
|
29
33
|
}
|
|
30
|
-
// Detect type by checking properties
|
|
31
|
-
if ('evaluate' in evaluator && typeof evaluator.evaluate === 'function') {
|
|
32
|
-
// Code evaluator
|
|
33
|
-
evaluators.push({
|
|
34
|
-
name: ref.name,
|
|
35
|
-
description: ref.description || evaluator.description || '',
|
|
36
|
-
type: 'code',
|
|
37
|
-
codeEvaluator: evaluator,
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
else if ('module' in evaluator) {
|
|
41
|
-
// Prompt evaluator - merge with base module
|
|
42
|
-
const mergedModule = merge(baseEvaluationModule, evaluator.module);
|
|
43
|
-
evaluators.push({
|
|
44
|
-
name: ref.name,
|
|
45
|
-
description: ref.description || evaluator.description || '',
|
|
46
|
-
type: 'prompt',
|
|
47
|
-
promptEvaluator: {
|
|
48
|
-
name: evaluator.name,
|
|
49
|
-
description: evaluator.description,
|
|
50
|
-
module: mergedModule,
|
|
51
|
-
},
|
|
52
|
-
});
|
|
53
|
-
}
|
|
54
|
-
else {
|
|
55
|
-
console.warn(`⚠️ Unknown evaluator type in ${ref.path}`);
|
|
56
|
-
}
|
|
57
34
|
}
|
|
58
35
|
else if ('prompt' in ref) {
|
|
59
36
|
// Inline prompt definition - merge with base module
|
|
@@ -68,6 +45,42 @@ export async function loadEvaluators(refs, basePath) {
|
|
|
68
45
|
module: mergedModule,
|
|
69
46
|
},
|
|
70
47
|
});
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
// Builtin evaluator (name only)
|
|
52
|
+
evaluator = getBuiltinEvaluator(ref.name);
|
|
53
|
+
if (!evaluator) {
|
|
54
|
+
logger.warn(`Builtin evaluator not found: ${ref.name}`);
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
// Detect type by checking properties
|
|
59
|
+
if ('evaluate' in evaluator && typeof evaluator.evaluate === 'function') {
|
|
60
|
+
// Code evaluator
|
|
61
|
+
evaluators.push({
|
|
62
|
+
name: ref.name,
|
|
63
|
+
description: ref.description || evaluator.description || '',
|
|
64
|
+
type: 'code',
|
|
65
|
+
codeEvaluator: evaluator,
|
|
66
|
+
});
|
|
67
|
+
}
|
|
68
|
+
else if ('module' in evaluator) {
|
|
69
|
+
// Prompt evaluator - merge with base module
|
|
70
|
+
const mergedModule = merge(baseEvaluationModule, evaluator.module);
|
|
71
|
+
evaluators.push({
|
|
72
|
+
name: ref.name,
|
|
73
|
+
description: ref.description || evaluator.description || '',
|
|
74
|
+
type: 'prompt',
|
|
75
|
+
promptEvaluator: {
|
|
76
|
+
name: evaluator.name,
|
|
77
|
+
description: evaluator.description,
|
|
78
|
+
module: mergedModule,
|
|
79
|
+
},
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
else {
|
|
83
|
+
logger.warn(`Unknown evaluator type: ${ref.name}`);
|
|
71
84
|
}
|
|
72
85
|
}
|
|
73
86
|
return evaluators;
|
|
@@ -87,7 +100,7 @@ export async function loadModules(refs, basePath) {
|
|
|
87
100
|
const imported = await import(fileUrl);
|
|
88
101
|
const module = imported.default;
|
|
89
102
|
if (!module) {
|
|
90
|
-
|
|
103
|
+
logger.warn(`No default export in ${ref.path}`);
|
|
91
104
|
continue;
|
|
92
105
|
}
|
|
93
106
|
modules.push({
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dynamic-loader.js","sourceRoot":"","sources":["../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAO/B,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAapD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAA0B,EAC1B,QAAgB;IAEhB,MAAM,UAAU,GAAsB,EAAE,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,SAAsD,CAAC;QAE3D,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;YAClB,gBAAgB;YAChB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;YAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;YACvC,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC;YAE7B,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,wBAAwB,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBAChD,SAAS;YACX,CAAC;QACH,CAAC;aAAM,IAAI,QAAQ,IAAI,GAAG,EAAE,CAAC;YAC3B,oDAAoD;YACpD,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;YAC7D,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;gBAClC,IAAI,EAAE,QAAQ;gBACd,eAAe,EAAE;oBACf,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;oBAClC,MAAM,EAAE,YAAY;iBACrB;aACF,CAAC,CAAC;YACH,SAAS;QACX,CAAC;aAAM,CAAC;YACN,gCAAgC;YAChC,SAAS,GAAG,mBAAmB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,gCAAgC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBACxD,SAAS;YACX,CAAC;QACH,CAAC;QAED,qCAAqC;QACrC,IAAI,UAAU,IAAI,SAAS,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YACxE,iBAAiB;YACjB,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;gBAC3D,IAAI,EAAE,MAAM;gBACZ,aAAa,EAAE,SAA0B;aAC1C,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,4CAA4C;YAC5C,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,SAAS,CAAC,MAAM,CAAC,CAA
C;YACnE,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;gBAC3D,IAAI,EAAE,QAAQ;gBACd,eAAe,EAAE;oBACf,IAAI,EAAE,SAAS,CAAC,IAAI;oBACpB,WAAW,EAAE,SAAS,CAAC,WAAW;oBAClC,MAAM,EAAE,YAAY;iBACrB;aACF,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAQD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAuB,EACvB,QAAgB;IAEhB,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;QAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,CAAC,IAAI,CAAC,wBAAwB,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YAChD,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,IAAI,EAAE;YACxD,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAC3E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,KAAK,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAKhE,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,GAAG,CAAC;IAClB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,EAAE,kBAAkB,EAAE,CAAC;IACjC,SAAS,EAAE,SAAS,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,MAAM,gBAAgB,GAAG,YAAY,CAAC;AAsB5C;;;;;GAKG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAsGpF"}
|
|
@@ -6,6 +6,8 @@ import { parse as parseYaml } from 'yaml';
|
|
|
6
6
|
import { resolve, dirname, extname } from 'path';
|
|
7
7
|
import { createJiti } from 'jiti';
|
|
8
8
|
import { AIService } from '@modular-prompt/driver';
|
|
9
|
+
import { logger as baseLogger } from '../logger.js';
|
|
10
|
+
const logger = baseLogger.context('config-loader');
|
|
9
11
|
/**
|
|
10
12
|
* Resolve path relative to config file directory
|
|
11
13
|
*
|
|
@@ -67,9 +69,6 @@ export async function loadExperimentConfig(configPath) {
|
|
|
67
69
|
drivers: config.drivers,
|
|
68
70
|
evaluation: config.evaluation,
|
|
69
71
|
credentials: config.credentials,
|
|
70
|
-
selection: config.selection,
|
|
71
|
-
server: config.server,
|
|
72
|
-
logging: config.logging,
|
|
73
72
|
};
|
|
74
73
|
// Resolve paths in driver configurations relative to config file
|
|
75
74
|
if (serverConfig.drivers) {
|
|
@@ -85,7 +84,7 @@ export async function loadExperimentConfig(configPath) {
|
|
|
85
84
|
if (serverConfig.credentials?.googleApplicationCredentials) {
|
|
86
85
|
const resolvedPath = resolveConfigPath(configDir, serverConfig.credentials.googleApplicationCredentials);
|
|
87
86
|
process.env.GOOGLE_APPLICATION_CREDENTIALS = resolvedPath;
|
|
88
|
-
|
|
87
|
+
logger.verbose(`Setting GOOGLE_APPLICATION_CREDENTIALS=${resolvedPath}`);
|
|
89
88
|
}
|
|
90
89
|
// Validation
|
|
91
90
|
if (!serverConfig.models || Object.keys(serverConfig.models).length === 0) {
|
|
@@ -104,6 +103,8 @@ export async function loadExperimentConfig(configPath) {
|
|
|
104
103
|
}
|
|
105
104
|
}
|
|
106
105
|
// Initialize AIService
|
|
106
|
+
// Note: AIService is used only as a driver factory.
|
|
107
|
+
// Model selection is explicit in experiment configuration, not capability-based.
|
|
107
108
|
const aiServiceConfig = {
|
|
108
109
|
models: serverConfig.models,
|
|
109
110
|
drivers: serverConfig.drivers || {},
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,MAAM,CAAC;AAClC,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAG3E,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;AAcnD;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAY;IACxD,sDAAsD;IACtD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC;IACpD,CAAC;IACD,oCAAoC;IACpC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,kDAAkD;IAClD,OAAO,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,UAAkB;IAC3D,0CAA0C;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IAEhC,iCAAiC;IACjC,IAAI,MAAW,CAAC;IAEhB,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACtC,cAAc;QACd,MAAM,OAAO,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAClD,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;SAAM,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QAC9E,0DAA0D;QAC1D,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE;YACvC,cAAc,EAAE,IAAI,EAAG,mCAAmC;YAC1D,KAAK,EAAE,IAAI,EAAY,wCAAwC;YAC/D,YAAY,EAAE,KAAK,EAAI,0BAA0B;SAClD,CAAC,CAAC;QAEH,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAEvC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0BAA0B,UAAU,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,4CAA4C,CAAC,CAAC;IACxG,CAAC;IAED,qBAAqB;IACrB,MAAM,OAAO,GAAsB,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;IACxD,MAAM,SAAS,GAAe,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;IACrD,MAAM,UAAU,GAAyB,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC;IAEjE,oDAAoD;IACpD,MAAM,YAAY,GAAG;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO,EAAE,MAAM,CAAC,OAAO;QACv
B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC;IAEF,iEAAiE;IACjE,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAEtD,8CAA8C;YAC9C,IAAI,YAAY,CAAC,eAAe,EAAE,CAAC;gBACjC,YAAY,CAAC,eAAe,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,eAAe,CAAC,CAAC;YAC5F,CAAC;QACH,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,IAAI,YAAY,CAAC,WAAW,EAAE,4BAA4B,EAAE,CAAC;QAC3D,MAAM,YAAY,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,4BAA4B,CAAC,CAAC;QACzG,OAAO,CAAC,GAAG,CAAC,8BAA8B,GAAG,YAAY,CAAC;QAC1D,MAAM,CAAC,OAAO,CAAC,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,aAAa;IACb,IAAI,CAAC,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,mCAAmC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;IAErE,qCAAqC;IACrC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YACpB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC/B,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,IAAI,+BAA+B,SAAS,GAAG,CAAC,CAAC;gBAC3F,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,oDAAoD;IACpD,iFAAiF;IACjF,MAAM,eAAe,GAAsB;QACzC,MAAM,EAAE,YAAY,CAAC,MAAM;QAC3B,OAAO,EAAE,YAAY,CAAC,OAAO,IAAI,EAAE;QACnC,cAAc,EAAE;YACd,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;SAChB;KACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,eAAe,CAAC,CAAC;IAEjD,OAAO;QACL,YAAY;QACZ,OAAO;QACP,SAAS;QACT,UAAU;QACV,SAAS;QACT,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in evaluators
|
|
3
|
+
*/
|
|
4
|
+
import type { CodeEvaluator, PromptEvaluator } from '../types.js';
|
|
5
|
+
type BuiltinEvaluator = CodeEvaluator | PromptEvaluator;
|
|
6
|
+
export declare const builtinEvaluators: Record<string, BuiltinEvaluator>;
|
|
7
|
+
/**
|
|
8
|
+
* Get builtin evaluator by name
|
|
9
|
+
*/
|
|
10
|
+
export declare function getBuiltinEvaluator(name: string): BuiltinEvaluator | undefined;
|
|
11
|
+
export {};
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluators/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAElE,KAAK,gBAAgB,GAAG,aAAa,GAAG,eAAe,CAAC;AAExD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAG9D,CAAC;AAEF;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,SAAS,CAE9E"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in evaluators
|
|
3
|
+
*/
|
|
4
|
+
import structuredOutputPresence from './structured-output-presence.js';
|
|
5
|
+
import llmRequirementFulfillment from './llm-requirement-fulfillment.js';
|
|
6
|
+
export const builtinEvaluators = {
|
|
7
|
+
'structured-output-presence': structuredOutputPresence,
|
|
8
|
+
'llm-requirement-fulfillment': llmRequirementFulfillment,
|
|
9
|
+
};
|
|
10
|
+
/**
|
|
11
|
+
* Get builtin evaluator by name
|
|
12
|
+
*/
|
|
13
|
+
export function getBuiltinEvaluator(name) {
|
|
14
|
+
return builtinEvaluators[name];
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/evaluators/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,wBAAwB,MAAM,iCAAiC,CAAC;AACvE,OAAO,yBAAyB,MAAM,kCAAkC,CAAC;AAKzE,MAAM,CAAC,MAAM,iBAAiB,GAAqC;IACjE,4BAA4B,EAAE,wBAAwB;IACtD,6BAA6B,EAAE,yBAAyB;CACzD,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* LLM Requirement Fulfillment Evaluator
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Uses LLM to evaluate whether the output meets the functional requirements
|
|
5
5
|
*/
|
|
6
6
|
import type { PromptModule } from '@modular-prompt/core';
|
|
7
7
|
import type { EvaluationContext } from '../types.js';
|
|
@@ -11,4 +11,4 @@ declare const _default: {
|
|
|
11
11
|
module: PromptModule<EvaluationContext>;
|
|
12
12
|
};
|
|
13
13
|
export default _default;
|
|
14
|
-
//# sourceMappingURL=
|
|
14
|
+
//# sourceMappingURL=llm-requirement-fulfillment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-requirement-fulfillment.d.ts","sourceRoot":"","sources":["../../src/evaluators/llm-requirement-fulfillment.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAmB,iBAAiB,EAAE,MAAM,aAAa,CAAC;;;;;;AA0FtE,wBAI4B"}
|
package/dist/{src/evaluators/functional-correctness.js → evaluators/llm-requirement-fulfillment.js}
RENAMED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* LLM Requirement Fulfillment Evaluator
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Uses LLM to evaluate whether the output meets the functional requirements
|
|
5
5
|
*/
|
|
6
|
-
const
|
|
6
|
+
const llmRequirementFulfillmentModule = {
|
|
7
7
|
createContext: () => ({
|
|
8
8
|
moduleName: '',
|
|
9
9
|
prompt: '',
|
|
@@ -88,8 +88,8 @@ const functionalCorrectnessModule = {
|
|
|
88
88
|
],
|
|
89
89
|
};
|
|
90
90
|
export default {
|
|
91
|
-
name: '
|
|
92
|
-
description: '
|
|
93
|
-
module:
|
|
91
|
+
name: 'LLM Requirement Fulfillment',
|
|
92
|
+
description: 'Overall requirement fulfillment score based on LLM evaluation. Compares prompt and output to evaluate requirement fulfillment, parameter correctness, completeness, and logical consistency.',
|
|
93
|
+
module: llmRequirementFulfillmentModule,
|
|
94
94
|
};
|
|
95
|
-
//# sourceMappingURL=
|
|
95
|
+
//# sourceMappingURL=llm-requirement-fulfillment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-requirement-fulfillment.js","sourceRoot":"","sources":["../../src/evaluators/llm-requirement-fulfillment.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,MAAM,+BAA+B,GAAoC;IACvE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,uEAAuE;KACxE;IAED,YAAY,EAAE;QACZ,6CAA6C;QAC7C;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,qBAAqB;YAC5B,KAAK,EAAE;gBACL,qFAAqF;gBACrF,gFAAgF;gBAChF,uFAAuF;gBACvF,gFAAgF;aACjF;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE;gBACL,2CAA2C;gBAC3C,iDAAiD;gBACjD,0CAA0C;aAC3C;SACF;KACF;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,uBAAuB;qBACrC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,UAAU,EAAE;4BACV,sBAAsB,EAAE;gCACtB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,oBAAoB,EAAE;gCACpB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,qBAAqB,EAAE;gCACrB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,kBAAkB,EAAE;gCAClB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;yBACF;qBACF;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,SAAS,CAAC;aAC5C;SACF;KACF;CACF,CAAC;AAEF,eAAe;IACb,IAAI,EAAE,6BAA6B;IACnC,WAAW,EAAE,8LAA8L;IAC3M,MAAM,EAAE,+BAA+B;CACd,CAAC"}
|
package/dist/{src/evaluators/json-validator.d.ts → evaluators/structured-output-presence.d.ts}
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Structured Output Presence Evaluator
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Checks if structuredOutput exists and is a valid object
|
|
5
5
|
*/
|
|
6
6
|
import type { EvaluationContext, EvaluationResult } from '../types.js';
|
|
7
7
|
declare const _default: {
|
|
@@ -10,4 +10,4 @@ declare const _default: {
|
|
|
10
10
|
evaluate(context: EvaluationContext): Promise<EvaluationResult>;
|
|
11
11
|
};
|
|
12
12
|
export default _default;
|
|
13
|
-
//# sourceMappingURL=
|
|
13
|
+
//# sourceMappingURL=structured-output-presence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured-output-presence.d.ts","sourceRoot":"","sources":["../../src/evaluators/structured-output-presence.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAiB,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;;;;sBAM5D,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC;;AAJvE,wBAiD0B"}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Structured Output Presence Evaluator
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Checks if structuredOutput exists and is a valid object
|
|
5
5
|
*/
|
|
6
6
|
export default {
|
|
7
|
-
name: '
|
|
8
|
-
description: '
|
|
7
|
+
name: 'Structured Output Presence',
|
|
8
|
+
description: 'Measures structured output presence rate (percentage of runs with valid structuredOutput). Checks if structuredOutput exists and is an object type for each run.',
|
|
9
9
|
async evaluate(context) {
|
|
10
10
|
const errors = [];
|
|
11
11
|
let validCount = 0;
|
|
@@ -33,7 +33,7 @@ export default {
|
|
|
33
33
|
? (validCount / context.runs.length) * 10
|
|
34
34
|
: 0;
|
|
35
35
|
return {
|
|
36
|
-
evaluator: '
|
|
36
|
+
evaluator: 'structured-output-presence',
|
|
37
37
|
moduleName: context.moduleName,
|
|
38
38
|
score,
|
|
39
39
|
reasoning: errors.length > 0
|
|
@@ -48,4 +48,4 @@ export default {
|
|
|
48
48
|
};
|
|
49
49
|
},
|
|
50
50
|
};
|
|
51
|
-
//# sourceMappingURL=
|
|
51
|
+
//# sourceMappingURL=structured-output-presence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured-output-presence.js","sourceRoot":"","sources":["../../src/evaluators/structured-output-presence.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,eAAe;IACb,IAAI,EAAE,4BAA4B;IAClC,WAAW,EAAE,kKAAkK;IAE/K,KAAK,CAAC,QAAQ,CAAC,OAA0B;QACvC,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,MAAM,UAAU,GAA2D,EAAE,CAAC;QAE9E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,EAAE,gBAAgB,EAAE,GAAG,GAAG,CAAC,WAAW,CAAC;YAE7C,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;gBAClD,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC,CAAC;gBAC7E,SAAS;YACX,CAAC;YAED,wBAAwB;YACxB,IAAI,OAAO,gBAAgB,KAAK,QAAQ,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;gBACtE,UAAU,EAAE,CAAC;gBACb,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAG,wBAAwB,CAAC;gBACvC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;gBACtC,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;YACnC,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;YACzC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,SAAS,EAAE,4BAA4B;YACvC,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,KAAK;YACL,SAAS,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;gBAC1B,CAAC,CAAC,GAAG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,2BAA2B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBACpF,CAAC,CAAC,OAAO,UAAU,oCAAoC;YACzD,OAAO,EAAE;gBACP,UAAU;gBACV,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM;gBAC/B,MAAM;gBACN,IAAI,EAAE,UAAU;aACjB;SACF,CAAC;IACJ,CAAC;CACsB,CAAC"}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
export * from './types.js';
|
|
7
7
|
export { loadExperimentConfig } from './config/loader.js';
|
|
8
8
|
export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
|
|
9
|
-
export { baseEvaluationModule } from './
|
|
9
|
+
export { baseEvaluationModule } from './base-evaluation-module.js';
|
|
10
10
|
export { DriverManager } from './runner/driver-manager.js';
|
|
11
11
|
export { ExperimentRunner } from './runner/experiment.js';
|
|
12
12
|
export { EvaluatorRunner } from './runner/evaluator.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGzE,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAGnE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
|
|
@@ -9,7 +9,7 @@ export * from './types.js';
|
|
|
9
9
|
export { loadExperimentConfig } from './config/loader.js';
|
|
10
10
|
export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
|
|
11
11
|
// Evaluators
|
|
12
|
-
export { baseEvaluationModule } from './
|
|
12
|
+
export { baseEvaluationModule } from './base-evaluation-module.js';
|
|
13
13
|
// Runners
|
|
14
14
|
export { DriverManager } from './runner/driver-manager.js';
|
|
15
15
|
export { ExperimentRunner } from './runner/experiment.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,QAAQ;AACR,cAAc,YAAY,CAAC;AAE3B,wBAAwB;AACxB,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAEzE,aAAa;AACb,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAEnE,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,YAAY;AACZ,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
|
package/dist/logger.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C;;GAEG;AACH,eAAO,MAAM,MAAM,QAAwD,CAAC"}
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Experiment package logger
|
|
3
|
+
*/
|
|
4
|
+
import { Logger } from '@modular-prompt/utils';
|
|
5
|
+
/**
|
|
6
|
+
* Experiment package logger with 'experiment' prefix
|
|
7
|
+
*/
|
|
8
|
+
export const logger = new Logger({ prefix: 'experiment', context: 'main' });
|
|
9
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C;;GAEG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"statistics.d.ts","sourceRoot":"","sources":["../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAa,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,UAAU,EAAE;IAEzC;;OAEG;IACH,MAAM,IAAI,IAAI;IA2Bd;;OAEG;IACH,OAAO,CAAC,YAAY;IASpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;CAwB1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"statistics.js","sourceRoot":"","sources":["../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,OAAO,kBAAkB;IACT;IAApB,YAAoB,OAAqB;QAArB,YAAO,GAAP,OAAO,CAAc;IAAG,CAAC;IAE7C;;OAEG;IACH,MAAM;QACJ,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,MAAM,MAAM,CAAC,KAAK,OAAO,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;YACvF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvD,MAAM,WAAW,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;YAEpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,WAAW,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAEtG,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;gBAC/B,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAiB;QACpC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QAC5D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAE/B,OAAO,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,GAAG,IAAI,CAAC,CAAC;IACrF,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAiB;QACzC,2BAA2B;QAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAC/B,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,
CAAC,CAAC,IAAI,CAAC;QACxC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE3B,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;QACT,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,IAAI,0BAA0B,WAAW,CAAC,MAAM,SAAS,CAAC,CAAC;QAE5G,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;gBAChD,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;gBAC3D,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,GAAG,CAAC,KAAK,KAAK,OAAO,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
* --repeat <count> Number of repetitions (default: 1)
|
|
18
18
|
* --evaluate Enable evaluation phase
|
|
19
19
|
* --evaluators <names> Comma-separated evaluator names (default: all)
|
|
20
|
+
* --dry-run Display execution plan without running the experiment
|
|
20
21
|
*/
|
|
21
22
|
export {};
|
|
22
23
|
//# sourceMappingURL=run-comparison.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-comparison.d.ts","sourceRoot":"","sources":["../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;;GAmBG"}
|