@modular-prompt/experiment 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +347 -0
- package/dist/src/cli/args.d.ts +6 -0
- package/dist/src/cli/args.d.ts.map +1 -0
- package/dist/src/cli/args.js +31 -0
- package/dist/src/cli/args.js.map +1 -0
- package/dist/src/config/dynamic-loader.d.ts +41 -0
- package/dist/src/config/dynamic-loader.d.ts.map +1 -0
- package/dist/src/config/dynamic-loader.js +101 -0
- package/dist/src/config/dynamic-loader.js.map +1 -0
- package/dist/src/config/loader.d.ts +23 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/loader.js +125 -0
- package/dist/src/config/loader.js.map +1 -0
- package/dist/src/evaluators/base-module.d.ts +10 -0
- package/dist/src/evaluators/base-module.d.ts.map +1 -0
- package/dist/src/evaluators/base-module.js +103 -0
- package/dist/src/evaluators/base-module.js.map +1 -0
- package/dist/src/evaluators/functional-correctness.d.ts +14 -0
- package/dist/src/evaluators/functional-correctness.d.ts.map +1 -0
- package/dist/src/evaluators/functional-correctness.js +95 -0
- package/dist/src/evaluators/functional-correctness.js.map +1 -0
- package/dist/src/evaluators/json-validator.d.ts +13 -0
- package/dist/src/evaluators/json-validator.d.ts.map +1 -0
- package/dist/src/evaluators/json-validator.js +51 -0
- package/dist/src/evaluators/json-validator.js.map +1 -0
- package/dist/src/index.d.ts +14 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +19 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/reporter/statistics.d.ts +21 -0
- package/dist/src/reporter/statistics.d.ts.map +1 -0
- package/dist/src/reporter/statistics.js +68 -0
- package/dist/src/reporter/statistics.js.map +1 -0
- package/dist/src/run-comparison.d.ts +22 -0
- package/dist/src/run-comparison.d.ts.map +1 -0
- package/dist/src/run-comparison.js +142 -0
- package/dist/src/run-comparison.js.map +1 -0
- package/dist/src/runner/driver-manager.d.ts +30 -0
- package/dist/src/runner/driver-manager.d.ts.map +1 -0
- package/dist/src/runner/driver-manager.js +68 -0
- package/dist/src/runner/driver-manager.js.map +1 -0
- package/dist/src/runner/evaluator.d.ts +32 -0
- package/dist/src/runner/evaluator.d.ts.map +1 -0
- package/dist/src/runner/evaluator.js +146 -0
- package/dist/src/runner/evaluator.js.map +1 -0
- package/dist/src/runner/experiment.d.ts +40 -0
- package/dist/src/runner/experiment.d.ts.map +1 -0
- package/dist/src/runner/experiment.js +214 -0
- package/dist/src/runner/experiment.js.map +1 -0
- package/dist/src/types.d.ts +112 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +5 -0
- package/dist/src/types.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/examples/experiment.yaml +70 -0
- package/package.json +70 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Module Comparison Experiment
|
|
4
|
+
*
|
|
5
|
+
* Compares the performance and output quality of multiple prompt modules.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* moduler-experiment <config> [options]
|
|
9
|
+
*
|
|
10
|
+
* Arguments:
|
|
11
|
+
* <config> Config file path (YAML, TypeScript, or JavaScript)
|
|
12
|
+
*
|
|
13
|
+
* Options:
|
|
14
|
+
* --test-case <name> Test case name filter
|
|
15
|
+
* --model <provider> Model provider filter (mlx, vertexai, googlegenai)
|
|
16
|
+
* --modules <names> Comma-separated module names (default: all)
|
|
17
|
+
* --repeat <count> Number of repetitions (default: 1)
|
|
18
|
+
* --evaluate Enable evaluation phase
|
|
19
|
+
* --evaluators <names> Comma-separated evaluator names (default: all)
|
|
20
|
+
*/
|
|
21
|
+
import { parseArgs } from './cli/args.js';
|
|
22
|
+
import { loadExperimentConfig } from './config/loader.js';
|
|
23
|
+
import { loadModules, loadEvaluators } from './config/dynamic-loader.js';
|
|
24
|
+
import { DriverManager } from './runner/driver-manager.js';
|
|
25
|
+
import { ExperimentRunner } from './runner/experiment.js';
|
|
26
|
+
import { StatisticsReporter } from './reporter/statistics.js';
|
|
27
|
+
// Parse CLI arguments
const options = parseArgs();

/**
 * Print every given line to stderr, then terminate the process with exit code 1.
 *
 * @param lines - Messages to print, one console.error call per entry
 */
function exitWithError(...lines) {
    for (const line of lines) {
        console.error(line);
    }
    process.exit(1);
}

// Display header
const banner = '='.repeat(80);
console.log(banner);
console.log('Module Comparison Experiment');
console.log(banner);
console.log(`Config: ${options.configPath}`);
console.log(`Test case filter: ${options.testCaseFilter || 'all'}`);
console.log(`Model filter: ${options.modelFilter || 'all enabled models'}`);
console.log(`Modules: ${options.moduleFilter?.join(', ') || 'all'}`);
console.log(`Repeat: ${options.repeatCount} time(s)`);
console.log(`Evaluation: ${options.enableEvaluation ? 'enabled' : 'disabled'}`);
if (options.enableEvaluation) {
    console.log(`Evaluators: ${options.evaluatorFilter?.join(', ') || 'all'}`);
}
console.log(banner);
console.log();

// Load configuration
const { serverConfig, modules: configModules, testCases: configTestCases, evaluators: configEvaluators, aiService, configDir } = await loadExperimentConfig(options.configPath);

// Keep models as object for experiment runner
const models = serverConfig.models;

// Display available models for logging: enabled entries whose role is unset or 'test'
const modelEntries = Object.entries(models).filter(([_, spec]) => spec.enabled !== false && (!spec.role || spec.role === 'test'));
const describeModel = ([name, spec]) => console.log(` - ${name}: ${spec.model} (${spec.provider})`);
// NOTE(review): the provider filter below only narrows this listing; `models` is
// handed to ExperimentRunner unfiltered. Confirm the runner applies --model itself.
if (options.modelFilter) {
    const filteredEntries = modelEntries.filter(([_, spec]) => spec.provider === options.modelFilter);
    if (filteredEntries.length === 0) {
        exitWithError(`❌ No enabled test models found for provider: ${options.modelFilter}`);
    }
    console.log(`📋 Testing with ${filteredEntries.length} model(s) (filtered by ${options.modelFilter}):`);
    filteredEntries.forEach(describeModel);
}
else {
    console.log(`📋 Testing with ${modelEntries.length} model(s):`);
    modelEntries.forEach(describeModel);
}
console.log();

// Load test cases (optionally narrowed to the single name given on the CLI)
const allTestCases = configTestCases;
const testCases = options.testCaseFilter
    ? allTestCases.filter((tc) => tc.name === options.testCaseFilter)
    : allTestCases;
if (testCases.length === 0) {
    exitWithError(`❌ No test cases found${options.testCaseFilter ? ` matching: ${options.testCaseFilter}` : ''}`, ' Please add test cases to config file');
}
console.log(`🧪 Running ${testCases.length} test case(s)`);
console.log();

// Load modules (from module references)
const allModules = await loadModules(configModules, configDir);
const modules = options.moduleFilter
    ? allModules.filter(m => options.moduleFilter.includes(m.name))
    : allModules;
if (modules.length === 0) {
    exitWithError('❌ No modules to test', ' Please add modules to config file');
}
console.log(`📦 Testing ${modules.length} module(s):`);
modules.forEach(m => console.log(` - ${m.name}: ${m.description}`));
console.log();

// Get evaluators and evaluator model if evaluation is enabled
let evaluators;
let evaluatorModel;
if (options.enableEvaluation) {
    // Load evaluators (from evaluator references)
    const allEvaluators = await loadEvaluators(configEvaluators, configDir);
    evaluators = options.evaluatorFilter
        ? allEvaluators.filter(e => options.evaluatorFilter.includes(e.name))
        : allEvaluators;
    if (evaluators.length === 0) {
        exitWithError('❌ No evaluators found');
    }
    // Find evaluator model from evaluation config
    if (!serverConfig.evaluation || !serverConfig.evaluation.enabled) {
        exitWithError('❌ Evaluation is not configured in config file', ' Please add evaluation section to your config.yaml:', ' evaluation:', ' enabled: true', ' model: "model-name"', ' provider: "provider-name"');
    }
    const evaluationConfig = serverConfig.evaluation;
    // Find the specified model by name (evaluation.model is a key of the models section)
    const modelName = evaluationConfig.model;
    const modelSpec = serverConfig.models[modelName];
    if (!modelSpec || modelSpec.enabled === false) {
        exitWithError(`❌ Evaluator model not found or disabled: ${modelName}`, ' Please ensure the model is defined in the models section and enabled');
    }
    evaluatorModel = { name: modelName, spec: modelSpec };
    console.log(`🔍 Evaluation enabled with ${evaluators.length} evaluator(s):`);
    evaluators.forEach(e => console.log(` - [${e.type}] ${e.name}: ${e.description}`));
    console.log(`🔍 Evaluator model: ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
    console.log();
}

// Run experiment
const driverManager = new DriverManager();
const runner = new ExperimentRunner(aiService, driverManager, modules, testCases, models, options.repeatCount, evaluators, evaluatorModel);
let results;
try {
    results = await runner.run();
    // Display completion
    console.log(banner);
    console.log('✨ Experiment completed');
    console.log(banner);
}
finally {
    // Cleanup drivers - in a finally block so cached drivers are released even
    // when the run rejects; the rejection still propagates afterwards.
    await driverManager.cleanup();
}

// Display statistics if repeated
if (options.repeatCount > 1) {
    const reporter = new StatisticsReporter(results);
    reporter.report();
}
|
|
142
|
+
//# sourceMappingURL=run-comparison.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-comparison.js","sourceRoot":"","sources":["../../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAE9D,sBAAsB;AACtB,MAAM,OAAO,GAAG,SAAS,EAAE,CAAC;AAE5B,iBAAiB;AACjB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;AAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;AAC7C,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,cAAc,IAAI,KAAK,EAAE,CAAC,CAAC;AACpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO,CAAC,WAAW,IAAI,oBAAoB,EAAE,CAAC,CAAC;AAC5E,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,WAAW,UAAU,CAAC,CAAC;AACtD,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAChF,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,eAAe,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AAC7E,CAAC;AACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,qBAAqB;AACrB,MAAM,EACJ,YAAY,EACZ,OAAO,EAAE,aAAa,EACtB,SAAS,EAAE,eAAe,EAC1B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EACT,SAAS,EACV,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAEnD,8CAA8C;AAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;AAEnC,uCAAuC;AACvC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAC9E,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAC/D,CAAC;AAEF,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CACvE,IAAI,CAAC,QAAQ,KAAK,OAAO,CAAC,WAAW,CACtC,CAAC;IACF,IAAI,eA
Ae,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,gDAAgD,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,mBAAmB,eAAe,CAAC,MAAM,0BAA0B,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IACxG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;KAAM,CAAC;IACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,YAAY,CAAC,MAAM,YAAY,CAAC,CAAC;IAChE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACnD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;AACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,kBAAkB;AAClB,MAAM,YAAY,GAAG,eAAe,CAAC;AACrC,MAAM,SAAS,GAAG,OAAO,CAAC,cAAc;IACtC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,CAAC,cAAc,CAAC;IACtE,CAAC,CAAC,YAAY,CAAC;AAEjB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IAC3B,OAAO,CAAC,KAAK,CAAC,wBAAwB,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,cAAc,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9G,OAAO,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC;AAC3D,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,wCAAwC;AACxC,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;AAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY;IAClC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,YAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC,UAAU,CAAC;AAEf,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACtC,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,MAAM,aAAa,CAAC,CAAC;AACvD,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,8DAA8D;AAC9D,IAAI,UAAU,CAAC;AACf,IAAI,cAAc,CAAC;AACnB,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,8CAA8C;IAC9C,M
AAM,aAAa,GAAG,MAAM,cAAc,CAAC,gBAAgB,EAAE,SAAS,CAAC,CAAC;IACxE,UAAU,GAAG,OAAO,CAAC,eAAe;QAClC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,eAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtE,CAAC,CAAC,aAAa,CAAC;IAElB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,8CAA8C;IAC9C,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;QACjE,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAC/D,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,gBAAgB,GAAG,YAAY,CAAC,UAAU,CAAC;IAEjD,mCAAmC;IACnC,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC;IACzC,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;QAC9C,OAAO,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,yEAAyE,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,cAAc,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAEtD,OAAO,CAAC,GAAG,CAAC,8BAA8B,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC7E,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACpF,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;IAC3F,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,iBAAiB;AACjB,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;AAC1C,MAAM,MAAM,GAAG,IAAI,gBAAgB,CACjC,SAAS,EACT,aAAa,EACb,OAAO,EACP,SAAS,EACT,MAAM,EACN,OAAO,CAAC,WAAW,EACnB,UAAU,EACV,cAAc,CACf,CAAC;AAEF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE,CAAC;AAEnC,qBAAqB;AACrB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAE5B,kBAAkB;AAClB,MAAM,aAAa,CAAC,OA
AO,EAAE,CAAC;AAE9B,iCAAiC;AACjC,IAAI,OAAO,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACjD,QAAQ,CAAC,MAAM,EAAE,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Driver manager for caching and cleanup
|
|
3
|
+
*/
|
|
4
|
+
import type { AIService, ModelSpec } from '@modular-prompt/driver';
|
|
5
|
+
export declare class DriverManager {
|
|
6
|
+
private cache;
|
|
7
|
+
/**
|
|
8
|
+
* Get or create driver for a model
|
|
9
|
+
*
|
|
10
|
+
* Drivers are cached by model name.
|
|
11
|
+
* Reuses existing driver if available.
|
|
12
|
+
*
|
|
13
|
+
* @param aiService - AIService instance
|
|
14
|
+
* @param modelName - Model name for caching
|
|
15
|
+
* @param modelSpec - Model spec
|
|
16
|
+
* @returns Driver instance
|
|
17
|
+
*/
|
|
18
|
+
getOrCreate(aiService: AIService, modelName: string, modelSpec: ModelSpec): Promise<any>;
|
|
19
|
+
/**
|
|
20
|
+
* Close and remove a specific driver from cache
|
|
21
|
+
*
|
|
22
|
+
* @param modelName - Model name to close
|
|
23
|
+
*/
|
|
24
|
+
close(modelName: string): Promise<void>;
|
|
25
|
+
/**
|
|
26
|
+
* Close all cached drivers
|
|
27
|
+
*/
|
|
28
|
+
cleanup(): Promise<void>;
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=driver-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"driver-manager.d.ts","sourceRoot":"","sources":["../../../src/runner/driver-manager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAEnE,qBAAa,aAAa;IACxB,OAAO,CAAC,KAAK,CAA0B;IAEvC;;;;;;;;;;OAUG;IACG,WAAW,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC;IAY9F;;;;OAIG;IACG,KAAK,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkB7C;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAiB/B"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Driver manager for caching and cleanup
 *
 * Holds at most one driver per model name so repeated requests reuse it,
 * and closes drivers individually or all at once.
 */
export class DriverManager {
    /** Cached drivers, keyed by model name. */
    cache = new Map();

    /**
     * Get or create driver for a model
     *
     * Drivers are cached by model name.
     * Reuses existing driver if available.
     *
     * @param aiService - AIService instance
     * @param modelName - Model name for caching
     * @param modelSpec - Model spec
     * @returns Driver instance
     */
    async getOrCreate(aiService, modelName, modelSpec) {
        if (this.cache.has(modelName)) {
            console.log(` Using cached driver for ${modelName}`);
            return this.cache.get(modelName);
        }
        console.log(` Creating new driver for ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
        const driver = await aiService.createDriver(modelSpec);
        this.cache.set(modelName, driver);
        return driver;
    }

    /**
     * Close and remove a specific driver from cache
     *
     * A failure while closing is logged, not thrown. The entry is removed
     * from the cache in either case, so a driver whose close() failed is
     * never handed out again by getOrCreate.
     *
     * @param modelName - Model name to close
     */
    async close(modelName) {
        const driver = this.cache.get(modelName);
        if (!driver) {
            return;
        }
        try {
            if (typeof driver.close === 'function') {
                await driver.close();
                console.log(` ✅ Closed driver: ${modelName}`);
            }
        }
        catch (error) {
            console.log(` ⚠️ Failed to close driver ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
        }
        finally {
            // Evict regardless of outcome: the driver is unusable from here on.
            this.cache.delete(modelName);
        }
    }

    /**
     * Close all cached drivers
     *
     * Each driver is closed independently; a failure is logged and does not
     * stop the remaining drivers from being closed. The cache is emptied
     * afterwards so later getOrCreate calls build fresh drivers.
     */
    async cleanup() {
        console.log();
        console.log('🧹 Cleaning up...');
        for (const [key, driver] of this.cache.entries()) {
            try {
                if (driver && typeof driver.close === 'function') {
                    await driver.close();
                    console.log(` Closed driver: ${key}`);
                }
            }
            catch (error) {
                console.log(` Warning: Failed to close driver ${key}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        // Drop every entry so closed drivers are not returned from the cache.
        this.cache.clear();
        console.log('✅ Cleanup completed');
    }
}
|
|
68
|
+
//# sourceMappingURL=driver-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"driver-manager.js","sourceRoot":"","sources":["../../../src/runner/driver-manager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,OAAO,aAAa;IAChB,KAAK,GAAG,IAAI,GAAG,EAAe,CAAC;IAEvC;;;;;;;;;;OAUG;IACH,KAAK,CAAC,WAAW,CAAC,SAAoB,EAAE,SAAiB,EAAE,SAAoB;QAC7E,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,8BAA8B,SAAS,EAAE,CAAC,CAAC;YACvD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACnC,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,8BAA8B,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;QAClG,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,KAAK,CAAC,SAAiB;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;gBACvC,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;gBACrB,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,EAAE,CAAC,CAAC;YAClD,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,GAAG,CAAC,iCAAiC,SAAS,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACvH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QAEjC,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YACjD,IAAI,CAAC;gBACH,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;oBACjD,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;oBACrB,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,GAAG,CAAC,sCAAsC,GAAG,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACtH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;IACrC,CAAC;CACF"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator runner
|
|
3
|
+
*
|
|
4
|
+
* Runs evaluation (code or prompt-based) for a single module
|
|
5
|
+
*/
|
|
6
|
+
import type { AIService, ModelSpec } from '@modular-prompt/driver';
|
|
7
|
+
import type { EvaluationContext, EvaluationResult } from '../types.js';
|
|
8
|
+
import type { LoadedEvaluator } from '../config/dynamic-loader.js';
|
|
9
|
+
export declare class EvaluatorRunner {
|
|
10
|
+
private aiService;
|
|
11
|
+
private evaluatorModel;
|
|
12
|
+
constructor(aiService: AIService, evaluatorModel: ModelSpec);
|
|
13
|
+
/**
|
|
14
|
+
* Run evaluation for a single module
|
|
15
|
+
*
|
|
16
|
+
* @param evaluator - Loaded evaluator
|
|
17
|
+
* @param context - Evaluation context
|
|
18
|
+
* @returns Evaluation result
|
|
19
|
+
*/
|
|
20
|
+
evaluate(evaluator: LoadedEvaluator, context: EvaluationContext): Promise<EvaluationResult>;
|
|
21
|
+
/**
|
|
22
|
+
* Evaluate using prompt-based evaluator
|
|
23
|
+
*/
|
|
24
|
+
private evaluateWithPrompt;
|
|
25
|
+
/**
|
|
26
|
+
* Display evaluation results
|
|
27
|
+
*
|
|
28
|
+
* @param results - Evaluation results to display
|
|
29
|
+
*/
|
|
30
|
+
displayResults(results: EvaluationResult[]): void;
|
|
31
|
+
}
|
|
32
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../src/runner/evaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnE,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAEnE,qBAAa,eAAe;IAExB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,cAAc;gBADd,SAAS,EAAE,SAAS,EACpB,cAAc,EAAE,SAAS;IAGnC;;;;;;OAMG;IACG,QAAQ,CACZ,SAAS,EAAE,eAAe,EAC1B,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,gBAAgB,CAAC;IAuB5B;;OAEG;YACW,kBAAkB;IAiEhC;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,IAAI;CA2ClD"}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator runner
|
|
3
|
+
*
|
|
4
|
+
* Runs evaluation (code or prompt-based) for a single module
|
|
5
|
+
*/
|
|
6
|
+
import { compile } from '@modular-prompt/core';
|
|
7
|
+
export class EvaluatorRunner {
    aiService;
    evaluatorModel;

    /**
     * @param aiService - Service whose createDriver() builds the driver for prompt-based evaluators
     * @param evaluatorModel - Model spec passed to aiService.createDriver()
     */
    constructor(aiService, evaluatorModel) {
        this.aiService = aiService;
        this.evaluatorModel = evaluatorModel;
    }

    /**
     * Run evaluation for a single module
     *
     * Evaluators with type 'code' are executed directly; every other type is
     * treated as prompt-based. Any thrown error is logged and converted into
     * a result carrying an `error` message, so this method does not reject.
     *
     * @param evaluator - Loaded evaluator
     * @param context - Evaluation context
     * @returns Evaluation result
     */
    async evaluate(evaluator, context) {
        console.log(`🔍 [${evaluator.name}] Evaluating ${context.moduleName}...`);
        try {
            if (evaluator.type === 'code') {
                // Code evaluator - direct execution
                return await evaluator.codeEvaluator.evaluate(context);
            }
            // Prompt evaluator - LLM execution
            return await this.evaluateWithPrompt(evaluator, context);
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            console.log(`🔍 [${evaluator.name}] ❌ Error: ${errorMessage}`);
            return {
                evaluator: evaluator.name,
                moduleName: context.moduleName,
                error: errorMessage,
            };
        }
    }

    /**
     * Evaluate using prompt-based evaluator
     *
     * Compiles the evaluator's prompt module against the context, queries a
     * freshly created driver and extracts score / reasoning / details from
     * the structured output, or from a fenced json block in the text. If
     * neither is available the raw text is returned as the reasoning.
     *
     * @param evaluator - Loaded evaluator providing promptEvaluator.module
     * @param context - Evaluation context
     * @returns Evaluation result
     */
    async evaluateWithPrompt(evaluator, context) {
        // Compile evaluation prompt
        const compiled = compile(evaluator.promptEvaluator.module, context);
        // Create driver for evaluator model
        const driver = await this.aiService.createDriver(this.evaluatorModel);
        let result;
        try {
            // Run evaluation
            const startTime = Date.now();
            result = await driver.query(compiled, {
                temperature: 0.3, // Lower temperature for consistent evaluation
                maxTokens: 4096,
            });
            const elapsed = Date.now() - startTime;
            console.log(`🔍 [${evaluator.name}] ✅ Completed (${elapsed}ms)`);
        }
        finally {
            // Close driver even when the query rejected. A close failure is only
            // logged so it neither hides the query error nor discards a result.
            try {
                if (driver && typeof driver.close === 'function') {
                    await driver.close();
                }
            }
            catch (closeError) {
                console.log(`🔍 [${evaluator.name}] ⚠️ Failed to close driver: ${closeError instanceof Error ? closeError.message : String(closeError)}`);
            }
        }
        // Shared shape for results taken from a structured or parsed payload
        const fromPayload = (payload) => ({
            evaluator: evaluator.name,
            moduleName: context.moduleName,
            score: payload.score,
            reasoning: payload.reasoning,
            details: payload.details,
            raw: result.content,
        });
        // Use structured output if available
        if (result.structuredOutput) {
            return fromPayload(result.structuredOutput);
        }
        // Fallback: try to parse JSON from content
        try {
            const jsonMatch = result.content.match(/```json\s*\n([\s\S]*?)\n```/);
            if (jsonMatch) {
                return fromPayload(JSON.parse(jsonMatch[1]));
            }
        }
        catch {
            console.log(`🔍 [${evaluator.name}] ⚠️ Failed to parse JSON response`);
        }
        // Fallback: return raw response
        return {
            evaluator: evaluator.name,
            moduleName: context.moduleName,
            reasoning: result.content,
            raw: result.content,
        };
    }

    /**
     * Display evaluation results
     *
     * Results are grouped by module name in first-seen order and printed to
     * stdout.
     *
     * @param results - Evaluation results to display
     */
    displayResults(results) {
        const rule = '='.repeat(80);
        console.log();
        console.log(rule);
        console.log('📊 Evaluation Results');
        console.log(rule);
        console.log();
        // Group by module
        const byModule = new Map();
        for (const result of results) {
            const bucket = byModule.get(result.moduleName);
            if (bucket) {
                bucket.push(result);
            }
            else {
                byModule.set(result.moduleName, [result]);
            }
        }
        for (const [moduleName, moduleResults] of byModule) {
            console.log(`📦 ${moduleName}`);
            console.log('─'.repeat(80));
            for (const result of moduleResults) {
                console.log(` 🔍 ${result.evaluator}`);
                if (result.error) {
                    console.log(` ❌ Error: ${result.error}`);
                }
                else {
                    // Scores parsed from model output are not guaranteed to be
                    // numbers; only format with toFixed when they are.
                    if (typeof result.score === 'number') {
                        console.log(` Score: ${result.score.toFixed(1)}/10`);
                    }
                    else if (result.score !== undefined && result.score !== null) {
                        console.log(` Score: ${result.score}/10`);
                    }
                    if (result.reasoning) {
                        console.log(` Reasoning: ${result.reasoning}`);
                    }
                    if (result.details) {
                        console.log(` Details:`);
                        console.log(` ${JSON.stringify(result.details, null, 2).split('\n').join('\n ')}`);
                    }
                }
                console.log();
            }
        }
        console.log(rule);
    }
}
|
|
146
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../src/runner/evaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAK/C,MAAM,OAAO,eAAe;IAEhB;IACA;IAFV,YACU,SAAoB,EACpB,cAAyB;QADzB,cAAS,GAAT,SAAS,CAAW;QACpB,mBAAc,GAAd,cAAc,CAAW;IAChC,CAAC;IAEJ;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CACZ,SAA0B,EAC1B,OAA0B;QAE1B,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,gBAAgB,OAAO,CAAC,UAAU,KAAK,CAAC,CAAC;QAE1E,IAAI,CAAC;YACH,IAAI,SAAS,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC9B,oCAAoC;gBACpC,OAAO,MAAM,SAAS,CAAC,aAAc,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,mCAAmC;gBACnC,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,cAAc,YAAY,EAAE,CAAC,CAAC;YAE/D,OAAO;gBACL,SAAS,EAAE,SAAS,CAAC,IAAI;gBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,KAAK,EAAE,YAAY;aACpB,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAC9B,SAA0B,EAC1B,OAA0B;QAE1B,4BAA4B;QAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,eAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAErE,oCAAoC;QACpC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEtE,iBAAiB;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE;YAC1C,WAAW,EAAE,GAAG,EAAE,8CAA8C;YAChE,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAEvC,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,kBAAkB,OAAO,KAAK,CAAC,CAAC;QAEjE,eAAe;QACf,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YACjD,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACvB,CAAC;QAED,qCAAqC;QACrC,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC5B,MAAM,UAAU,GAAG,MAAM,CAAC,gBAAuB,CAAC;YAClD,OAAO;gBACL,SAAS,EAAE,SAAS,CAAC,IAAI;gBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,KAAK,EAAE,UAAU,CAAC,KAAK;gBACvB,SAAS,EAAE,UAAU,CAAC,SAAS;gBAC/B,OAAO,EAAE,UAAU,CAAC,OAAO;gBAC3B,GAAG,EAAE,MAAM,CAAC,OAAO;aACpB,C
AAC;QACJ,CAAC;QAED,2CAA2C;QAC3C,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YACtE,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,OAAO;oBACL,SAAS,EAAE,SAAS,CAAC,IAAI;oBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;oBAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;oBACnB,SAAS,EAAE,MAAM,CAAC,SAAS;oBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,GAAG,EAAE,MAAM,CAAC,OAAO;iBACpB,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,qCAAqC,CAAC,CAAC;QAC1E,CAAC;QAED,gCAAgC;QAChC,OAAO;YACL,SAAS,EAAE,SAAS,CAAC,IAAI;YACzB,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,SAAS,EAAE,MAAM,CAAC,OAAO;YACzB,GAAG,EAAE,MAAM,CAAC,OAAO;SACpB,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,cAAc,CAAC,OAA2B;QACxC,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA8B,CAAC;QACvD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;YACtC,CAAC;YACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChD,CAAC;QAED,KAAK,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC,IAAI,QAAQ,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,MAAM,UAAU,EAAE,CAAC,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;gBACnC,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;gBAEzC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBACjB,OAAO,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAChD,CAAC;qBAAM,CAAC;oBACN,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;wBAC/B,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;oBAC5D,CAAC;oBACD,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;wBACrB,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACtD,CAAC;oBACD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnB,OAAO,CAA
C,GAAG,CAAC,gBAAgB,CAAC,CAAC;wBAC9B,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;oBAC/F,CAAC;gBACH,CAAC;gBACD,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;CACF"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Experiment runner - orchestrates the entire experiment
|
|
3
|
+
*/
|
|
4
|
+
import type { AIService, ModelSpec } from '@modular-prompt/driver';
|
|
5
|
+
import type { ModuleDefinition, TestResult, TestCase } from '../types.js';
|
|
6
|
+
import type { DriverManager } from './driver-manager.js';
|
|
7
|
+
import type { LoadedEvaluator } from '../config/dynamic-loader.js';
|
|
8
|
+
export declare class ExperimentRunner {
|
|
9
|
+
private aiService;
|
|
10
|
+
private driverManager;
|
|
11
|
+
private modules;
|
|
12
|
+
private testCases;
|
|
13
|
+
private models;
|
|
14
|
+
private repeatCount;
|
|
15
|
+
private evaluators?;
|
|
16
|
+
private evaluatorModel?;
|
|
17
|
+
constructor(aiService: AIService, driverManager: DriverManager, modules: ModuleDefinition[], testCases: TestCase[], models: Record<string, ModelSpec>, repeatCount: number, evaluators?: LoadedEvaluator[] | undefined, evaluatorModel?: {
|
|
18
|
+
name: string;
|
|
19
|
+
spec: ModelSpec;
|
|
20
|
+
} | undefined);
|
|
21
|
+
/**
|
|
22
|
+
* Run the experiment
|
|
23
|
+
*
|
|
24
|
+
* @returns Array of TestResult
|
|
25
|
+
*/
|
|
26
|
+
run(): Promise<TestResult[]>;
|
|
27
|
+
/**
|
|
28
|
+
* Run module test with multiple repetitions
|
|
29
|
+
*/
|
|
30
|
+
private runModuleTest;
|
|
31
|
+
/**
|
|
32
|
+
* Run evaluation phase
|
|
33
|
+
*/
|
|
34
|
+
private runEvaluationPhase;
|
|
35
|
+
/**
|
|
36
|
+
* Compare prompts across modules
|
|
37
|
+
*/
|
|
38
|
+
private comparePrompts;
|
|
39
|
+
}
|
|
40
|
+
//# sourceMappingURL=experiment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"experiment.d.ts","sourceRoot":"","sources":["../../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAe,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAChF,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAuC,MAAM,aAAa,CAAC;AAC/G,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAGnE,qBAAa,gBAAgB;IAEzB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,aAAa;IACrB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,cAAc,CAAC;gBAPf,SAAS,EAAE,SAAS,EACpB,aAAa,EAAE,aAAa,EAC5B,OAAO,EAAE,gBAAgB,EAAE,EAC3B,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,EACjC,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,eAAe,EAAE,YAAA,EAC9B,cAAc,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,SAAS,CAAA;KAAE,YAAA;IAG5D;;;;OAIG;IACG,GAAG,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;IA6GlC;;OAEG;YACW,aAAa;IA0C3B;;OAEG;YACW,kBAAkB;IA2BhC;;OAEG;IACH,OAAO,CAAC,cAAc;CAgCvB"}
|