@modular-prompt/experiment 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +347 -0
  3. package/dist/src/cli/args.d.ts +6 -0
  4. package/dist/src/cli/args.d.ts.map +1 -0
  5. package/dist/src/cli/args.js +31 -0
  6. package/dist/src/cli/args.js.map +1 -0
  7. package/dist/src/config/dynamic-loader.d.ts +41 -0
  8. package/dist/src/config/dynamic-loader.d.ts.map +1 -0
  9. package/dist/src/config/dynamic-loader.js +101 -0
  10. package/dist/src/config/dynamic-loader.js.map +1 -0
  11. package/dist/src/config/loader.d.ts +23 -0
  12. package/dist/src/config/loader.d.ts.map +1 -0
  13. package/dist/src/config/loader.js +125 -0
  14. package/dist/src/config/loader.js.map +1 -0
  15. package/dist/src/evaluators/base-module.d.ts +10 -0
  16. package/dist/src/evaluators/base-module.d.ts.map +1 -0
  17. package/dist/src/evaluators/base-module.js +103 -0
  18. package/dist/src/evaluators/base-module.js.map +1 -0
  19. package/dist/src/evaluators/functional-correctness.d.ts +14 -0
  20. package/dist/src/evaluators/functional-correctness.d.ts.map +1 -0
  21. package/dist/src/evaluators/functional-correctness.js +95 -0
  22. package/dist/src/evaluators/functional-correctness.js.map +1 -0
  23. package/dist/src/evaluators/json-validator.d.ts +13 -0
  24. package/dist/src/evaluators/json-validator.d.ts.map +1 -0
  25. package/dist/src/evaluators/json-validator.js +51 -0
  26. package/dist/src/evaluators/json-validator.js.map +1 -0
  27. package/dist/src/index.d.ts +14 -0
  28. package/dist/src/index.d.ts.map +1 -0
  29. package/dist/src/index.js +19 -0
  30. package/dist/src/index.js.map +1 -0
  31. package/dist/src/reporter/statistics.d.ts +21 -0
  32. package/dist/src/reporter/statistics.d.ts.map +1 -0
  33. package/dist/src/reporter/statistics.js +68 -0
  34. package/dist/src/reporter/statistics.js.map +1 -0
  35. package/dist/src/run-comparison.d.ts +22 -0
  36. package/dist/src/run-comparison.d.ts.map +1 -0
  37. package/dist/src/run-comparison.js +142 -0
  38. package/dist/src/run-comparison.js.map +1 -0
  39. package/dist/src/runner/driver-manager.d.ts +30 -0
  40. package/dist/src/runner/driver-manager.d.ts.map +1 -0
  41. package/dist/src/runner/driver-manager.js +68 -0
  42. package/dist/src/runner/driver-manager.js.map +1 -0
  43. package/dist/src/runner/evaluator.d.ts +32 -0
  44. package/dist/src/runner/evaluator.d.ts.map +1 -0
  45. package/dist/src/runner/evaluator.js +146 -0
  46. package/dist/src/runner/evaluator.js.map +1 -0
  47. package/dist/src/runner/experiment.d.ts +40 -0
  48. package/dist/src/runner/experiment.d.ts.map +1 -0
  49. package/dist/src/runner/experiment.js +214 -0
  50. package/dist/src/runner/experiment.js.map +1 -0
  51. package/dist/src/types.d.ts +112 -0
  52. package/dist/src/types.d.ts.map +1 -0
  53. package/dist/src/types.js +5 -0
  54. package/dist/src/types.js.map +1 -0
  55. package/dist/tsconfig.tsbuildinfo +1 -0
  56. package/examples/experiment.yaml +70 -0
  57. package/package.json +70 -0
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Module Comparison Experiment
4
+ *
5
+ * Compares the performance and output quality of multiple prompt modules.
6
+ *
7
+ * Usage:
8
+ * moduler-experiment <config> [options]
9
+ *
10
+ * Arguments:
11
+ * <config> Config file path (YAML, TypeScript, or JavaScript)
12
+ *
13
+ * Options:
14
+ * --test-case <name> Test case name filter
15
+ * --model <provider> Model provider filter (mlx, vertexai, googlegenai)
16
+ * --modules <names> Comma-separated module names (default: all)
17
+ * --repeat <count> Number of repetitions (default: 1)
18
+ * --evaluate Enable evaluation phase
19
+ * --evaluators <names> Comma-separated evaluator names (default: all)
20
+ */
21
+ import { parseArgs } from './cli/args.js';
22
+ import { loadExperimentConfig } from './config/loader.js';
23
+ import { loadModules, loadEvaluators } from './config/dynamic-loader.js';
24
+ import { DriverManager } from './runner/driver-manager.js';
25
+ import { ExperimentRunner } from './runner/experiment.js';
26
+ import { StatisticsReporter } from './reporter/statistics.js';
// Parse CLI arguments.
const options = parseArgs();

// Display header: echo the effective options so every run log is self-describing.
console.log('='.repeat(80));
console.log('Module Comparison Experiment');
console.log('='.repeat(80));
console.log(`Config: ${options.configPath}`);
console.log(`Test case filter: ${options.testCaseFilter || 'all'}`);
console.log(`Model filter: ${options.modelFilter || 'all enabled models'}`);
console.log(`Modules: ${options.moduleFilter?.join(', ') || 'all'}`);
console.log(`Repeat: ${options.repeatCount} time(s)`);
console.log(`Evaluation: ${options.enableEvaluation ? 'enabled' : 'disabled'}`);
if (options.enableEvaluation) {
    console.log(`Evaluators: ${options.evaluatorFilter?.join(', ') || 'all'}`);
}
console.log('='.repeat(80));
console.log();

// Load configuration.
const { serverConfig, modules: configModules, testCases: configTestCases, evaluators: configEvaluators, aiService, configDir } = await loadExperimentConfig(options.configPath);

// Full model map from the config; also used below to resolve the evaluator model.
const models = serverConfig.models;

// Candidate test models: not explicitly disabled, and either role-less or role "test".
const modelEntries = Object.entries(models).filter(([_, spec]) => spec.enabled !== false && (!spec.role || spec.role === 'test'));

// Apply the optional --model provider filter.
const selectedEntries = options.modelFilter
    ? modelEntries.filter(([_, spec]) => spec.provider === options.modelFilter)
    : modelEntries;
if (options.modelFilter) {
    if (selectedEntries.length === 0) {
        console.error(`❌ No enabled test models found for provider: ${options.modelFilter}`);
        process.exit(1);
    }
    console.log(`📋 Testing with ${selectedEntries.length} model(s) (filtered by ${options.modelFilter}):`);
}
else {
    console.log(`📋 Testing with ${selectedEntries.length} model(s):`);
}
selectedEntries.forEach(([name, spec]) => console.log(` - ${name}: ${spec.model} (${spec.provider})`));
console.log();

// Models handed to the runner. Previously the unfiltered map was always passed,
// so --model only changed the log output and never restricted the actual run.
// Without a filter the original map is passed through untouched.
const runnerModels = options.modelFilter ? Object.fromEntries(selectedEntries) : models;

// Load test cases, optionally narrowed to a single name.
const allTestCases = configTestCases;
const testCases = options.testCaseFilter
    ? allTestCases.filter((tc) => tc.name === options.testCaseFilter)
    : allTestCases;
if (testCases.length === 0) {
    console.error(`❌ No test cases found${options.testCaseFilter ? ` matching: ${options.testCaseFilter}` : ''}`);
    console.error(' Please add test cases to config file');
    process.exit(1);
}
console.log(`🧪 Running ${testCases.length} test case(s)`);
console.log();

// Load modules (from module references), optionally narrowed by --modules.
const allModules = await loadModules(configModules, configDir);
const modules = options.moduleFilter
    ? allModules.filter(m => options.moduleFilter.includes(m.name))
    : allModules;
if (modules.length === 0) {
    console.error('❌ No modules to test');
    console.error(' Please add modules to config file');
    process.exit(1);
}
console.log(`📦 Testing ${modules.length} module(s):`);
modules.forEach(m => console.log(` - ${m.name}: ${m.description}`));
console.log();

// Evaluators and evaluator model stay undefined unless --evaluate is given.
let evaluators;
let evaluatorModel;
if (options.enableEvaluation) {
    // Load evaluators (from evaluator references), optionally narrowed by --evaluators.
    const allEvaluators = await loadEvaluators(configEvaluators, configDir);
    evaluators = options.evaluatorFilter
        ? allEvaluators.filter(e => options.evaluatorFilter.includes(e.name))
        : allEvaluators;
    if (evaluators.length === 0) {
        console.error('❌ No evaluators found');
        process.exit(1);
    }

    // The evaluation section must exist and be enabled in the config.
    if (!serverConfig.evaluation || !serverConfig.evaluation.enabled) {
        console.error('❌ Evaluation is not configured in config file');
        console.error(' Please add evaluation section to your config.yaml:');
        console.error(' evaluation:');
        console.error(' enabled: true');
        console.error(' model: "model-name"');
        console.error(' provider: "provider-name"');
        process.exit(1);
    }
    const evaluationConfig = serverConfig.evaluation;

    // Resolve the evaluator model by name against the full (unfiltered) model map.
    const modelName = evaluationConfig.model;
    const modelSpec = serverConfig.models[modelName];
    if (!modelSpec || modelSpec.enabled === false) {
        console.error(`❌ Evaluator model not found or disabled: ${modelName}`);
        console.error(' Please ensure the model is defined in the models section and enabled');
        process.exit(1);
    }
    evaluatorModel = { name: modelName, spec: modelSpec };
    console.log(`🔍 Evaluation enabled with ${evaluators.length} evaluator(s):`);
    evaluators.forEach(e => console.log(` - [${e.type}] ${e.name}: ${e.description}`));
    console.log(`🔍 Evaluator model: ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
    console.log();
}

// Run experiment.
const driverManager = new DriverManager();
const runner = new ExperimentRunner(aiService, driverManager, modules, testCases, runnerModels, options.repeatCount, evaluators, evaluatorModel);
let results;
try {
    results = await runner.run();

    // Display completion.
    console.log('='.repeat(80));
    console.log('✨ Experiment completed');
    console.log('='.repeat(80));
}
finally {
    // Cleanup drivers even when the run rejects, so cached drivers are not leaked.
    await driverManager.cleanup();
}

// Display statistics if repeated.
if (options.repeatCount > 1) {
    const reporter = new StatisticsReporter(results);
    reporter.report();
}
142
+ //# sourceMappingURL=run-comparison.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-comparison.js","sourceRoot":"","sources":["../../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAE9D,sBAAsB;AACtB,MAAM,OAAO,GAAG,SAAS,EAAE,CAAC;AAE5B,iBAAiB;AACjB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;AAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;AAC7C,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,cAAc,IAAI,KAAK,EAAE,CAAC,CAAC;AACpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO,CAAC,WAAW,IAAI,oBAAoB,EAAE,CAAC,CAAC;AAC5E,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,WAAW,UAAU,CAAC,CAAC;AACtD,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAChF,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,eAAe,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AAC7E,CAAC;AACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,qBAAqB;AACrB,MAAM,EACJ,YAAY,EACZ,OAAO,EAAE,aAAa,EACtB,SAAS,EAAE,eAAe,EAC1B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EACT,SAAS,EACV,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAEnD,8CAA8C;AAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;AAEnC,uCAAuC;AACvC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAC9E,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAC/D,CAAC;AAEF,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CACvE,IAAI,CAAC,QAAQ,KAAK,OAAO,CAAC,WAAW,CACtC,CAAC;IACF,IAAI,
eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,gDAAgD,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,mBAAmB,eAAe,CAAC,MAAM,0BAA0B,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IACxG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;KAAM,CAAC;IACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,YAAY,CAAC,MAAM,YAAY,CAAC,CAAC;IAChE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACnD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;AACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,kBAAkB;AAClB,MAAM,YAAY,GAAG,eAAe,CAAC;AACrC,MAAM,SAAS,GAAG,OAAO,CAAC,cAAc;IACtC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,CAAC,cAAc,CAAC;IACtE,CAAC,CAAC,YAAY,CAAC;AAEjB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IAC3B,OAAO,CAAC,KAAK,CAAC,wBAAwB,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,cAAc,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9G,OAAO,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC;AAC3D,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,wCAAwC;AACxC,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;AAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY;IAClC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,YAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC,UAAU,CAAC;AAEf,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACtC,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,MAAM,aAAa,CAAC,CAAC;AACvD,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,8DAA8D;AAC9D,IAAI,UAAU,CAAC;AACf,IAAI,cAAc,CAAC;AACnB,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,8CAA8C;IAC9C
,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,gBAAgB,EAAE,SAAS,CAAC,CAAC;IACxE,UAAU,GAAG,OAAO,CAAC,eAAe;QAClC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,eAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtE,CAAC,CAAC,aAAa,CAAC;IAElB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,8CAA8C;IAC9C,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;QACjE,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAC/D,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,gBAAgB,GAAG,YAAY,CAAC,UAAU,CAAC;IAEjD,mCAAmC;IACnC,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC;IACzC,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;QAC9C,OAAO,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,yEAAyE,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,cAAc,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAEtD,OAAO,CAAC,GAAG,CAAC,8BAA8B,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC7E,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACpF,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;IAC3F,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,iBAAiB;AACjB,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;AAC1C,MAAM,MAAM,GAAG,IAAI,gBAAgB,CACjC,SAAS,EACT,aAAa,EACb,OAAO,EACP,SAAS,EACT,MAAM,EACN,OAAO,CAAC,WAAW,EACnB,UAAU,EACV,cAAc,CACf,CAAC;AAEF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE,CAAC;AAEnC,qBAAqB;AACrB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAE5B,kBAAkB;AAClB,MAAM,aAAa,CAAC,
OAAO,EAAE,CAAC;AAE9B,iCAAiC;AACjC,IAAI,OAAO,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACjD,QAAQ,CAAC,MAAM,EAAE,CAAC;AACpB,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Driver manager for caching and cleanup
3
+ */
4
+ import type { AIService, ModelSpec } from '@modular-prompt/driver';
/**
 * Caches one driver per model name and closes cached drivers on request.
 */
export declare class DriverManager {
    /** Backing store for the cached drivers, keyed by model name. */
    private cache;
    /**
     * Return the driver cached under `modelName`, creating and caching one
     * through `aiService.createDriver(modelSpec)` when none exists yet.
     *
     * @param aiService - Service used to create a driver on a cache miss
     * @param modelName - Cache key for the driver
     * @param modelSpec - Spec handed to `createDriver` on a cache miss
     * @returns The cached or newly created driver
     */
    getOrCreate(aiService: AIService, modelName: string, modelSpec: ModelSpec): Promise<any>;
    /**
     * Close the driver cached under `modelName`, if any, and drop it from the cache.
     *
     * @param modelName - Cache key of the driver to close
     */
    close(modelName: string): Promise<void>;
    /**
     * Close every cached driver that exposes a `close` function.
     */
    cleanup(): Promise<void>;
}
30
+ //# sourceMappingURL=driver-manager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"driver-manager.d.ts","sourceRoot":"","sources":["../../../src/runner/driver-manager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAEnE,qBAAa,aAAa;IACxB,OAAO,CAAC,KAAK,CAA0B;IAEvC;;;;;;;;;;OAUG;IACG,WAAW,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC;IAY9F;;;;OAIG;IACG,KAAK,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkB7C;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAiB/B"}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Driver manager for caching and cleanup
3
+ */
4
+ export class DriverManager {
5
+ cache = new Map();
6
+ /**
7
+ * Get or create driver for a model
8
+ *
9
+ * Drivers are cached by model name.
10
+ * Reuses existing driver if available.
11
+ *
12
+ * @param aiService - AIService instance
13
+ * @param modelName - Model name for caching
14
+ * @param modelSpec - Model spec
15
+ * @returns Driver instance
16
+ */
17
+ async getOrCreate(aiService, modelName, modelSpec) {
18
+ if (this.cache.has(modelName)) {
19
+ console.log(` Using cached driver for ${modelName}`);
20
+ return this.cache.get(modelName);
21
+ }
22
+ console.log(` Creating new driver for ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
23
+ const driver = await aiService.createDriver(modelSpec);
24
+ this.cache.set(modelName, driver);
25
+ return driver;
26
+ }
27
+ /**
28
+ * Close and remove a specific driver from cache
29
+ *
30
+ * @param modelName - Model name to close
31
+ */
32
+ async close(modelName) {
33
+ const driver = this.cache.get(modelName);
34
+ if (!driver) {
35
+ return;
36
+ }
37
+ try {
38
+ if (typeof driver.close === 'function') {
39
+ await driver.close();
40
+ console.log(` ✅ Closed driver: ${modelName}`);
41
+ }
42
+ this.cache.delete(modelName);
43
+ }
44
+ catch (error) {
45
+ console.log(` ⚠️ Failed to close driver ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
46
+ }
47
+ }
48
+ /**
49
+ * Close all cached drivers
50
+ */
51
+ async cleanup() {
52
+ console.log();
53
+ console.log('🧹 Cleaning up...');
54
+ for (const [key, driver] of this.cache.entries()) {
55
+ try {
56
+ if (driver && typeof driver.close === 'function') {
57
+ await driver.close();
58
+ console.log(` Closed driver: ${key}`);
59
+ }
60
+ }
61
+ catch (error) {
62
+ console.log(` Warning: Failed to close driver ${key}: ${error instanceof Error ? error.message : String(error)}`);
63
+ }
64
+ }
65
+ console.log('✅ Cleanup completed');
66
+ }
67
+ }
68
+ //# sourceMappingURL=driver-manager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"driver-manager.js","sourceRoot":"","sources":["../../../src/runner/driver-manager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,OAAO,aAAa;IAChB,KAAK,GAAG,IAAI,GAAG,EAAe,CAAC;IAEvC;;;;;;;;;;OAUG;IACH,KAAK,CAAC,WAAW,CAAC,SAAoB,EAAE,SAAiB,EAAE,SAAoB;QAC7E,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,8BAA8B,SAAS,EAAE,CAAC,CAAC;YACvD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACnC,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,8BAA8B,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;QAClG,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,KAAK,CAAC,SAAiB;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;gBACvC,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;gBACrB,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,EAAE,CAAC,CAAC;YAClD,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,GAAG,CAAC,iCAAiC,SAAS,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACvH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QAEjC,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YACjD,IAAI,CAAC;gBACH,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;oBACjD,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;oBACrB,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,GAAG,CAAC,sCAAsC,GAAG,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACtH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;IACrC,CAAC;CACF"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Evaluator runner
3
+ *
4
+ * Runs evaluation (code or prompt-based) for a single module
5
+ */
6
+ import type { AIService, ModelSpec } from '@modular-prompt/driver';
7
+ import type { EvaluationContext, EvaluationResult } from '../types.js';
8
+ import type { LoadedEvaluator } from '../config/dynamic-loader.js';
/**
 * Runs a single evaluator (code-based or prompt-based) against one module's
 * evaluation context and can print collected results.
 */
export declare class EvaluatorRunner {
    /** Service used to create the driver for prompt-based evaluation. */
    private aiService;
    /** Model spec passed to `createDriver` for prompt-based evaluation. */
    private evaluatorModel;
    constructor(aiService: AIService, evaluatorModel: ModelSpec);
    /**
     * Evaluate one module with one evaluator.
     *
     * Evaluators of type `'code'` are executed directly; all others go through
     * the prompt-based path. Thrown errors are caught and reported in the
     * result's `error` field.
     *
     * @param evaluator - Loaded evaluator to run
     * @param context - Evaluation context for the module
     * @returns Evaluation result
     */
    evaluate(evaluator: LoadedEvaluator, context: EvaluationContext): Promise<EvaluationResult>;
    /**
     * Prompt-based evaluation: compiles the evaluator module and queries the evaluator model.
     */
    private evaluateWithPrompt;
    /**
     * Print evaluation results to the console, grouped by module name.
     *
     * @param results - Evaluation results to display
     */
    displayResults(results: EvaluationResult[]): void;
}
32
+ //# sourceMappingURL=evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../src/runner/evaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnE,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAEnE,qBAAa,eAAe;IAExB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,cAAc;gBADd,SAAS,EAAE,SAAS,EACpB,cAAc,EAAE,SAAS;IAGnC;;;;;;OAMG;IACG,QAAQ,CACZ,SAAS,EAAE,eAAe,EAC1B,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,gBAAgB,CAAC;IAuB5B;;OAEG;YACW,kBAAkB;IAiEhC;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,IAAI;CA2ClD"}
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Evaluator runner
3
+ *
4
+ * Runs evaluation (code or prompt-based) for a single module
5
+ */
6
+ import { compile } from '@modular-prompt/core';
7
+ export class EvaluatorRunner {
8
+ aiService;
9
+ evaluatorModel;
10
+ constructor(aiService, evaluatorModel) {
11
+ this.aiService = aiService;
12
+ this.evaluatorModel = evaluatorModel;
13
+ }
14
+ /**
15
+ * Run evaluation for a single module
16
+ *
17
+ * @param evaluator - Loaded evaluator
18
+ * @param context - Evaluation context
19
+ * @returns Evaluation result
20
+ */
21
+ async evaluate(evaluator, context) {
22
+ console.log(`🔍 [${evaluator.name}] Evaluating ${context.moduleName}...`);
23
+ try {
24
+ if (evaluator.type === 'code') {
25
+ // Code evaluator - direct execution
26
+ return await evaluator.codeEvaluator.evaluate(context);
27
+ }
28
+ else {
29
+ // Prompt evaluator - LLM execution
30
+ return await this.evaluateWithPrompt(evaluator, context);
31
+ }
32
+ }
33
+ catch (error) {
34
+ const errorMessage = error instanceof Error ? error.message : String(error);
35
+ console.log(`🔍 [${evaluator.name}] ❌ Error: ${errorMessage}`);
36
+ return {
37
+ evaluator: evaluator.name,
38
+ moduleName: context.moduleName,
39
+ error: errorMessage,
40
+ };
41
+ }
42
+ }
43
+ /**
44
+ * Evaluate using prompt-based evaluator
45
+ */
46
+ async evaluateWithPrompt(evaluator, context) {
47
+ // Compile evaluation prompt
48
+ const compiled = compile(evaluator.promptEvaluator.module, context);
49
+ // Create driver for evaluator model
50
+ const driver = await this.aiService.createDriver(this.evaluatorModel);
51
+ // Run evaluation
52
+ const startTime = Date.now();
53
+ const result = await driver.query(compiled, {
54
+ temperature: 0.3, // Lower temperature for consistent evaluation
55
+ maxTokens: 4096,
56
+ });
57
+ const elapsed = Date.now() - startTime;
58
+ console.log(`🔍 [${evaluator.name}] ✅ Completed (${elapsed}ms)`);
59
+ // Close driver
60
+ if (driver && typeof driver.close === 'function') {
61
+ await driver.close();
62
+ }
63
+ // Use structured output if available
64
+ if (result.structuredOutput) {
65
+ const structured = result.structuredOutput;
66
+ return {
67
+ evaluator: evaluator.name,
68
+ moduleName: context.moduleName,
69
+ score: structured.score,
70
+ reasoning: structured.reasoning,
71
+ details: structured.details,
72
+ raw: result.content,
73
+ };
74
+ }
75
+ // Fallback: try to parse JSON from content
76
+ try {
77
+ const jsonMatch = result.content.match(/```json\s*\n([\s\S]*?)\n```/);
78
+ if (jsonMatch) {
79
+ const parsed = JSON.parse(jsonMatch[1]);
80
+ return {
81
+ evaluator: evaluator.name,
82
+ moduleName: context.moduleName,
83
+ score: parsed.score,
84
+ reasoning: parsed.reasoning,
85
+ details: parsed.details,
86
+ raw: result.content,
87
+ };
88
+ }
89
+ }
90
+ catch {
91
+ console.log(`🔍 [${evaluator.name}] ⚠️ Failed to parse JSON response`);
92
+ }
93
+ // Fallback: return raw response
94
+ return {
95
+ evaluator: evaluator.name,
96
+ moduleName: context.moduleName,
97
+ reasoning: result.content,
98
+ raw: result.content,
99
+ };
100
+ }
101
+ /**
102
+ * Display evaluation results
103
+ *
104
+ * @param results - Evaluation results to display
105
+ */
106
+ displayResults(results) {
107
+ console.log();
108
+ console.log('='.repeat(80));
109
+ console.log('📊 Evaluation Results');
110
+ console.log('='.repeat(80));
111
+ console.log();
112
+ // Group by module
113
+ const byModule = new Map();
114
+ for (const result of results) {
115
+ if (!byModule.has(result.moduleName)) {
116
+ byModule.set(result.moduleName, []);
117
+ }
118
+ byModule.get(result.moduleName).push(result);
119
+ }
120
+ for (const [moduleName, moduleResults] of byModule) {
121
+ console.log(`📦 ${moduleName}`);
122
+ console.log('─'.repeat(80));
123
+ for (const result of moduleResults) {
124
+ console.log(` 🔍 ${result.evaluator}`);
125
+ if (result.error) {
126
+ console.log(` ❌ Error: ${result.error}`);
127
+ }
128
+ else {
129
+ if (result.score !== undefined) {
130
+ console.log(` Score: ${result.score.toFixed(1)}/10`);
131
+ }
132
+ if (result.reasoning) {
133
+ console.log(` Reasoning: ${result.reasoning}`);
134
+ }
135
+ if (result.details) {
136
+ console.log(` Details:`);
137
+ console.log(` ${JSON.stringify(result.details, null, 2).split('\n').join('\n ')}`);
138
+ }
139
+ }
140
+ console.log();
141
+ }
142
+ }
143
+ console.log('='.repeat(80));
144
+ }
145
+ }
146
+ //# sourceMappingURL=evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../src/runner/evaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAK/C,MAAM,OAAO,eAAe;IAEhB;IACA;IAFV,YACU,SAAoB,EACpB,cAAyB;QADzB,cAAS,GAAT,SAAS,CAAW;QACpB,mBAAc,GAAd,cAAc,CAAW;IAChC,CAAC;IAEJ;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CACZ,SAA0B,EAC1B,OAA0B;QAE1B,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,gBAAgB,OAAO,CAAC,UAAU,KAAK,CAAC,CAAC;QAE1E,IAAI,CAAC;YACH,IAAI,SAAS,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC9B,oCAAoC;gBACpC,OAAO,MAAM,SAAS,CAAC,aAAc,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,mCAAmC;gBACnC,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,cAAc,YAAY,EAAE,CAAC,CAAC;YAE/D,OAAO;gBACL,SAAS,EAAE,SAAS,CAAC,IAAI;gBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,KAAK,EAAE,YAAY;aACpB,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAC9B,SAA0B,EAC1B,OAA0B;QAE1B,4BAA4B;QAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,eAAgB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAErE,oCAAoC;QACpC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEtE,iBAAiB;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE;YAC1C,WAAW,EAAE,GAAG,EAAE,8CAA8C;YAChE,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAEvC,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,kBAAkB,OAAO,KAAK,CAAC,CAAC;QAEjE,eAAe;QACf,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YACjD,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACvB,CAAC;QAED,qCAAqC;QACrC,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC5B,MAAM,UAAU,GAAG,MAAM,CAAC,gBAAuB,CAAC;YAClD,OAAO;gBACL,SAAS,EAAE,SAAS,CAAC,IAAI;gBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,KAAK,EAAE,UAAU,CAAC,KAAK;gBACvB,SAAS,EAAE,UAAU,CAAC,SAAS;gBAC/B,OAAO,EAAE,UAAU,CAAC,OAAO;gBAC3B,GAAG,EAAE,MAAM,CAAC,OAAO;aACpB
,CAAC;QACJ,CAAC;QAED,2CAA2C;QAC3C,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YACtE,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,OAAO;oBACL,SAAS,EAAE,SAAS,CAAC,IAAI;oBACzB,UAAU,EAAE,OAAO,CAAC,UAAU;oBAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;oBACnB,SAAS,EAAE,MAAM,CAAC,SAAS;oBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,GAAG,EAAE,MAAM,CAAC,OAAO;iBACpB,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,qCAAqC,CAAC,CAAC;QAC1E,CAAC;QAED,gCAAgC;QAChC,OAAO;YACL,SAAS,EAAE,SAAS,CAAC,IAAI;YACzB,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,SAAS,EAAE,MAAM,CAAC,OAAO;YACzB,GAAG,EAAE,MAAM,CAAC,OAAO;SACpB,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,cAAc,CAAC,OAA2B;QACxC,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA8B,CAAC;QACvD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;YACtC,CAAC;YACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChD,CAAC;QAED,KAAK,MAAM,CAAC,UAAU,EAAE,aAAa,CAAC,IAAI,QAAQ,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,MAAM,UAAU,EAAE,CAAC,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;gBACnC,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;gBAEzC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBACjB,OAAO,CAAC,GAAG,CAAC,kBAAkB,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAChD,CAAC;qBAAM,CAAC;oBACN,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;wBAC/B,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;oBAC5D,CAAC;oBACD,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;wBACrB,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACtD,CAAC;oBACD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnB,OAAO,C
AAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;wBAC9B,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;oBAC/F,CAAC;gBACH,CAAC;gBACD,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;CACF"}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Experiment runner - orchestrates the entire experiment
3
+ */
4
+ import type { AIService, ModelSpec } from '@modular-prompt/driver';
5
+ import type { ModuleDefinition, TestResult, TestCase } from '../types.js';
6
+ import type { DriverManager } from './driver-manager.js';
7
+ import type { LoadedEvaluator } from '../config/dynamic-loader.js';
/**
 * Experiment runner: executes the configured modules against the test cases
 * and models, optionally followed by an evaluation phase.
 *
 * NOTE(review): the implementation is not part of this declaration file; the
 * member descriptions below restate the declared contract only.
 */
export declare class ExperimentRunner {
    private aiService;
    private driverManager;
    private modules;
    private testCases;
    private models;
    private repeatCount;
    private evaluators?;
    private evaluatorModel?;
    /**
     * @param aiService - AIService instance
     * @param driverManager - Driver manager instance
     * @param modules - Module definitions to test
     * @param testCases - Test cases to run
     * @param models - Model specs keyed by model name
     * @param repeatCount - Number of repetitions
     * @param evaluators - Optional loaded evaluators
     * @param evaluatorModel - Optional evaluator model (name plus spec)
     */
    constructor(aiService: AIService, driverManager: DriverManager, modules: ModuleDefinition[], testCases: TestCase[], models: Record<string, ModelSpec>, repeatCount: number, evaluators?: LoadedEvaluator[] | undefined, evaluatorModel?: {
        name: string;
        spec: ModelSpec;
    } | undefined);
    /**
     * Run the experiment.
     *
     * @returns Array of TestResult
     */
    run(): Promise<TestResult[]>;
    /**
     * Run a module test with multiple repetitions.
     */
    private runModuleTest;
    /**
     * Run the evaluation phase.
     */
    private runEvaluationPhase;
    /**
     * Compare prompts across modules.
     */
    private comparePrompts;
}
40
+ //# sourceMappingURL=experiment.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"experiment.d.ts","sourceRoot":"","sources":["../../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAe,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAChF,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAuC,MAAM,aAAa,CAAC;AAC/G,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAGnE,qBAAa,gBAAgB;IAEzB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,aAAa;IACrB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,cAAc,CAAC;gBAPf,SAAS,EAAE,SAAS,EACpB,aAAa,EAAE,aAAa,EAC5B,OAAO,EAAE,gBAAgB,EAAE,EAC3B,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,EACjC,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,eAAe,EAAE,YAAA,EAC9B,cAAc,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,SAAS,CAAA;KAAE,YAAA;IAG5D;;;;OAIG;IACG,GAAG,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;IA6GlC;;OAEG;YACW,aAAa;IA0C3B;;OAEG;YACW,kBAAkB;IA2BhC;;OAEG;IACH,OAAO,CAAC,cAAc;CAgCvB"}