@modular-prompt/experiment 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +347 -0
  3. package/dist/src/cli/args.d.ts +6 -0
  4. package/dist/src/cli/args.d.ts.map +1 -0
  5. package/dist/src/cli/args.js +31 -0
  6. package/dist/src/cli/args.js.map +1 -0
  7. package/dist/src/config/dynamic-loader.d.ts +41 -0
  8. package/dist/src/config/dynamic-loader.d.ts.map +1 -0
  9. package/dist/src/config/dynamic-loader.js +101 -0
  10. package/dist/src/config/dynamic-loader.js.map +1 -0
  11. package/dist/src/config/loader.d.ts +23 -0
  12. package/dist/src/config/loader.d.ts.map +1 -0
  13. package/dist/src/config/loader.js +125 -0
  14. package/dist/src/config/loader.js.map +1 -0
  15. package/dist/src/evaluators/base-module.d.ts +10 -0
  16. package/dist/src/evaluators/base-module.d.ts.map +1 -0
  17. package/dist/src/evaluators/base-module.js +103 -0
  18. package/dist/src/evaluators/base-module.js.map +1 -0
  19. package/dist/src/evaluators/functional-correctness.d.ts +14 -0
  20. package/dist/src/evaluators/functional-correctness.d.ts.map +1 -0
  21. package/dist/src/evaluators/functional-correctness.js +95 -0
  22. package/dist/src/evaluators/functional-correctness.js.map +1 -0
  23. package/dist/src/evaluators/json-validator.d.ts +13 -0
  24. package/dist/src/evaluators/json-validator.d.ts.map +1 -0
  25. package/dist/src/evaluators/json-validator.js +51 -0
  26. package/dist/src/evaluators/json-validator.js.map +1 -0
  27. package/dist/src/index.d.ts +14 -0
  28. package/dist/src/index.d.ts.map +1 -0
  29. package/dist/src/index.js +19 -0
  30. package/dist/src/index.js.map +1 -0
  31. package/dist/src/reporter/statistics.d.ts +21 -0
  32. package/dist/src/reporter/statistics.d.ts.map +1 -0
  33. package/dist/src/reporter/statistics.js +68 -0
  34. package/dist/src/reporter/statistics.js.map +1 -0
  35. package/dist/src/run-comparison.d.ts +22 -0
  36. package/dist/src/run-comparison.d.ts.map +1 -0
  37. package/dist/src/run-comparison.js +142 -0
  38. package/dist/src/run-comparison.js.map +1 -0
  39. package/dist/src/runner/driver-manager.d.ts +30 -0
  40. package/dist/src/runner/driver-manager.d.ts.map +1 -0
  41. package/dist/src/runner/driver-manager.js +68 -0
  42. package/dist/src/runner/driver-manager.js.map +1 -0
  43. package/dist/src/runner/evaluator.d.ts +32 -0
  44. package/dist/src/runner/evaluator.d.ts.map +1 -0
  45. package/dist/src/runner/evaluator.js +146 -0
  46. package/dist/src/runner/evaluator.js.map +1 -0
  47. package/dist/src/runner/experiment.d.ts +40 -0
  48. package/dist/src/runner/experiment.d.ts.map +1 -0
  49. package/dist/src/runner/experiment.js +214 -0
  50. package/dist/src/runner/experiment.js.map +1 -0
  51. package/dist/src/types.d.ts +112 -0
  52. package/dist/src/types.d.ts.map +1 -0
  53. package/dist/src/types.js +5 -0
  54. package/dist/src/types.js.map +1 -0
  55. package/dist/tsconfig.tsbuildinfo +1 -0
  56. package/examples/experiment.yaml +70 -0
  57. package/package.json +70 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 otolab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,347 @@
1
+ # @modular-prompt/experiment
2
+
3
+ Experiment framework for comparing and evaluating modular prompt modules.
4
+
5
+ ## Overview
6
+
7
+ This framework provides tools to compare and evaluate different prompt module variations under identical conditions. It integrates with the `@modular-prompt/core` system to test multiple prompt variations and evaluate their output quality.
8
+
9
+ ### Use Cases
10
+
11
+ - **Prompt Engineering**: Validate the effectiveness of new prompt structures
12
+ - **Module Separation**: Verify that modularized prompts produce equivalent outputs
13
+ - **Quality Evaluation**: Assess output stability and consistency through repeated executions
14
+ - **Multi-Model Testing**: Test across different LLM providers (MLX, VertexAI, GoogleGenAI, etc.)
15
+
16
+ ## Features
17
+
18
+ - ✅ **Dynamic Module Loading**: Load prompt modules from external files or inline definitions
19
+ - ✅ **Flexible Evaluators**: Support both code-based and AI-based evaluation
20
+ - ✅ **Statistical Analysis**: Analyze success rates, execution times, and output consistency
21
+ - ✅ **Prompt Diff Detection**: Automatically detect differences between module outputs
22
+ - ✅ **Driver Caching**: Reuse drivers for improved memory efficiency
23
+ - ✅ **Detailed Logging**: Comprehensive logging of all executions
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pnpm add @modular-prompt/experiment
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ### 1. Create Configuration File
34
+
35
+ You can use either YAML or TypeScript format.
36
+
37
+ #### Option A: YAML Configuration (Recommended for static configurations)
38
+
39
+ Create `examples/experiment.yaml`:
40
+
41
+ ```yaml
42
+ models:
43
+ gemini-fast:
44
+ provider: vertexai
45
+ model: gemini-2.0-flash-exp
46
+ capabilities: ["tools", "fast"]
47
+ enabled: true
48
+
49
+ drivers:
50
+ vertexai:
51
+ projectId: your-project-id
52
+ location: us-central1
53
+ # Paths are resolved relative to this config file
54
+ # Can use ~/ for home directory or absolute paths
55
+ credentialsPath: ./credentials.json
56
+
57
+ modules:
58
+ - name: my-module
59
+ path: ./my-module.ts
60
+ description: My custom prompt module
61
+
62
+ testCases:
63
+ - name: Basic Test
64
+ description: Test basic functionality
65
+ input: # Structured context object (passed to module.compile)
66
+ query: user question
67
+ context: additional information
68
+ models: # Optional: specify which models to test (uses all enabled if not specified)
69
+ - gemini-fast
70
+
71
+ evaluators:
72
+ - name: json-validator
73
+ path: ./evaluators/json-validator.ts
74
+ # Or inline prompt evaluator
75
+ - name: quality-check
76
+ prompt:
77
+ objective:
78
+ - Evaluate output quality
79
+ instructions:
80
+ - Check clarity and accuracy
81
+
82
+ evaluation:
83
+ enabled: true
84
+ model: gemini-fast # Reference by model name
85
+ ```
86
+
87
+ #### Option B: TypeScript Configuration (For dynamic configurations)
88
+
89
+ Create `examples/experiment.ts`:
90
+
91
+ ```typescript
92
+ export default {
93
+ models: {
94
+ 'gemini-fast': {
95
+ provider: 'vertexai',
96
+ model: 'gemini-2.0-flash-exp',
97
+ capabilities: ['tools', 'fast'],
98
+ enabled: true,
99
+ },
100
+ },
101
+ drivers: {
102
+ vertexai: {
103
+ projectId: 'your-project-id',
104
+ location: 'us-central1',
105
+ credentialsPath: './credentials.json',
106
+ },
107
+ },
108
+ modules: [
109
+ {
110
+ name: 'my-module',
111
+ path: './my-module.ts',
112
+ description: 'My custom prompt module',
113
+ },
114
+ ],
115
+ testCases: [
116
+ {
117
+ name: 'Basic Test',
118
+ description: 'Test basic functionality',
119
+ input: { // Structured context object
120
+ query: 'user question',
121
+ options: { temperature: 0.7 },
122
+ },
123
+ models: ['gemini-fast'], // Optional
124
+ },
125
+ ],
126
+ evaluators: [
127
+ {
128
+ name: 'json-validator',
129
+ path: './evaluators/json-validator.ts',
130
+ },
131
+ ],
132
+ evaluation: {
133
+ enabled: true,
134
+ model: 'gemini-fast', // Reference by model name
135
+ },
136
+ };
137
+ ```
138
+
139
+ **TypeScript Support**: TypeScript configuration files are automatically transpiled using [jiti](https://github.com/unjs/jiti). You can use TypeScript syntax directly without pre-compilation. Type annotations are stripped automatically, and the file is executed as JavaScript.
140
+
141
+ **Important**: All file paths in the configuration (modules, evaluators, credentials) are resolved relative to the config file location.
142
+
143
+ ### 2. Run Experiment
144
+
145
+ ```bash
146
+ # Run with YAML config
147
+ npx modular-experiment examples/experiment.yaml
148
+
149
+ # Run with TypeScript config
150
+ npx modular-experiment examples/experiment.ts
151
+
152
+ # Run specific module
153
+ npx modular-experiment examples/experiment.yaml --modules my-module
154
+
155
+ # Run with evaluation
156
+ npx modular-experiment examples/experiment.yaml --evaluate
157
+
158
+ # Run multiple times for statistics
159
+ npx modular-experiment examples/experiment.yaml --repeat 10
160
+ ```
161
+
162
+ ## Configuration
163
+
164
+ ### Module Definition
165
+
166
+ Modules are loaded from external files; each entry references its module file by `path`:
167
+
168
+ ```typescript
169
+ // External file
170
+ export const modules: ModuleReference[] = [
171
+ {
172
+ name: 'my-module',
173
+ path: './modules/my-module.ts',
174
+ description: 'Description',
175
+ },
176
+ ];
177
+ ```
178
+
179
+ A module file should export a default object with:
180
+
181
+ ```typescript
182
+ import { compile } from '@modular-prompt/core';
183
+ import { myPromptModule } from './prompts.js';
184
+
185
+ export default {
186
+ name: 'My Module',
187
+ description: 'Module description',
188
+ compile: (context: any) => compile(myPromptModule, context),
189
+ };
190
+ ```
191
+
192
+ ### Evaluator Definition
193
+
194
+ Two types of evaluators are supported:
195
+
196
+ #### 1. Code Evaluator
197
+
198
+ Programmatic validation (e.g., JSON structure validation):
199
+
200
+ ```typescript
201
+ import type { CodeEvaluator, EvaluationContext, EvaluationResult } from '@modular-prompt/experiment';
202
+
203
+ export default {
204
+ name: 'JSON Validator',
205
+ description: 'Validates JSON structure in output',
206
+
207
+ async evaluate(context: EvaluationContext): Promise<EvaluationResult> {
208
+ // Validation logic
209
+ return {
210
+ evaluator: 'json-validator',
211
+ moduleName: context.moduleName,
212
+ score: 10,
213
+ reasoning: 'Valid JSON structure',
214
+ };
215
+ },
216
+ } satisfies CodeEvaluator;
217
+ ```
218
+
219
+ #### 2. Prompt Evaluator
220
+
221
+ AI-based evaluation using LLM:
222
+
223
+ ```typescript
224
+ import type { PromptEvaluator, EvaluationContext } from '@modular-prompt/experiment';
225
+ import type { PromptModule } from '@modular-prompt/core';
226
+
227
+ const evaluationModule: PromptModule<EvaluationContext> = {
228
+ createContext: (): EvaluationContext => ({
229
+ moduleName: '',
230
+ prompt: '',
231
+ runs: [],
232
+ }),
233
+
234
+ objective: [
235
+ '- Assess output quality',
236
+ ],
237
+
238
+ instructions: [
239
+ '- Evaluate clarity and accuracy',
240
+ ],
241
+ };
242
+
243
+ export default {
244
+ name: 'Quality Evaluator',
245
+ description: 'Evaluates output quality',
246
+ module: evaluationModule,
247
+ } satisfies PromptEvaluator;
248
+ ```
249
+
250
+ All prompt evaluators are automatically merged with the base evaluation module.
251
+
252
+ ## Architecture
253
+
254
+ ```
255
+ ┌─────────────────────────────────────────┐
256
+ │ run-comparison.ts (CLI Entry Point) │
257
+ └─────────────────────────────────────────┘
258
+
259
+ ┌─────────┼─────────┐
260
+ ▼ ▼ ▼
261
+ ┌────────┐ ┌────────┐ ┌────────┐
262
+ │ Config │ │ Runner │ │Reporter│
263
+ │ Loader │ │ │ │ │
264
+ └────────┘ └────────┘ └────────┘
265
+ │ │
266
+ ▼ ▼
267
+ ┌────────┐ ┌────────┐
268
+ │Dynamic │ │Driver │
269
+ │Loader │ │Manager │
270
+ └────────┘ └────────┘
271
+ ```
272
+
273
+ ### Components
274
+
275
+ | Component | Responsibility |
276
+ |-----------|----------------|
277
+ | `config/loader.ts` | Load YAML or TypeScript configuration |
278
+ | `config/dynamic-loader.ts` | Dynamic module/evaluator loading |
279
+ | `runner/experiment.ts` | Orchestrate experiment execution |
280
+ | `runner/evaluator.ts` | Execute evaluations |
281
+ | `runner/driver-manager.ts` | Cache and manage AI drivers |
282
+ | `reporter/statistics.ts` | Generate statistical reports |
283
+ | `evaluators/base-module.ts` | Base evaluation prompt module |
284
+
285
+ ## Examples
286
+
287
+ See `examples/experiment.yaml` for a complete configuration template with:
288
+ - Model definitions (MLX, Vertex AI, Google GenAI)
289
+ - Driver configurations with credential paths
290
+ - Evaluation settings
291
+ - Empty sections for modules, test cases, and evaluators (ready for your content)
292
+
293
+ ## API
294
+
295
+ ### Programmatic Usage
296
+
297
+ ```typescript
298
+ import {
299
+ loadExperimentConfig,
300
+ loadModules,
301
+ loadEvaluators,
302
+ ExperimentRunner,
303
+ DriverManager,
304
+ } from '@modular-prompt/experiment';
305
+
306
+ const { serverConfig, aiService } = await loadExperimentConfig('config.yaml');
307
+ const modules = await loadModules(moduleRefs, basePath);
308
+ const evaluators = await loadEvaluators(evaluatorRefs, basePath);
309
+
310
+ const driverManager = new DriverManager();
311
+ const runner = new ExperimentRunner(
312
+ aiService,
313
+ driverManager,
314
+ modules,
315
+ testCases,
316
+ models,
317
+ repeatCount,
318
+ evaluators,
319
+ evaluatorModel
320
+ );
321
+
322
+ const results = await runner.run();
323
+ await driverManager.cleanup();
324
+ ```
325
+
326
+ ## CLI Options
327
+
328
+ ```
329
+ Usage: modular-experiment <config> [options]
330
+
331
+ Arguments:
332
+ <config> Config file path (YAML, TypeScript, or JavaScript)
333
+
334
+ Options:
335
+ --test-case <name> Test case name filter
336
+ --model <provider> Model provider filter
337
+ --modules <names> Comma-separated module names (default: all)
338
+ --repeat <count> Number of repetitions (default: 1)
339
+ --evaluate Enable evaluation phase
340
+ --evaluators <names> Comma-separated evaluator names (default: all)
341
+ ```
342
+
343
+ **Note**: All paths specified in the config file are resolved relative to the config file's directory.
344
+
345
+ ## License
346
+
347
+ MIT
@@ -0,0 +1,6 @@
1
+ /**
2
+ * CLI argument parsing using commander
3
+ */
4
+ import type { ExtendedExperimentOptions } from '../types.js';
5
+ export declare function parseArgs(): ExtendedExperimentOptions;
6
+ //# sourceMappingURL=args.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.d.ts","sourceRoot":"","sources":["../../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAgB,SAAS,IAAI,yBAAyB,CA2BrD"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * CLI argument parsing using commander
3
+ */
4
+ import { Command } from 'commander';
5
+ import { resolve } from 'path';
6
/**
 * Parse the command-line arguments of the experiment CLI.
 *
 * Arguments are read by commander from `process.argv`. The positional
 * `<config>` argument is resolved against the current working directory, and
 * the comma-separated `--modules` / `--evaluators` values are split into
 * trimmed name lists (left `undefined` when the option is not given).
 *
 * @returns Options object with `configPath`, `testCaseFilter`, `modelFilter`,
 *   `moduleFilter`, `repeatCount`, `enableEvaluation` and `evaluatorFilter`.
 * @throws {Error} If `--repeat` is not a positive integer.
 */
export function parseArgs() {
    // "a, b" -> ['a', 'b']; stays undefined when the option was omitted.
    const splitNames = (value) => value?.split(',').map((s) => s.trim());
    const program = new Command();
    program
        // Must match the executable name documented in the README.
        .name('modular-experiment')
        .description('Compare multiple prompt module variations')
        .argument('<config>', 'Config file path (YAML, TypeScript, or JavaScript)')
        .option('--test-case <name>', 'Test case name filter')
        .option('--model <provider>', 'Model provider filter (mlx, vertexai, googlegenai)')
        .option('--modules <names>', 'Comma-separated module names to test (default: all)')
        .option('--repeat <count>', 'Number of repetitions', '1')
        .option('--evaluate', 'Enable AI-based evaluation of outputs', false)
        .option('--evaluators <names>', 'Comma-separated evaluator names (default: all)')
        .parse();
    const config = program.args[0];
    const options = program.opts();
    // Option values arrive as strings; reject anything that is not a usable
    // count instead of letting NaN (or a non-positive number) leak downstream.
    const repeatCount = parseInt(options.repeat, 10);
    if (!Number.isInteger(repeatCount) || repeatCount < 1) {
        throw new Error(`Invalid --repeat value "${options.repeat}": expected a positive integer`);
    }
    return {
        configPath: resolve(process.cwd(), config),
        testCaseFilter: options.testCase,
        modelFilter: options.model,
        moduleFilter: splitNames(options.modules),
        repeatCount,
        enableEvaluation: options.evaluate,
        evaluatorFilter: splitNames(options.evaluators),
    };
}
31
+ //# sourceMappingURL=args.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"args.js","sourceRoot":"","sources":["../../../src/cli/args.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAG/B,MAAM,UAAU,SAAS;IACvB,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,oBAAoB,CAAC;SAC1B,WAAW,CAAC,2CAA2C,CAAC;SACxD,QAAQ,CAAC,UAAU,EAAE,oDAAoD,CAAC;SAC1E,MAAM,CAAC,oBAAoB,EAAE,uBAAuB,CAAC;SACrD,MAAM,CAAC,oBAAoB,EAAE,oDAAoD,CAAC;SAClF,MAAM,CAAC,mBAAmB,EAAE,qDAAqD,CAAC;SAClF,MAAM,CAAC,kBAAkB,EAAE,uBAAuB,EAAE,GAAG,CAAC;SACxD,MAAM,CAAC,YAAY,EAAE,uCAAuC,EAAE,KAAK,CAAC;SACpE,MAAM,CAAC,sBAAsB,EAAE,gDAAgD,CAAC;SAChF,KAAK,EAAE,CAAC;IAEX,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO;QACL,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC;QAC1C,cAAc,EAAE,OAAO,CAAC,QAAQ;QAChC,WAAW,EAAE,OAAO,CAAC,KAAK;QAC1B,YAAY,EAAE,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACtE,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,gBAAgB,EAAE,OAAO,CAAC,QAAQ;QAClC,eAAe,EAAE,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;KAC7E,CAAC;AACJ,CAAC"}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Dynamic loader for evaluators and modules
3
+ *
4
+ * Loads evaluators and modules from external files or inline definitions
5
+ */
6
+ import type { EvaluatorReference, CodeEvaluator, PromptEvaluator, ModuleDefinition } from '../types.js';
7
+ /**
8
+ * Loaded evaluator (unified type)
9
+ */
10
+ export interface LoadedEvaluator {
11
+ name: string;
12
+ description: string;
13
+ type: 'code' | 'prompt';
14
+ codeEvaluator?: CodeEvaluator;
15
+ promptEvaluator?: PromptEvaluator;
16
+ }
17
+ /**
18
+ * Load evaluators from references
19
+ *
20
+ * @param refs - Array of evaluator references
21
+ * @param basePath - Base path for resolving relative paths
22
+ * @returns Array of loaded evaluators
23
+ */
24
+ export declare function loadEvaluators(refs: EvaluatorReference[], basePath: string): Promise<LoadedEvaluator[]>;
25
+ /**
26
+ * Module reference in config file
27
+ */
28
+ export type ModuleReference = {
29
+ name: string;
30
+ path: string;
31
+ description?: string;
32
+ };
33
+ /**
34
+ * Load modules from references
35
+ *
36
+ * @param refs - Array of module references
37
+ * @param basePath - Base path for resolving relative paths
38
+ * @returns Array of module definitions
39
+ */
40
+ export declare function loadModules(refs: ModuleReference[], basePath: string): Promise<ModuleDefinition[]>;
41
+ //# sourceMappingURL=dynamic-loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dynamic-loader.d.ts","sourceRoot":"","sources":["../../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,KAAK,EACV,kBAAkB,EAClB,aAAa,EACb,eAAe,EACf,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAGrB;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,kBAAkB,EAAE,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,eAAe,EAAE,CAAC,CA0D5B;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEzD;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,eAAe,EAAE,EACvB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAsB7B"}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Dynamic loader for evaluators and modules
3
+ *
4
+ * Loads evaluators and modules from external files or inline definitions
5
+ */
6
+ import { merge } from '@modular-prompt/core';
7
+ import { pathToFileURL } from 'url';
8
+ import { resolve } from 'path';
9
+ import { baseEvaluationModule } from '../evaluators/base-module.js';
10
/**
 * Load evaluators from references.
 *
 * A reference with a `path` is imported from that file (resolved against
 * `basePath`) and classified by its default export: an export with an
 * `evaluate` function becomes a code evaluator, an export with a `module`
 * property becomes a prompt evaluator. A reference with an inline `prompt`
 * becomes a prompt evaluator directly. Every prompt module is merged with
 * `baseEvaluationModule` before being returned.
 *
 * References that cannot be used (no default export, unrecognised export
 * shape, or neither `path` nor `prompt`) are reported with `console.warn`
 * and skipped.
 *
 * @param refs - Array of evaluator references
 * @param basePath - Base path for resolving relative paths
 * @returns Array of loaded evaluators
 */
export async function loadEvaluators(refs, basePath) {
    const evaluators = [];
    for (const ref of refs) {
        if ('path' in ref) {
            // External file
            const filePath = resolve(basePath, ref.path);
            const fileUrl = pathToFileURL(filePath).href;
            const imported = await import(fileUrl);
            const evaluator = imported.default;
            if (!evaluator) {
                console.warn(`⚠️ No default export in ${ref.path}`);
                continue;
            }
            // The `in` operator throws on primitives, so only probe
            // properties when the default export is object-like.
            const isObjectLike = typeof evaluator === 'object' || typeof evaluator === 'function';
            // Detect type by checking properties
            if (isObjectLike && 'evaluate' in evaluator && typeof evaluator.evaluate === 'function') {
                // Code evaluator
                evaluators.push({
                    name: ref.name,
                    description: ref.description || evaluator.description || '',
                    type: 'code',
                    codeEvaluator: evaluator,
                });
            }
            else if (isObjectLike && 'module' in evaluator) {
                // Prompt evaluator - merge with base module
                const mergedModule = merge(baseEvaluationModule, evaluator.module);
                evaluators.push({
                    name: ref.name,
                    description: ref.description || evaluator.description || '',
                    type: 'prompt',
                    promptEvaluator: {
                        name: evaluator.name,
                        description: evaluator.description,
                        module: mergedModule,
                    },
                });
            }
            else {
                console.warn(`⚠️ Unknown evaluator type in ${ref.path}`);
            }
        }
        else if ('prompt' in ref) {
            // Inline prompt definition - merge with base module
            const mergedModule = merge(baseEvaluationModule, ref.prompt);
            evaluators.push({
                name: ref.name,
                description: ref.description || '',
                type: 'prompt',
                promptEvaluator: {
                    name: ref.name,
                    description: ref.description || '',
                    module: mergedModule,
                },
            });
        }
        else {
            // Previously dropped without any notice; surface the config mistake.
            console.warn(`⚠️ Evaluator reference "${ref.name}" has neither "path" nor "prompt"; skipping`);
        }
    }
    return evaluators;
}
75
/**
 * Load modules from references.
 *
 * Each reference's `path` is resolved against `basePath` and imported; the
 * file's default export supplies the module's `compile` function and, when
 * the reference has no description of its own, its `description`.
 *
 * Files without a default export, or whose default export has no callable
 * `compile`, are reported with `console.warn` and skipped, so every returned
 * definition carries a usable `compile` function.
 *
 * @param refs - Array of module references
 * @param basePath - Base path for resolving relative paths
 * @returns Array of module definitions
 */
export async function loadModules(refs, basePath) {
    const modules = [];
    for (const ref of refs) {
        const filePath = resolve(basePath, ref.path);
        const fileUrl = pathToFileURL(filePath).href;
        const imported = await import(fileUrl);
        const module = imported.default;
        if (!module) {
            console.warn(`⚠️ No default export in ${ref.path}`);
            continue;
        }
        // Reject the module here instead of handing out a definition whose
        // `compile` is undefined and fails later, far from the cause.
        if (typeof module.compile !== 'function') {
            console.warn(`⚠️ Default export of ${ref.path} has no compile() function`);
            continue;
        }
        modules.push({
            name: ref.name,
            description: ref.description || module.description || '',
            compile: module.compile,
        });
    }
    return modules;
}
101
+ //# sourceMappingURL=dynamic-loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dynamic-loader.js","sourceRoot":"","sources":["../../../src/config/dynamic-loader.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAO/B,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AAapE;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAA0B,EAC1B,QAAgB;IAEhB,MAAM,UAAU,GAAsB,EAAE,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;YAClB,gBAAgB;YAChB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;YAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;YACvC,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC;YAEnC,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,4BAA4B,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBACrD,SAAS;YACX,CAAC;YAED,qCAAqC;YACrC,IAAI,UAAU,IAAI,SAAS,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;gBACxE,iBAAiB;gBACjB,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;oBAC3D,IAAI,EAAE,MAAM;oBACZ,aAAa,EAAE,SAA0B;iBAC1C,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,4CAA4C;gBAC5C,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;gBACnE,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,WAAW,IAAI,EAAE;oBAC3D,IAAI,EAAE,QAAQ;oBACd,eAAe,EAAE;wBACf,IAAI,EAAE,SAAS,CAAC,IAAI;wBACpB,WAAW,EAAE,SAAS,CAAC,WAAW;wBAClC,MAAM,EAAE,YAAY;qBACrB;iBACF,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,iCAAiC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;aAAM,IAAI,QAAQ,IAAI,GAAG,EAAE,CAAC;YAC3B,oDAAoD;YACpD,MAAM,YAAY,GAAG,KAAK,CAAC,oBAAoB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;YAC7D,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;gBAClC,IAAI,EAAE,QAAQ;gBACd,eAAe,EAAE;oBACf,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;oBAClC,MAAM,EAAE,YAAY;iBACrB;aACF,CAAC,CAAC;QACL,CAAC;IACH,CAAC
;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAQD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAuB,EACvB,QAAgB;IAEhB,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;QAC7C,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,4BAA4B,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,IAAI,EAAE;YACxD,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Configuration loader
3
+ */
4
+ import { AIService } from '@modular-prompt/driver';
5
+ import type { ModuleReference } from './dynamic-loader.js';
6
+ import type { EvaluatorReference, TestCase } from '../types.js';
7
+ export interface LoadedConfig {
8
+ serverConfig: any;
9
+ modules: ModuleReference[];
10
+ testCases: TestCase[];
11
+ evaluators: EvaluatorReference[];
12
+ aiService: AIService;
13
+ configDir: string;
14
+ }
15
+ export type ExperimentConfig = LoadedConfig;
16
+ /**
17
+ * Load experiment configuration
18
+ *
19
+ * @param configPath - Path to config file (YAML or TypeScript)
20
+ * @returns LoadedConfig with all configuration
21
+ */
22
+ export declare function loadExperimentConfig(configPath: string): Promise<LoadedConfig>;
23
+ //# sourceMappingURL=loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAC3E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,KAAK,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEhE,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,GAAG,CAAC;IAClB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,EAAE,kBAAkB,EAAE,CAAC;IACjC,SAAS,EAAE,SAAS,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,MAAM,gBAAgB,GAAG,YAAY,CAAC;AAsB5C;;;;;GAKG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAuGpF"}