@modular-prompt/experiment 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +347 -0
  3. package/dist/src/cli/args.d.ts +6 -0
  4. package/dist/src/cli/args.d.ts.map +1 -0
  5. package/dist/src/cli/args.js +31 -0
  6. package/dist/src/cli/args.js.map +1 -0
  7. package/dist/src/config/dynamic-loader.d.ts +41 -0
  8. package/dist/src/config/dynamic-loader.d.ts.map +1 -0
  9. package/dist/src/config/dynamic-loader.js +101 -0
  10. package/dist/src/config/dynamic-loader.js.map +1 -0
  11. package/dist/src/config/loader.d.ts +23 -0
  12. package/dist/src/config/loader.d.ts.map +1 -0
  13. package/dist/src/config/loader.js +125 -0
  14. package/dist/src/config/loader.js.map +1 -0
  15. package/dist/src/evaluators/base-module.d.ts +10 -0
  16. package/dist/src/evaluators/base-module.d.ts.map +1 -0
  17. package/dist/src/evaluators/base-module.js +103 -0
  18. package/dist/src/evaluators/base-module.js.map +1 -0
  19. package/dist/src/evaluators/functional-correctness.d.ts +14 -0
  20. package/dist/src/evaluators/functional-correctness.d.ts.map +1 -0
  21. package/dist/src/evaluators/functional-correctness.js +95 -0
  22. package/dist/src/evaluators/functional-correctness.js.map +1 -0
  23. package/dist/src/evaluators/json-validator.d.ts +13 -0
  24. package/dist/src/evaluators/json-validator.d.ts.map +1 -0
  25. package/dist/src/evaluators/json-validator.js +51 -0
  26. package/dist/src/evaluators/json-validator.js.map +1 -0
  27. package/dist/src/index.d.ts +14 -0
  28. package/dist/src/index.d.ts.map +1 -0
  29. package/dist/src/index.js +19 -0
  30. package/dist/src/index.js.map +1 -0
  31. package/dist/src/reporter/statistics.d.ts +21 -0
  32. package/dist/src/reporter/statistics.d.ts.map +1 -0
  33. package/dist/src/reporter/statistics.js +68 -0
  34. package/dist/src/reporter/statistics.js.map +1 -0
  35. package/dist/src/run-comparison.d.ts +22 -0
  36. package/dist/src/run-comparison.d.ts.map +1 -0
  37. package/dist/src/run-comparison.js +142 -0
  38. package/dist/src/run-comparison.js.map +1 -0
  39. package/dist/src/runner/driver-manager.d.ts +30 -0
  40. package/dist/src/runner/driver-manager.d.ts.map +1 -0
  41. package/dist/src/runner/driver-manager.js +68 -0
  42. package/dist/src/runner/driver-manager.js.map +1 -0
  43. package/dist/src/runner/evaluator.d.ts +32 -0
  44. package/dist/src/runner/evaluator.d.ts.map +1 -0
  45. package/dist/src/runner/evaluator.js +146 -0
  46. package/dist/src/runner/evaluator.js.map +1 -0
  47. package/dist/src/runner/experiment.d.ts +40 -0
  48. package/dist/src/runner/experiment.d.ts.map +1 -0
  49. package/dist/src/runner/experiment.js +214 -0
  50. package/dist/src/runner/experiment.js.map +1 -0
  51. package/dist/src/types.d.ts +112 -0
  52. package/dist/src/types.d.ts.map +1 -0
  53. package/dist/src/types.js +5 -0
  54. package/dist/src/types.js.map +1 -0
  55. package/dist/tsconfig.tsbuildinfo +1 -0
  56. package/examples/experiment.yaml +70 -0
  57. package/package.json +70 -0
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Configuration loader
3
+ */
4
+ import { readFileSync } from 'fs';
5
+ import { parse as parseYaml } from 'yaml';
6
+ import { resolve, dirname, extname } from 'path';
7
+ import { createJiti } from 'jiti';
8
+ import { AIService } from '@modular-prompt/driver';
9
/**
 * Resolve a path that appears in a config file to a usable filesystem path.
 *
 * Rules, applied in order:
 * 1. `~` on its own, or a path starting with `~/` (or `~\`), is expanded to the
 *    user's home directory taken from `HOME` (falling back to `USERPROFILE`).
 *    If neither variable is set the path is returned unchanged.
 * 2. A path starting with `/` is returned as is.
 * 3. Anything else is resolved against the config file directory. This includes
 *    ordinary file names that merely begin with `~`, such as `~backup.json`.
 *
 * @param configDir - Config file directory
 * @param path - Path to resolve (can be relative, absolute or home-relative)
 * @returns Resolved path
 */
function resolveConfigPath(configDir, path) {
    // Only a bare "~" or a "~" followed by a separator refers to the home
    // directory; "~name" is a regular (relative) file name.
    const isHomeRelative = path === '~' || path.startsWith('~/') || path.startsWith('~\\');
    if (isHomeRelative) {
        const homeDir = process.env.HOME || process.env.USERPROFILE;
        return homeDir ? `${homeDir}${path.slice(1)}` : path;
    }
    // Absolute paths are passed through untouched
    if (path.startsWith('/')) {
        return path;
    }
    // Everything else is relative to the config file
    return resolve(configDir, path);
}
28
/**
 * Load an experiment configuration file and prepare everything derived from it.
 *
 * YAML files (`.yaml`, `.yml`) are parsed directly; `.ts`, `.js`, `.mjs` and
 * `.cjs` files are imported through jiti. After loading, the function:
 * - resolves each driver's `credentialsPath` relative to the config file,
 * - sets `process.env.GOOGLE_APPLICATION_CREDENTIALS` when
 *   `credentials.googleApplicationCredentials` is present,
 * - checks that at least one model is configured and that every model named by
 *   a test case exists,
 * - creates an `AIService` from the models and drivers.
 *
 * @param configPath - Path to config file (YAML, TypeScript or JavaScript)
 * @returns Object with `serverConfig`, `modules`, `testCases`, `evaluators`,
 *   `aiService` and `configDir` (absolute directory of the config file)
 * @throws Error when the extension is unsupported, the file does not yield a
 *   configuration object, no models are configured, or a test case references
 *   an unknown model
 */
export async function loadExperimentConfig(configPath) {
    // Work with an absolute path from here on. The jiti instance below is
    // created with import.meta.url, so a relative specifier would presumably be
    // resolved against this module instead of the current working directory.
    const absoluteConfigPath = resolve(configPath);
    const configDir = dirname(absoluteConfigPath);
    const ext = extname(absoluteConfigPath);
    // Load config based on file type
    let config;
    if (ext === '.yaml' || ext === '.yml') {
        // YAML format
        const content = readFileSync(absoluteConfigPath, 'utf-8');
        config = parseYaml(content);
    }
    else if (ext === '.ts' || ext === '.js' || ext === '.mjs' || ext === '.cjs') {
        // TypeScript/JavaScript format (auto-transpile with jiti)
        const jiti = createJiti(import.meta.url, {
            interopDefault: true, // Automatically get default export
            cache: true, // Enable caching for better performance
            requireCache: false, // Don't use require cache
        });
        config = await jiti.import(absoluteConfigPath);
        if (!config) {
            throw new Error(`❌ No default export in ${configPath}`);
        }
    }
    else {
        throw new Error(`❌ Unsupported config file format: ${ext}. Use .yaml, .yml, .ts, .js, .mjs, or .cjs`);
    }
    // An empty YAML document parses to null and a scalar document to a
    // primitive; fail with a clear message instead of a TypeError below.
    if (config === null || typeof config !== 'object') {
        throw new Error(`❌ Config file does not define a configuration object: ${configPath}`);
    }
    // Extract components
    const modules = config.modules || [];
    const testCases = config.testCases || [];
    const evaluators = config.evaluators || [];
    // Server config (models, drivers, evaluation, etc.)
    const serverConfig = {
        models: config.models,
        drivers: config.drivers,
        evaluation: config.evaluation,
        credentials: config.credentials,
        selection: config.selection,
        server: config.server,
        logging: config.logging,
    };
    // Resolve paths in driver configurations relative to config file
    if (serverConfig.drivers) {
        for (const driverConfig of Object.values(serverConfig.drivers)) {
            // A driver declared without a body (e.g. `mlx:` in YAML) is null
            if (driverConfig?.credentialsPath) {
                driverConfig.credentialsPath = resolveConfigPath(configDir, driverConfig.credentialsPath);
            }
        }
    }
    // Setup driver environment variables if specified
    if (serverConfig.credentials?.googleApplicationCredentials) {
        const resolvedPath = resolveConfigPath(configDir, serverConfig.credentials.googleApplicationCredentials);
        process.env.GOOGLE_APPLICATION_CREDENTIALS = resolvedPath;
        console.log(`Setting GOOGLE_APPLICATION_CREDENTIALS=${resolvedPath}`);
    }
    // Validation
    if (!serverConfig.models || Object.keys(serverConfig.models).length === 0) {
        throw new Error('❌ No models configured in config file');
    }
    // Model names are the keys of the models object
    const modelNames = new Set(Object.keys(serverConfig.models));
    // Validate testCase model references
    for (const testCase of testCases) {
        for (const modelName of testCase.models ?? []) {
            if (!modelNames.has(modelName)) {
                throw new Error(`❌ TestCase '${testCase.name}' references unknown model '${modelName}'`);
            }
        }
    }
    // Initialize AIService
    const aiService = new AIService({
        models: serverConfig.models,
        drivers: serverConfig.drivers || {},
        defaultOptions: {
            temperature: 0.7,
            maxTokens: 2048,
        },
    });
    return {
        serverConfig,
        modules,
        testCases,
        evaluators,
        aiService,
        configDir,
    };
}
125
+ //# sourceMappingURL=loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.js","sourceRoot":"","sources":["../../../src/config/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,MAAM,CAAC;AAClC,OAAO,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAgB3E;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAY;IACxD,sDAAsD;IACtD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC;IACpD,CAAC;IACD,oCAAoC;IACpC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,kDAAkD;IAClD,OAAO,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAClC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,UAAkB;IAC3D,0CAA0C;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IAEhC,iCAAiC;IACjC,IAAI,MAAW,CAAC;IAEhB,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACtC,cAAc;QACd,MAAM,OAAO,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAClD,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;SAAM,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QAC9E,0DAA0D;QAC1D,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE;YACvC,cAAc,EAAE,IAAI,EAAG,mCAAmC;YAC1D,KAAK,EAAE,IAAI,EAAY,wCAAwC;YAC/D,YAAY,EAAE,KAAK,EAAI,0BAA0B;SAClD,CAAC,CAAC;QAEH,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAEvC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0BAA0B,UAAU,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,4CAA4C,CAAC,CAAC;IACxG,CAAC;IAED,qBAAqB;IACrB,MAAM,OAAO,GAAsB,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;IACxD,MAAM,SAAS,GAAe,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;IACrD,MAAM,UAAU,GAAyB,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC;IAEjE,oDAAoD;IACpD,MAAM,YAAY,GAAG;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,MAAM,E
AAE,MAAM,CAAC,MAAM;QACrB,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC;IAEF,iEAAiE;IACjE,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAEtD,8CAA8C;YAC9C,IAAI,YAAY,CAAC,eAAe,EAAE,CAAC;gBACjC,YAAY,CAAC,eAAe,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,eAAe,CAAC,CAAC;YAC5F,CAAC;QACH,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,IAAI,YAAY,CAAC,WAAW,EAAE,4BAA4B,EAAE,CAAC;QAC3D,MAAM,YAAY,GAAG,iBAAiB,CAAC,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,4BAA4B,CAAC,CAAC;QACzG,OAAO,CAAC,GAAG,CAAC,8BAA8B,GAAG,YAAY,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,0CAA0C,YAAY,EAAE,CAAC,CAAC;IACxE,CAAC;IAED,aAAa;IACb,IAAI,CAAC,YAAY,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAED,mCAAmC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;IAErE,qCAAqC;IACrC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YACpB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC/B,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,IAAI,+BAA+B,SAAS,GAAG,CAAC,CAAC;gBAC3F,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,MAAM,eAAe,GAAsB;QACzC,MAAM,EAAE,YAAY,CAAC,MAAM;QAC3B,OAAO,EAAE,YAAY,CAAC,OAAO,IAAI,EAAE;QACnC,cAAc,EAAE;YACd,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;SAChB;KACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,eAAe,CAAC,CAAC;IAEjD,OAAO;QACL,YAAY;QACZ,OAAO;QACP,SAAS;QACT,UAAU;QACV,SAAS;QACT,SAAS;KACV,CAAC;AACJ,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Base evaluation prompt module
3
+ *
4
+ * This module provides the foundation for all evaluation prompts.
5
+ * It defines how test data is presented to the evaluator.
6
+ */
7
+ import type { PromptModule } from '@modular-prompt/core';
8
+ import type { EvaluationContext } from '../types.js';
9
+ export declare const baseEvaluationModule: PromptModule<EvaluationContext>;
10
+ //# sourceMappingURL=base-module.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-module.d.ts","sourceRoot":"","sources":["../../../src/evaluators/base-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAA4B,MAAM,sBAAsB,CAAC;AACnF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,eAAO,MAAM,oBAAoB,EAAE,YAAY,CAAC,iBAAiB,CAuGhE,CAAC"}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Base evaluation prompt module
3
+ *
4
+ * This module provides the foundation for all evaluation prompts.
5
+ * It defines how test data is presented to the evaluator.
6
+ */
7
/**
 * Shared prompt module for evaluators.
 *
 * It states the evaluation objective and vocabulary, asks for a JSON verdict
 * (score / reasoning / details), exposes the module name and compiled prompt
 * as materials, and lists every run's output as input.
 */
export const baseEvaluationModule = {
    // Blank context: no module name, no prompt, no runs
    createContext: () => {
        return { moduleName: '', prompt: '', runs: [] };
    },
    objective: [
        '- Evaluate the output of a prompt module',
        '- Provide detailed assessment with scores and reasoning',
    ],
    terms: [
        '- Module: A prompt variation being tested',
        '- Prompt: The compiled prompt used to generate the output',
        '- Query Result: The output generated by the LLM',
        '- Run: A single execution of the prompt',
    ],
    instructions: [
        {
            type: 'subsection',
            title: 'Output Format',
            items: [
                'Return evaluation in JSON format with the following structure:',
                '- score: Overall score (0-10)',
                '- reasoning: Clear explanation of the score',
                '- details: Object with additional evaluation metrics',
            ],
        },
    ],
    materials: [
        {
            type: 'subsection',
            title: 'Module Name',
            items: [({ moduleName }) => moduleName],
        },
        {
            type: 'subsection',
            title: 'Prompt Used',
            items: [({ prompt }) => ({ type: 'text', content: prompt })],
        },
    ],
    inputs: [
        // Two elements per run: a "Run N" label followed by the output itself
        ({ runs }) => {
            const elements = [];
            runs.forEach((run, position) => {
                const output = run.queryResult;
                elements.push({ type: 'text', content: `Run ${position + 1}` });
                // Structured output wins over raw content when it is present
                elements.push(output.structuredOutput
                    ? { type: 'json', content: output.structuredOutput }
                    : { type: 'text', content: output.content });
            });
            return elements;
        },
    ],
    schema: [
        {
            type: 'json',
            content: {
                type: 'object',
                properties: {
                    score: { type: 'number', description: 'Overall score (0-10)' },
                    reasoning: { type: 'string', description: 'Explanation of the score' },
                    details: { type: 'object', description: 'Additional evaluation metrics' },
                },
                required: ['score', 'reasoning'],
            },
        },
    ],
};
103
+ //# sourceMappingURL=base-module.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-module.js","sourceRoot":"","sources":["../../../src/evaluators/base-module.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,CAAC,MAAM,oBAAoB,GAAoC;IACnE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,0CAA0C;QAC1C,yDAAyD;KAC1D;IAED,KAAK,EAAE;QACL,2CAA2C;QAC3C,2DAA2D;QAC3D,iDAAiD;QACjD,yCAAyC;KAC1C;IAED,YAAY,EAAE;QACZ;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,eAAe;YACtB,KAAK,EAAE;gBACL,gEAAgE;gBAChE,+BAA+B;gBAC/B,6CAA6C;gBAC7C,sDAAsD;aACvD;SACF;KACF;IAED,SAAS,EAAE;QACT;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,UAAU;aACxB;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE;gBACL,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;oBACR,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,GAAG,CAAC,MAAM;iBACJ,CAAA;aAClB;SACF;KACF;IAED,MAAM,EAAE;QACN,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACrC,MAAM,MAAM,GAAG,GAAG,CAAC,WAAW,CAAC;YAC/B,MAAM,QAAQ,GAAqC,EAAE,CAAC;YAEtD,aAAa;YACb,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,OAAO,GAAG,GAAG,CAAC,EAAE;aAC1B,CAAC,CAAC;YAEH,gDAAgD;YAChD,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;gBAC5B,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,gBAAgB;iBACjC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;YACL,CAAC;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC;KACH;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,0BAA0B;qBACxC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,+BAA+B;qBAC7C;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;aACjC;SACF;KACF;CACF,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Functional Correctness Evaluator
3
+ *
4
+ * Evaluates whether the output meets the functional requirements
5
+ */
6
+ import type { PromptModule } from '@modular-prompt/core';
7
+ import type { EvaluationContext } from '../types.js';
8
+ declare const _default: {
9
+ name: string;
10
+ description: string;
11
+ module: PromptModule<EvaluationContext>;
12
+ };
13
+ export default _default;
14
+ //# sourceMappingURL=functional-correctness.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"functional-correctness.d.ts","sourceRoot":"","sources":["../../../src/evaluators/functional-correctness.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAmB,iBAAiB,EAAE,MAAM,aAAa,CAAC;;;;;;AA0FtE,wBAI4B"}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Functional Correctness Evaluator
3
+ *
4
+ * Evaluates whether the output meets the functional requirements
5
+ */
6
/**
 * Build the schema fragment used for a single criterion: an object holding a
 * numeric `score` and a textual `reasoning`. A fresh object is returned on
 * every call so the criteria do not share references.
 */
const criterionSchema = () => ({
    type: 'object',
    properties: {
        score: { type: 'number' },
        reasoning: { type: 'string' },
    },
});
/**
 * Prompt module asking the evaluator to judge functional correctness against
 * four criteria and to answer with an overall score, a summary and a
 * per-criterion breakdown.
 */
const functionalCorrectnessModule = {
    createContext: () => {
        return { moduleName: '', prompt: '', runs: [] };
    },
    objective: ['- Assess whether the output correctly fulfills the given requirements'],
    instructions: [
        '- Evaluate based on the following criteria:',
        {
            type: 'subsection',
            title: 'Evaluation Criteria',
            items: [
                '1. **Requirement Fulfillment**: Does it satisfy the intent described in the prompt?',
                '2. **Parameter Correctness**: Are all required parameters present and correct?',
                '3. **Parameter Completeness**: Are optional parameters appropriately used or omitted?',
                '4. **Logical Consistency**: Is the output logically consistent with the facts?',
            ],
        },
        {
            type: 'subsection',
            title: 'Scoring',
            items: [
                '- Assign scores (0-10) for each criterion',
                '- Calculate overall score based on all criteria',
                '- Provide clear reasoning for each score',
            ],
        },
    ],
    schema: [
        {
            type: 'json',
            content: {
                type: 'object',
                properties: {
                    score: { type: 'number', description: 'Overall score (0-10)' },
                    reasoning: { type: 'string', description: 'Summary of evaluation' },
                    // One entry per criterion listed in the instructions
                    details: {
                        type: 'object',
                        properties: {
                            requirementFulfillment: criterionSchema(),
                            parameterCorrectness: criterionSchema(),
                            parameterCompleteness: criterionSchema(),
                            logicalConsistency: criterionSchema(),
                        },
                    },
                },
                required: ['score', 'reasoning', 'details'],
            },
        },
    ],
};
export default {
    name: 'Functional Correctness',
    description: 'Evaluates whether the output meets the functional requirements',
    module: functionalCorrectnessModule,
};
95
+ //# sourceMappingURL=functional-correctness.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"functional-correctness.js","sourceRoot":"","sources":["../../../src/evaluators/functional-correctness.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,MAAM,2BAA2B,GAAoC;IACnE,aAAa,EAAE,GAAsB,EAAE,CAAC,CAAC;QACvC,UAAU,EAAE,EAAE;QACd,MAAM,EAAE,EAAE;QACV,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,SAAS,EAAE;QACT,uEAAuE;KACxE;IAED,YAAY,EAAE;QACZ,6CAA6C;QAC7C;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,qBAAqB;YAC5B,KAAK,EAAE;gBACL,qFAAqF;gBACrF,gFAAgF;gBAChF,uFAAuF;gBACvF,gFAAgF;aACjF;SACF;QACD;YACE,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,SAAS;YAChB,KAAK,EAAE;gBACL,2CAA2C;gBAC3C,iDAAiD;gBACjD,0CAA0C;aAC3C;SACF;KACF;IAED,MAAM,EAAE;QACN;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE;wBACL,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,sBAAsB;qBACpC;oBACD,SAAS,EAAE;wBACT,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,uBAAuB;qBACrC;oBACD,OAAO,EAAE;wBACP,IAAI,EAAE,QAAQ;wBACd,UAAU,EAAE;4BACV,sBAAsB,EAAE;gCACtB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,oBAAoB,EAAE;gCACpB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,qBAAqB,EAAE;gCACrB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;4BACD,kBAAkB,EAAE;gCAClB,IAAI,EAAE,QAAQ;gCACd,UAAU,EAAE;oCACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;oCACzB,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;iCAC9B;6BACF;yBACF;qBACF;iBACF;gBACD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,SAAS,CAAC;aAC5C;SACF;KACF;CACF,CAAC;AAEF,eAAe;IACb,IAAI,EAAE,wBAAwB;IAC9B,WAAW,EAAE,gEAAgE;IAC7E,MAAM,EAAE,2BAA2B;CACV,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * JSON Validator Evaluator
3
+ *
4
+ * Validates JSON structure in structured output
5
+ */
6
+ import type { EvaluationContext, EvaluationResult } from '../types.js';
7
+ declare const _default: {
8
+ name: string;
9
+ description: string;
10
+ evaluate(context: EvaluationContext): Promise<EvaluationResult>;
11
+ };
12
+ export default _default;
13
+ //# sourceMappingURL=json-validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-validator.d.ts","sourceRoot":"","sources":["../../../src/evaluators/json-validator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAiB,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;;;;sBAM5D,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC;;AAJvE,wBAiD0B"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * JSON Validator Evaluator
3
+ *
4
+ * Validates JSON structure in structured output
5
+ */
6
/**
 * Programmatic evaluator that checks whether each run produced structured
 * output that is a JSON object (or array).
 *
 * The score is the share of valid runs scaled to 0-10; with no runs at all the
 * score is 0 and the reasoning says so instead of claiming every output is
 * valid.
 */
const jsonValidator = {
    name: 'JSON Validator',
    description: 'Validates JSON structure in output',
    /**
     * @param context - Evaluation context; reads `moduleName` and `runs`
     * @returns Evaluation result with `score`, `reasoning` and per-run details
     */
    async evaluate(context) {
        const totalCount = context.runs.length;
        const errors = [];
        const runDetails = [];
        let validCount = 0;
        for (const [index, run] of context.runs.entries()) {
            const runNumber = index + 1;
            // A run without a query result cannot have produced structured output;
            // count it as invalid rather than aborting the whole evaluation.
            const structuredOutput = run.queryResult?.structuredOutput;
            if (!structuredOutput) {
                errors.push(`Run ${runNumber}: No structured output`);
                runDetails.push({ run: runNumber, valid: false, error: 'No structured output' });
                continue;
            }
            // Basic JSON validation: anything truthy that is an object passes
            if (typeof structuredOutput === 'object') {
                validCount++;
                runDetails.push({ run: runNumber, valid: true });
            }
            else {
                const error = 'Invalid JSON structure';
                errors.push(`Run ${runNumber}: ${error}`);
                runDetails.push({ run: runNumber, valid: false, error });
            }
        }
        const score = totalCount > 0 ? (validCount / totalCount) * 10 : 0;
        let reasoning;
        if (totalCount === 0) {
            reasoning = 'No runs to evaluate';
        }
        else if (errors.length > 0) {
            reasoning = `${validCount}/${totalCount} valid outputs. Issues: ${errors.join('; ')}`;
        }
        else {
            reasoning = `All ${validCount} outputs have valid JSON structure`;
        }
        return {
            evaluator: 'json-validator',
            moduleName: context.moduleName,
            score,
            reasoning,
            details: {
                validCount,
                totalCount,
                errors,
                runs: runDetails,
            },
        };
    },
};
export default jsonValidator;
51
+ //# sourceMappingURL=json-validator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-validator.js","sourceRoot":"","sources":["../../../src/evaluators/json-validator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,eAAe;IACb,IAAI,EAAE,gBAAgB;IACtB,WAAW,EAAE,oCAAoC;IAEjD,KAAK,CAAC,QAAQ,CAAC,OAA0B;QACvC,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,MAAM,UAAU,GAA2D,EAAE,CAAC;QAE9E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,EAAE,gBAAgB,EAAE,GAAG,GAAG,CAAC,WAAW,CAAC;YAE7C,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;gBAClD,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC,CAAC;gBAC7E,SAAS;YACX,CAAC;YAED,wBAAwB;YACxB,IAAI,OAAO,gBAAgB,KAAK,QAAQ,IAAI,gBAAgB,KAAK,IAAI,EAAE,CAAC;gBACtE,UAAU,EAAE,CAAC;gBACb,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAG,wBAAwB,CAAC;gBACvC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;gBACtC,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;YACnC,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;YACzC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,SAAS,EAAE,gBAAgB;YAC3B,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,KAAK;YACL,SAAS,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;gBAC1B,CAAC,CAAC,GAAG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,2BAA2B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBACpF,CAAC,CAAC,OAAO,UAAU,oCAAoC;YACzD,OAAO,EAAE;gBACP,UAAU;gBACV,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM;gBAC/B,MAAM;gBACN,IAAI,EAAE,UAAU;aACjB;SACF,CAAC;IACJ,CAAC;CACsB,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * @modular-prompt/experiment
3
+ *
4
+ * Experiment framework for comparing and evaluating prompt modules
5
+ */
6
+ export * from './types.js';
7
+ export { loadExperimentConfig } from './config/loader.js';
8
+ export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
9
+ export { baseEvaluationModule } from './evaluators/base-module.js';
10
+ export { DriverManager } from './runner/driver-manager.js';
11
+ export { ExperimentRunner } from './runner/experiment.js';
12
+ export { EvaluatorRunner } from './runner/evaluator.js';
13
+ export { StatisticsReporter } from './reporter/statistics.js';
14
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGzE,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAGnE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * @modular-prompt/experiment
3
+ *
4
+ * Experiment framework for comparing and evaluating prompt modules
5
+ */
6
+ // Types
7
+ export * from './types.js';
8
+ // Configuration loaders
9
+ export { loadExperimentConfig } from './config/loader.js';
10
+ export { loadModules, loadEvaluators } from './config/dynamic-loader.js';
11
+ // Evaluators
12
+ export { baseEvaluationModule } from './evaluators/base-module.js';
13
+ // Runners
14
+ export { DriverManager } from './runner/driver-manager.js';
15
+ export { ExperimentRunner } from './runner/experiment.js';
16
+ export { EvaluatorRunner } from './runner/evaluator.js';
17
+ // Reporters
18
+ export { StatisticsReporter } from './reporter/statistics.js';
19
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,QAAQ;AACR,cAAc,YAAY,CAAC;AAE3B,wBAAwB;AACxB,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAEzE,aAAa;AACb,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAEnE,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,YAAY;AACZ,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Statistics reporter
3
+ */
4
+ import type { TestResult } from '../types.js';
5
+ export declare class StatisticsReporter {
6
+ private results;
7
+ constructor(results: TestResult[]);
8
+ /**
9
+ * Generate and display statistics report
10
+ */
11
+ report(): void;
12
+ /**
13
+ * Report timing statistics
14
+ */
15
+ private reportTiming;
16
+ /**
17
+ * Report output consistency
18
+ */
19
+ private reportConsistency;
20
+ }
21
+ //# sourceMappingURL=statistics.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistics.d.ts","sourceRoot":"","sources":["../../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAa,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,UAAU,EAAE;IAEzC;;OAEG;IACH,MAAM,IAAI,IAAI;IA2Bd;;OAEG;IACH,OAAO,CAAC,YAAY;IASpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;CAwB1B"}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Statistics reporter
3
+ */
4
/**
 * Prints a per-result statistics summary (success rate, timing and output
 * consistency) to the console.
 */
export class StatisticsReporter {
    results;
    /**
     * @param results - Test results; each entry provides `testCase`, `model`,
     *   `module` and `runs` (with `success`, `elapsed` and `content`)
     */
    constructor(results) {
        this.results = results;
    }
    /**
     * Generate and display statistics report
     */
    report() {
        console.log();
        console.log('='.repeat(80));
        console.log('📊 Statistics Summary');
        console.log('='.repeat(80));
        console.log();
        for (const result of this.results) {
            console.log(`${result.testCase} - ${result.model} - [${result.module.toUpperCase()}]`);
            console.log('─'.repeat(80));
            const totalRuns = result.runs.length;
            const successRuns = result.runs.filter((r) => r.success);
            // Avoid 0/0 = NaN when a result has no runs at all
            const successRate = totalRuns > 0 ? (successRuns.length / totalRuns) * 100 : 0;
            console.log(`Success rate: ${successRuns.length}/${totalRuns} (${successRate.toFixed(1)}%)`);
            if (successRuns.length > 0) {
                this.reportTiming(successRuns);
                this.reportConsistency(successRuns);
            }
            console.log();
        }
        console.log('='.repeat(80));
    }
    /**
     * Report timing statistics (average, minimum and maximum of `elapsed`)
     *
     * @param runs - Non-empty list of runs
     */
    reportTiming(runs) {
        const times = runs.map((r) => r.elapsed);
        const avg = times.reduce((a, b) => a + b, 0) / times.length;
        const min = Math.min(...times);
        const max = Math.max(...times);
        console.log(`Execution time: avg=${avg.toFixed(0)}ms, min=${min}ms, max=${max}ms`);
    }
    /**
     * Report output consistency: how many distinct fenced JSON payloads the
     * runs produced. Runs without string content or without a fenced JSON
     * block are ignored; nothing is printed when no run contains one.
     *
     * @param runs - Runs whose `content` is inspected
     */
    reportConsistency(runs) {
        // Count each distinct JSON payload; Map keeps first-seen order
        const variantCounts = new Map();
        let jsonRunCount = 0;
        for (const run of runs) {
            // Content may be absent (e.g. a run that only has structured output)
            if (typeof run.content !== 'string') {
                continue;
            }
            const match = run.content.match(/```json\s*\n([\s\S]*?)\n```/);
            if (!match) {
                continue;
            }
            const payload = match[1].trim();
            variantCounts.set(payload, (variantCounts.get(payload) ?? 0) + 1);
            jsonRunCount++;
        }
        if (jsonRunCount === 0) {
            return;
        }
        console.log(`Output consistency: ${variantCounts.size} unique output(s) from ${jsonRunCount} run(s)`);
        if (variantCounts.size === 1) {
            console.log('✅ All outputs are identical');
            return;
        }
        console.log('⚠️ Outputs vary:');
        let variantNumber = 0;
        for (const [output, count] of variantCounts) {
            variantNumber++;
            console.log(` Variant ${variantNumber} (${count}x): ${output}`);
        }
    }
}
68
+ //# sourceMappingURL=statistics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistics.js","sourceRoot":"","sources":["../../../src/reporter/statistics.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,OAAO,kBAAkB;IACT;IAApB,YAAoB,OAAqB;QAArB,YAAO,GAAP,OAAO,CAAc;IAAG,CAAC;IAE7C;;OAEG;IACH,MAAM;QACJ,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,MAAM,MAAM,CAAC,KAAK,OAAO,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;YACvF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvD,MAAM,WAAW,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;YAEpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,WAAW,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAEtG,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;gBAC/B,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;YACtC,CAAC;YAED,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAiB;QACpC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QAC5D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QAE/B,OAAO,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,GAAG,IAAI,CAAC,CAAC;IACrF,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAiB;QACzC,2BAA2B;QAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAC/B,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,
CAAC,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE3B,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;QACT,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,IAAI,0BAA0B,WAAW,CAAC,MAAM,SAAS,CAAC,CAAC;QAE5G,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;gBAChD,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;gBAC3D,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,GAAG,CAAC,KAAK,KAAK,OAAO,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Module Comparison Experiment
4
+ *
5
+ * Compares the performance and output quality of multiple prompt modules.
6
+ *
7
+ * Usage:
8
+ * modular-experiment <config> [options]
9
+ *
10
+ * Arguments:
11
+ * <config> Config file path (YAML, TypeScript, or JavaScript)
12
+ *
13
+ * Options:
14
+ * --test-case <name> Test case name filter
15
+ * --model <provider> Model provider filter (mlx, vertexai, googlegenai)
16
+ * --modules <names> Comma-separated module names (default: all)
17
+ * --repeat <count> Number of repetitions (default: 1)
18
+ * --evaluate Enable evaluation phase
19
+ * --evaluators <names> Comma-separated evaluator names (default: all)
20
+ */
21
+ export {};
22
+ //# sourceMappingURL=run-comparison.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-comparison.d.ts","sourceRoot":"","sources":["../../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;GAkBG"}