@modular-prompt/experiment 0.1.10 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -369
- package/dist/run-comparison.js +6 -0
- package/dist/run-comparison.js.map +1 -1
- package/dist/runner/experiment.d.ts.map +1 -1
- package/dist/runner/experiment.js +37 -12
- package/dist/runner/experiment.js.map +1 -1
- package/dist/types.d.ts +5 -1
- package/dist/types.d.ts.map +1 -1
- package/examples/tools-experiment.yaml +105 -0
- package/examples/tools-test-module.mjs +29 -0
- package/examples/tools-test-module.ts +29 -0
- package/package.json +7 -5
- package/skills/experiment/SKILL.md +292 -0
package/README.md
CHANGED
|
@@ -1,412 +1,75 @@
|
|
|
1
1
|
# @modular-prompt/experiment
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
プロンプトモジュールの比較・評価フレームワーク。
|
|
4
4
|
|
|
5
|
-
##
|
|
6
|
-
|
|
7
|
-
This framework provides tools to compare and evaluate different prompt module variations under identical conditions. It integrates with the `@modular-prompt/core` system to test multiple prompt variations and evaluate their output quality.
|
|
8
|
-
|
|
9
|
-
### Use Cases
|
|
10
|
-
|
|
11
|
-
- **Prompt Engineering**: Validate the effectiveness of new prompt structures
|
|
12
|
-
- **Module Separation**: Verify that modularized prompts produce equivalent outputs
|
|
13
|
-
- **Quality Evaluation**: Assess output stability and consistency through repeated executions
|
|
14
|
-
- **Multi-Model Testing**: Test across different LLM providers (MLX, VertexAI, GoogleGenAI, etc.)
|
|
15
|
-
|
|
16
|
-
## Features
|
|
17
|
-
|
|
18
|
-
- ✅ **Dynamic Module Loading**: Load prompt modules from external files or inline definitions
|
|
19
|
-
- ✅ **Flexible Evaluators**: Support both code-based and AI-based evaluation
|
|
20
|
-
- ✅ **Statistical Analysis**: Analyze success rates, execution times, and output consistency
|
|
21
|
-
- ✅ **Prompt Diff Detection**: Automatically detect differences between module outputs
|
|
22
|
-
- ✅ **Driver Caching**: Reuse drivers for improved memory efficiency
|
|
23
|
-
- ✅ **Detailed Logging**: Comprehensive logging of all executions
|
|
24
|
-
|
|
25
|
-
## Installation
|
|
5
|
+
## インストール
|
|
26
6
|
|
|
27
7
|
```bash
|
|
28
|
-
|
|
8
|
+
npm install @modular-prompt/experiment
|
|
29
9
|
```
|
|
30
10
|
|
|
31
|
-
##
|
|
11
|
+
## 概要
|
|
32
12
|
|
|
33
|
-
|
|
13
|
+
複数のプロンプトモジュールを同一条件下で比較・評価する。YAML設定で実験を定義し、CLIで実行。
|
|
34
14
|
|
|
35
|
-
|
|
15
|
+
- **プロンプト比較**: 異なるプロンプト構造の効果を定量的に比較
|
|
16
|
+
- **マルチモデルテスト**: 異なるLLMプロバイダーでの動作比較
|
|
17
|
+
- **品質評価**: 繰り返し実行による安定性・一貫性の評価
|
|
18
|
+
- **柔軟な評価器**: コードベース・AIベースの評価をサポート
|
|
36
19
|
|
|
37
|
-
|
|
20
|
+
## クイックスタート
|
|
38
21
|
|
|
39
|
-
|
|
22
|
+
### 1. 設定ファイルを作成
|
|
40
23
|
|
|
41
24
|
```yaml
|
|
25
|
+
# experiment.yaml
|
|
42
26
|
models:
|
|
43
|
-
|
|
44
|
-
provider:
|
|
45
|
-
model:
|
|
46
|
-
capabilities: ["tools", "fast"]
|
|
47
|
-
enabled: true
|
|
27
|
+
gpt4o:
|
|
28
|
+
provider: openai
|
|
29
|
+
model: gpt-4o
|
|
48
30
|
|
|
49
31
|
drivers:
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
location: us-central1
|
|
53
|
-
# Paths are resolved relative to this config file
|
|
54
|
-
# Can use ~/ for home directory or absolute paths
|
|
55
|
-
credentialsPath: ./credentials.json
|
|
32
|
+
openai:
|
|
33
|
+
apiKey: ${OPENAI_API_KEY}
|
|
56
34
|
|
|
57
35
|
modules:
|
|
58
36
|
- name: my-module
|
|
59
37
|
path: ./my-module.ts
|
|
60
|
-
description: My custom prompt module
|
|
61
38
|
|
|
62
39
|
testCases:
|
|
63
|
-
- name:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
query: user question
|
|
67
|
-
context: additional information
|
|
68
|
-
models: # Optional: specify which models to test (uses all enabled if not specified)
|
|
69
|
-
- gemini-fast
|
|
40
|
+
- name: 基本テスト
|
|
41
|
+
input:
|
|
42
|
+
query: "TypeScriptについて説明して"
|
|
70
43
|
|
|
71
|
-
evaluators:
|
|
72
|
-
# Built-in evaluators (name only)
|
|
73
|
-
- name: structured-output-presence
|
|
74
|
-
- name: llm-requirement-fulfillment
|
|
75
|
-
# Or external evaluator (with path)
|
|
76
|
-
- name: custom-validator
|
|
77
|
-
path: ./evaluators/custom-validator.ts
|
|
78
|
-
# Or inline prompt evaluator
|
|
79
|
-
- name: quality-check
|
|
80
|
-
prompt:
|
|
81
|
-
objective:
|
|
82
|
-
- Evaluate output quality
|
|
83
|
-
instructions:
|
|
84
|
-
- Check clarity and accuracy
|
|
85
|
-
|
|
86
|
-
evaluation:
|
|
87
|
-
enabled: true
|
|
88
|
-
model: gemini-fast # Reference by model name
|
|
44
|
+
evaluators: []
|
|
89
45
|
```
|
|
90
46
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
Create `examples/experiment.ts`:
|
|
94
|
-
|
|
95
|
-
```typescript
|
|
96
|
-
export default {
|
|
97
|
-
models: {
|
|
98
|
-
'gemini-fast': {
|
|
99
|
-
provider: 'vertexai',
|
|
100
|
-
model: 'gemini-2.0-flash-exp',
|
|
101
|
-
capabilities: ['tools', 'fast'],
|
|
102
|
-
enabled: true,
|
|
103
|
-
},
|
|
104
|
-
},
|
|
105
|
-
drivers: {
|
|
106
|
-
vertexai: {
|
|
107
|
-
projectId: 'your-project-id',
|
|
108
|
-
location: 'us-central1',
|
|
109
|
-
credentialsPath: './credentials.json',
|
|
110
|
-
},
|
|
111
|
-
},
|
|
112
|
-
modules: [
|
|
113
|
-
{
|
|
114
|
-
name: 'my-module',
|
|
115
|
-
path: './my-module.ts',
|
|
116
|
-
description: 'My custom prompt module',
|
|
117
|
-
},
|
|
118
|
-
],
|
|
119
|
-
testCases: [
|
|
120
|
-
{
|
|
121
|
-
name: 'Basic Test',
|
|
122
|
-
description: 'Test basic functionality',
|
|
123
|
-
input: { // Structured context object
|
|
124
|
-
query: 'user question',
|
|
125
|
-
options: { temperature: 0.7 },
|
|
126
|
-
},
|
|
127
|
-
models: ['gemini-fast'], // Optional
|
|
128
|
-
},
|
|
129
|
-
],
|
|
130
|
-
evaluators: [
|
|
131
|
-
// Built-in evaluators (name only)
|
|
132
|
-
{ name: 'structured-output-presence' },
|
|
133
|
-
{ name: 'llm-requirement-fulfillment' },
|
|
134
|
-
// Or external evaluator (with path)
|
|
135
|
-
{
|
|
136
|
-
name: 'custom-validator',
|
|
137
|
-
path: './evaluators/custom-validator.ts',
|
|
138
|
-
},
|
|
139
|
-
],
|
|
140
|
-
evaluation: {
|
|
141
|
-
enabled: true,
|
|
142
|
-
model: 'gemini-fast', // Reference by model name
|
|
143
|
-
},
|
|
144
|
-
};
|
|
145
|
-
```
|
|
146
|
-
|
|
147
|
-
**TypeScript Support**: TypeScript configuration files are automatically transpiled using [jiti](https://github.com/unjs/jiti). You can use TypeScript syntax directly without pre-compilation. Type annotations are stripped automatically, and the file is executed as JavaScript.
|
|
148
|
-
|
|
149
|
-
**Important**: All file paths in the configuration (modules, evaluators, credentials) are resolved relative to the config file location.
|
|
150
|
-
|
|
151
|
-
### 2. Run Experiment
|
|
152
|
-
|
|
153
|
-
```bash
|
|
154
|
-
# Validate configuration and display execution plan (recommended first step)
|
|
155
|
-
npx modular-experiment examples/experiment.yaml --dry-run
|
|
156
|
-
|
|
157
|
-
# Run with YAML config
|
|
158
|
-
npx modular-experiment examples/experiment.yaml
|
|
159
|
-
|
|
160
|
-
# Run with TypeScript config
|
|
161
|
-
npx modular-experiment examples/experiment.ts
|
|
162
|
-
|
|
163
|
-
# Run specific module
|
|
164
|
-
npx modular-experiment examples/experiment.yaml --modules my-module
|
|
165
|
-
|
|
166
|
-
# Run with evaluation
|
|
167
|
-
npx modular-experiment examples/experiment.yaml --evaluate
|
|
168
|
-
|
|
169
|
-
# Run multiple times for statistics
|
|
170
|
-
npx modular-experiment examples/experiment.yaml --repeat 10
|
|
171
|
-
|
|
172
|
-
# Run with detailed logging to JSONL file
|
|
173
|
-
npx modular-experiment examples/experiment.yaml --log-file experiment.jsonl
|
|
174
|
-
|
|
175
|
-
# Run with verbose output (show internal operations)
|
|
176
|
-
npx modular-experiment examples/experiment.yaml --verbose
|
|
177
|
-
|
|
178
|
-
# Combine options
|
|
179
|
-
npx modular-experiment examples/experiment.yaml --evaluate --log-file experiment.jsonl --verbose
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
## Configuration
|
|
183
|
-
|
|
184
|
-
### Module Definition
|
|
185
|
-
|
|
186
|
-
Modules can be defined inline or loaded from external files:
|
|
187
|
-
|
|
188
|
-
```typescript
|
|
189
|
-
// External file
|
|
190
|
-
export const modules: ModuleReference[] = [
|
|
191
|
-
{
|
|
192
|
-
name: 'my-module',
|
|
193
|
-
path: './modules/my-module.ts',
|
|
194
|
-
description: 'Description',
|
|
195
|
-
},
|
|
196
|
-
];
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
A module file should export a default object with:
|
|
47
|
+
### 2. モジュールファイルを作成
|
|
200
48
|
|
|
201
49
|
```typescript
|
|
50
|
+
// my-module.ts
|
|
202
51
|
import { compile } from '@modular-prompt/core';
|
|
203
|
-
import { myPromptModule } from './prompts.js';
|
|
204
52
|
|
|
205
53
|
export default {
|
|
206
54
|
name: 'My Module',
|
|
207
|
-
description: 'Module description',
|
|
208
55
|
compile: (context: any) => compile(myPromptModule, context),
|
|
209
56
|
};
|
|
210
57
|
```
|
|
211
58
|
|
|
212
|
-
###
|
|
213
|
-
|
|
214
|
-
Two types of evaluators are supported:
|
|
215
|
-
|
|
216
|
-
#### 1. Code Evaluator
|
|
217
|
-
|
|
218
|
-
Programmatic validation (e.g., JSON structure validation):
|
|
219
|
-
|
|
220
|
-
```typescript
|
|
221
|
-
import type { CodeEvaluator, EvaluationContext, EvaluationResult } from '@modular-prompt/experiment';
|
|
222
|
-
|
|
223
|
-
export default {
|
|
224
|
-
name: 'JSON Validator',
|
|
225
|
-
description: 'Validates JSON structure in output',
|
|
226
|
-
|
|
227
|
-
async evaluate(context: EvaluationContext): Promise<EvaluationResult> {
|
|
228
|
-
// Validation logic
|
|
229
|
-
return {
|
|
230
|
-
evaluator: 'json-validator',
|
|
231
|
-
moduleName: context.moduleName,
|
|
232
|
-
score: 10,
|
|
233
|
-
reasoning: 'Valid JSON structure',
|
|
234
|
-
};
|
|
235
|
-
},
|
|
236
|
-
} satisfies CodeEvaluator;
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
#### 2. Prompt Evaluator
|
|
240
|
-
|
|
241
|
-
AI-based evaluation using LLM:
|
|
242
|
-
|
|
243
|
-
```typescript
|
|
244
|
-
import type { PromptEvaluator, EvaluationContext } from '@modular-prompt/experiment';
|
|
245
|
-
import type { PromptModule } from '@modular-prompt/core';
|
|
246
|
-
|
|
247
|
-
const evaluationModule: PromptModule<EvaluationContext> = {
|
|
248
|
-
createContext: (): EvaluationContext => ({
|
|
249
|
-
moduleName: '',
|
|
250
|
-
prompt: '',
|
|
251
|
-
runs: [],
|
|
252
|
-
}),
|
|
253
|
-
|
|
254
|
-
objective: [
|
|
255
|
-
'- Assess output quality',
|
|
256
|
-
],
|
|
257
|
-
|
|
258
|
-
instructions: [
|
|
259
|
-
'- Evaluate clarity and accuracy',
|
|
260
|
-
],
|
|
261
|
-
};
|
|
262
|
-
|
|
263
|
-
export default {
|
|
264
|
-
name: 'Quality Evaluator',
|
|
265
|
-
description: 'Evaluates output quality',
|
|
266
|
-
module: evaluationModule,
|
|
267
|
-
} satisfies PromptEvaluator;
|
|
268
|
-
```
|
|
269
|
-
|
|
270
|
-
All prompt evaluators are automatically merged with the base evaluation module.
|
|
271
|
-
|
|
272
|
-
## Built-in Evaluators
|
|
273
|
-
|
|
274
|
-
The framework includes built-in evaluators that can be referenced by name only (no path required):
|
|
275
|
-
|
|
276
|
-
### structured-output-presence
|
|
277
|
-
|
|
278
|
-
- **Type**: Code Evaluator
|
|
279
|
-
- **What it measures**: Checks if `structuredOutput` exists and is a valid object
|
|
280
|
-
- **Evaluation logic**:
|
|
281
|
-
- Verifies presence of `structuredOutput` in query result
|
|
282
|
-
- Confirms it's a non-null object type
|
|
283
|
-
- **Score**: `(validCount / totalRuns) * 10`
|
|
284
|
-
- **Use case**: Verify that the model returns structured JSON output (essential for structured output workflows)
|
|
285
|
-
- **Usage**:
|
|
286
|
-
```yaml
|
|
287
|
-
evaluators:
|
|
288
|
-
- name: "structured-output-presence"
|
|
289
|
-
```
|
|
290
|
-
|
|
291
|
-
### llm-requirement-fulfillment
|
|
292
|
-
|
|
293
|
-
- **Type**: Prompt Evaluator (uses LLM for evaluation)
|
|
294
|
-
- **What it measures**: Uses LLM to comprehensively evaluate whether output meets functional requirements
|
|
295
|
-
- **Evaluation criteria**:
|
|
296
|
-
1. **Requirement Fulfillment**: Does it satisfy the intent described in the prompt?
|
|
297
|
-
2. **Parameter Correctness**: Are all required parameters present and correct?
|
|
298
|
-
3. **Parameter Completeness**: Are optional parameters appropriately used or omitted?
|
|
299
|
-
4. **Logical Consistency**: Is the output logically consistent with the facts?
|
|
300
|
-
- **Score**: 0-10 overall score with detailed sub-scores for each criterion
|
|
301
|
-
- **Use case**: Comprehensive quality assessment of output (requires evaluation model to be configured)
|
|
302
|
-
- **Usage**:
|
|
303
|
-
```yaml
|
|
304
|
-
evaluators:
|
|
305
|
-
- name: "llm-requirement-fulfillment"
|
|
306
|
-
|
|
307
|
-
evaluation:
|
|
308
|
-
enabled: true
|
|
309
|
-
model: "gemini-fast" # Model used for evaluation
|
|
310
|
-
```
|
|
311
|
-
|
|
312
|
-
**Note**: `llm-requirement-fulfillment` requires an evaluation model to be configured in the `evaluation` section.
|
|
313
|
-
|
|
314
|
-
## Architecture
|
|
315
|
-
|
|
316
|
-
```
|
|
317
|
-
┌─────────────────────────────────────────┐
|
|
318
|
-
│ run-comparison.ts (CLI Entry Point) │
|
|
319
|
-
└─────────────────────────────────────────┘
|
|
320
|
-
│
|
|
321
|
-
┌─────────┼─────────┐
|
|
322
|
-
▼ ▼ ▼
|
|
323
|
-
┌────────┐ ┌────────┐ ┌────────┐
|
|
324
|
-
│ Config │ │ Runner │ │Reporter│
|
|
325
|
-
│ Loader │ │ │ │ │
|
|
326
|
-
└────────┘ └────────┘ └────────┘
|
|
327
|
-
│ │
|
|
328
|
-
▼ ▼
|
|
329
|
-
┌────────┐ ┌────────┐
|
|
330
|
-
│Dynamic │ │Driver │
|
|
331
|
-
│Loader │ │Manager │
|
|
332
|
-
└────────┘ └────────┘
|
|
333
|
-
```
|
|
334
|
-
|
|
335
|
-
### Components
|
|
336
|
-
|
|
337
|
-
| Component | Responsibility |
|
|
338
|
-
|-----------|----------------|
|
|
339
|
-
| `config/loader.ts` | Load YAML configuration |
|
|
340
|
-
| `config/dynamic-loader.ts` | Dynamic module/evaluator loading |
|
|
341
|
-
| `runner/experiment.ts` | Orchestrate experiment execution |
|
|
342
|
-
| `runner/evaluator.ts` | Execute evaluations |
|
|
343
|
-
| `runner/driver-manager.ts` | Cache and manage AI drivers |
|
|
344
|
-
| `reporter/statistics.ts` | Generate statistical reports |
|
|
345
|
-
| `base-evaluation-module.ts` | Base evaluation prompt module |
|
|
346
|
-
| `evaluators/index.ts` | Built-in evaluator registry |
|
|
347
|
-
|
|
348
|
-
## Examples
|
|
349
|
-
|
|
350
|
-
See `examples/experiment.yaml` for a complete configuration template with:
|
|
351
|
-
- Model definitions (MLX, Vertex AI, Google GenAI)
|
|
352
|
-
- Driver configurations with credential paths
|
|
353
|
-
- Evaluation settings
|
|
354
|
-
- Empty sections for modules, test cases, and evaluators (ready for your content)
|
|
355
|
-
|
|
356
|
-
## API
|
|
357
|
-
|
|
358
|
-
### Programmatic Usage
|
|
359
|
-
|
|
360
|
-
```typescript
|
|
361
|
-
import {
|
|
362
|
-
loadExperimentConfig,
|
|
363
|
-
loadModules,
|
|
364
|
-
loadEvaluators,
|
|
365
|
-
ExperimentRunner,
|
|
366
|
-
DriverManager,
|
|
367
|
-
} from '@modular-prompt/experiment';
|
|
368
|
-
|
|
369
|
-
const { serverConfig, aiService } = loadExperimentConfig('config.yaml');
|
|
370
|
-
const modules = await loadModules(moduleRefs, basePath);
|
|
371
|
-
const evaluators = await loadEvaluators(evaluatorRefs, basePath);
|
|
372
|
-
|
|
373
|
-
const driverManager = new DriverManager();
|
|
374
|
-
const runner = new ExperimentRunner(
|
|
375
|
-
aiService,
|
|
376
|
-
driverManager,
|
|
377
|
-
modules,
|
|
378
|
-
testCases,
|
|
379
|
-
models,
|
|
380
|
-
repeatCount,
|
|
381
|
-
evaluators,
|
|
382
|
-
evaluatorModel
|
|
383
|
-
);
|
|
384
|
-
|
|
385
|
-
const results = await runner.run();
|
|
386
|
-
await driverManager.cleanup();
|
|
387
|
-
```
|
|
388
|
-
|
|
389
|
-
## CLI Options
|
|
59
|
+
### 3. 実行
|
|
390
60
|
|
|
61
|
+
```bash
|
|
62
|
+
npx modular-experiment experiment.yaml --dry-run # 確認
|
|
63
|
+
npx modular-experiment experiment.yaml # 実行
|
|
64
|
+
npx modular-experiment experiment.yaml --evaluate # 評価付き
|
|
65
|
+
npx modular-experiment experiment.yaml --repeat 10 # 複数回実行
|
|
391
66
|
```
|
|
392
|
-
Usage: modular-experiment <config> [options]
|
|
393
67
|
|
|
394
|
-
|
|
395
|
-
<config> Config file path (YAML or TypeScript)
|
|
68
|
+
設定ファイルの詳細、評価器の書き方、プログラマティックAPIについては `skills/experiment/SKILL.md` を参照。
|
|
396
69
|
|
|
397
|
-
|
|
398
|
-
--test-case <name> Test case name filter
|
|
399
|
-
--model <provider> Model provider filter
|
|
400
|
-
--modules <names> Comma-separated module names (default: all)
|
|
401
|
-
--repeat <count> Number of repetitions (default: 1)
|
|
402
|
-
--evaluate Enable evaluation phase
|
|
403
|
-
--evaluators <names> Comma-separated evaluator names (default: all)
|
|
404
|
-
--dry-run Display execution plan without running the experiment
|
|
405
|
-
--log-file <path> Log file path for JSONL output (detailed logs)
|
|
406
|
-
--verbose Enable verbose output (show detailed internal operations)
|
|
407
|
-
```
|
|
70
|
+
## Skills (for Claude Code)
|
|
408
71
|
|
|
409
|
-
|
|
72
|
+
This package includes `skills/experiment/SKILL.md`. It can be used as a Claude Code skill to guide experiment framework usage.
|
|
410
73
|
|
|
411
74
|
## License
|
|
412
75
|
|
package/dist/run-comparison.js
CHANGED
|
@@ -159,6 +159,12 @@ const results = await runner.run();
|
|
|
159
159
|
console.log('='.repeat(80));
|
|
160
160
|
console.log('✨ Experiment completed');
|
|
161
161
|
console.log('='.repeat(80));
|
|
162
|
+
// Flush log file if configured
|
|
163
|
+
if (options.logFile) {
|
|
164
|
+
const { logger } = await import('./logger.js');
|
|
165
|
+
await logger.flush();
|
|
166
|
+
console.log(`📄 Log file written: ${options.logFile}`);
|
|
167
|
+
}
|
|
162
168
|
// Cleanup drivers
|
|
163
169
|
await driverManager.cleanup();
|
|
164
170
|
// Display statistics if repeated
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-comparison.js","sourceRoot":"","sources":["../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,sBAAsB;AACtB,MAAM,OAAO,GAAG,SAAS,EAAE,CAAC;AAE5B,mBAAmB;AACnB,MAAM,CAAC,SAAS,CAAC;IACf,KAAK,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;IACzC,eAAe,EAAE,OAAO;IACxB,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO;IAC7B,UAAU,EAAE,KAAK;IACjB,OAAO,EAAE,OAAO,CAAC,OAAO;CACzB,CAAC,CAAC;AAEH,iBAAiB;AACjB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;AAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;AAC7C,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,cAAc,IAAI,KAAK,EAAE,CAAC,CAAC;AACpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO,CAAC,WAAW,IAAI,oBAAoB,EAAE,CAAC,CAAC;AAC5E,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,WAAW,UAAU,CAAC,CAAC;AACtD,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAChF,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,eAAe,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AAC7E,CAAC;AACD,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAC/E,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,qBAAqB;AACrB,MAAM,EACJ,YAAY,EACZ,OAAO,EAAE,aAAa,EACtB,SAAS,EAAE,eAAe,EAC1B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EACT,SAAS,EACV,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAEnD,8CAA8C;AAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;AAEnC
,uCAAuC;AACvC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAC9E,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CACvD,CAAC;AAEF,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CACvE,IAAI,CAAC,QAAQ,KAAK,OAAO,CAAC,WAAW,CACtC,CAAC;IACF,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,gDAAgD,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,mBAAmB,eAAe,CAAC,MAAM,0BAA0B,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IACxG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;KAAM,CAAC;IACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,YAAY,CAAC,MAAM,YAAY,CAAC,CAAC;IAChE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACnD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;AAED,gCAAgC;AAChC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC;AACxF,IAAI,MAAM,EAAE,CAAC;IACX,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,4GAA4G,CAAC,CAAC;AAC5H,CAAC;AACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,kBAAkB;AAClB,MAAM,YAAY,GAAG,eAAe,CAAC;AACrC,MAAM,SAAS,GAAG,OAAO,CAAC,cAAc;IACtC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,CAAC,cAAc,CAAC;IACtE,CAAC,CAAC,YAAY,CAAC;AAEjB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IAC3B,OAAO,CAAC,KAAK,CAAC,wBAAwB,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,cAAc,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9G,OAAO,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC;AAC3D,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,wCAAwC;AACxC,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;AAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY;IAClC,C
AAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,YAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC,UAAU,CAAC;AAEf,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACtC,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,MAAM,aAAa,CAAC,CAAC;AACvD,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,8DAA8D;AAC9D,IAAI,UAAU,CAAC;AACf,IAAI,cAAc,CAAC;AACnB,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,8CAA8C;IAC9C,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,gBAAgB,EAAE,SAAS,CAAC,CAAC;IACxE,UAAU,GAAG,OAAO,CAAC,eAAe;QAClC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,eAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtE,CAAC,CAAC,aAAa,CAAC;IAElB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,8CAA8C;IAC9C,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;QACjE,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAC/D,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,gBAAgB,GAAG,YAAY,CAAC,UAAU,CAAC;IAEjD,mCAAmC;IACnC,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC;IACzC,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,yEAAyE,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,cAAc,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAEtD,OAAO,CAAC,GAAG,CAAC,8BAA8B,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC7E,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,KAAK,C
AAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACpF,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;IAC3F,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,kBAAkB;AAClB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;IACjD,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;IAC7D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,iBAAiB;AACjB,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;AAC1C,MAAM,MAAM,GAAG,IAAI,gBAAgB,CACjC,SAAS,EACT,aAAa,EACb,OAAO,EACP,SAAS,EACT,MAAM,EACN,OAAO,CAAC,WAAW,EACnB,UAAU,EACV,cAAc,CACf,CAAC;AAEF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE,CAAC;AAEnC,qBAAqB;AACrB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAE5B,kBAAkB;AAClB,MAAM,aAAa,CAAC,OAAO,EAAE,CAAC;AAE9B,iCAAiC;AACjC,IAAI,OAAO,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACjD,QAAQ,CAAC,MAAM,EAAE,CAAC;AACpB,CAAC"}
|
|
1
|
+
{"version":3,"file":"run-comparison.js","sourceRoot":"","sources":["../src/run-comparison.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,sBAAsB;AACtB,MAAM,OAAO,GAAG,SAAS,EAAE,CAAC;AAE5B,mBAAmB;AACnB,MAAM,CAAC,SAAS,CAAC;IACf,KAAK,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;IACzC,eAAe,EAAE,OAAO;IACxB,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO;IAC7B,UAAU,EAAE,KAAK;IACjB,OAAO,EAAE,OAAO,CAAC,OAAO;CACzB,CAAC,CAAC;AAEH,iBAAiB;AACjB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;AAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;AAC7C,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,cAAc,IAAI,KAAK,EAAE,CAAC,CAAC;AACpE,OAAO,CAAC,GAAG,CAAC,iBAAiB,OAAO,CAAC,WAAW,IAAI,oBAAoB,EAAE,CAAC,CAAC;AAC5E,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,WAAW,UAAU,CAAC,CAAC;AACtD,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAChF,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,eAAe,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;AAC7E,CAAC;AACD,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;AAC/E,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,qBAAqB;AACrB,MAAM,EACJ,YAAY,EACZ,OAAO,EAAE,aAAa,EACtB,SAAS,EAAE,eAAe,EAC1B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EACT,SAAS,EACV,GAAG,MAAM,oBAAoB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAEnD,8CAA8C;AAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;AAEnC
,uCAAuC;AACvC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAC9E,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CACvD,CAAC;AAEF,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CACvE,IAAI,CAAC,QAAQ,KAAK,OAAO,CAAC,WAAW,CACtC,CAAC;IACF,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,gDAAgD,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,mBAAmB,eAAe,CAAC,MAAM,0BAA0B,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IACxG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;KAAM,CAAC;IACN,OAAO,CAAC,GAAG,CAAC,mBAAmB,YAAY,CAAC,MAAM,YAAY,CAAC,CAAC;IAChE,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAgB,EAAE,EAAE,CACnD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,KAAK,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,QAAQ,GAAG,CAAC,CAC7D,CAAC;AACJ,CAAC;AAED,gCAAgC;AAChC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAgB,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC;AACxF,IAAI,MAAM,EAAE,CAAC;IACX,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,4GAA4G,CAAC,CAAC;AAC5H,CAAC;AACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,kBAAkB;AAClB,MAAM,YAAY,GAAG,eAAe,CAAC;AACrC,MAAM,SAAS,GAAG,OAAO,CAAC,cAAc;IACtC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,CAAC,cAAc,CAAC;IACtE,CAAC,CAAC,YAAY,CAAC;AAEjB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IAC3B,OAAO,CAAC,KAAK,CAAC,wBAAwB,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,cAAc,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9G,OAAO,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC;AAC3D,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,wCAAwC;AACxC,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;AAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY;IAClC,C
AAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,YAAa,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC,UAAU,CAAC;AAEf,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACtC,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,MAAM,aAAa,CAAC,CAAC;AACvD,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AACrE,OAAO,CAAC,GAAG,EAAE,CAAC;AAEd,8DAA8D;AAC9D,IAAI,UAAU,CAAC;AACf,IAAI,cAAc,CAAC;AACnB,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAC7B,8CAA8C;IAC9C,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,gBAAgB,EAAE,SAAS,CAAC,CAAC;IACxE,UAAU,GAAG,OAAO,CAAC,eAAe;QAClC,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,eAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtE,CAAC,CAAC,aAAa,CAAC;IAElB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,8CAA8C;IAC9C,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;QACjE,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAC/D,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,gBAAgB,GAAG,YAAY,CAAC,UAAU,CAAC;IAEjD,mCAAmC;IACnC,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC;IACzC,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,KAAK,CAAC,yEAAyE,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,cAAc,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAEtD,OAAO,CAAC,GAAG,CAAC,8BAA8B,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC7E,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,KAAK,C
AAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACpF,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;IAC3F,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,kBAAkB;AAClB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;IACjD,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;IAC7D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,iBAAiB;AACjB,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;AAC1C,MAAM,MAAM,GAAG,IAAI,gBAAgB,CACjC,SAAS,EACT,aAAa,EACb,OAAO,EACP,SAAS,EACT,MAAM,EACN,OAAO,CAAC,WAAW,EACnB,UAAU,EACV,cAAc,CACf,CAAC;AAEF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE,CAAC;AAEnC,qBAAqB;AACrB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;AAE5B,+BAA+B;AAC/B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;IACpB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;IAC/C,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACrB,OAAO,CAAC,GAAG,CAAC,wBAAwB,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;AACzD,CAAC;AAED,kBAAkB;AAClB,MAAM,aAAa,CAAC,OAAO,EAAE,CAAC;AAE9B,iCAAiC;AACjC,IAAI,OAAO,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACjD,QAAQ,CAAC,MAAM,EAAE,CAAC;AACpB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"experiment.d.ts","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAe,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAChF,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAuC,MAAM,aAAa,CAAC;AAC/G,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAMnE,qBAAa,gBAAgB;IAEzB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,aAAa;IACrB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,cAAc,CAAC;gBAPf,SAAS,EAAE,SAAS,EACpB,aAAa,EAAE,aAAa,EAC5B,OAAO,EAAE,gBAAgB,EAAE,EAC3B,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,EACjC,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,eAAe,EAAE,YAAA,EAC9B,cAAc,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,SAAS,CAAA;KAAE,YAAA;IAG5D;;;;OAIG;IACG,GAAG,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"experiment.d.ts","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAe,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAChF,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAuC,MAAM,aAAa,CAAC;AAC/G,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAMnE,qBAAa,gBAAgB;IAEzB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,aAAa;IACrB,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,UAAU,CAAC;IACnB,OAAO,CAAC,cAAc,CAAC;gBAPf,SAAS,EAAE,SAAS,EACpB,aAAa,EAAE,aAAa,EAC5B,OAAO,EAAE,gBAAgB,EAAE,EAC3B,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,EACjC,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,eAAe,EAAE,YAAA,EAC9B,cAAc,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,SAAS,CAAA;KAAE,YAAA;IAG5D;;;;OAIG;IACG,GAAG,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;IAkHlC;;OAEG;YACW,aAAa;IA8D3B;;OAEG;YACW,kBAAkB;IA2BhC;;OAEG;IACH,OAAO,CAAC,cAAc;CAiCvB"}
|
|
@@ -32,6 +32,8 @@ export class ExperimentRunner {
|
|
|
32
32
|
async run() {
|
|
33
33
|
const allResults = [];
|
|
34
34
|
const evaluationContexts = [];
|
|
35
|
+
// ドライバー切り替えをテストケースをまたいでトラッキング
|
|
36
|
+
let activeModelName = null;
|
|
35
37
|
for (const testCase of this.testCases) {
|
|
36
38
|
console.log('─'.repeat(80));
|
|
37
39
|
console.log(`Test Case: ${testCase.name}`);
|
|
@@ -75,23 +77,26 @@ export class ExperimentRunner {
|
|
|
75
77
|
continue;
|
|
76
78
|
}
|
|
77
79
|
// Test with each model
|
|
78
|
-
let previousDriver = null;
|
|
79
|
-
let previousModelName = null;
|
|
80
80
|
for (const { name: modelName, spec: modelSpec } of modelsToTest) {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
81
|
+
// モデル切り替え: 前のドライバーをクローズしてから新しいモデルを起動
|
|
82
|
+
if (activeModelName && activeModelName !== modelName) {
|
|
83
|
+
logger.info(`Closing driver: ${activeModelName} (switching to ${modelName})`);
|
|
84
|
+
await this.driverManager.close(activeModelName);
|
|
85
|
+
activeModelName = null;
|
|
86
|
+
}
|
|
87
|
+
if (activeModelName === modelName) {
|
|
88
|
+
logger.verbose(`Reusing driver for ${modelName}`);
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
logger.info(`Creating new driver for ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
|
|
87
92
|
}
|
|
93
|
+
console.log(`🤖 Testing with ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
|
|
88
94
|
// Get or create driver for this model
|
|
89
95
|
const driver = await this.driverManager.getOrCreate(this.aiService, modelName, modelSpec);
|
|
90
|
-
|
|
91
|
-
previousModelName = modelName;
|
|
96
|
+
activeModelName = modelName;
|
|
92
97
|
// Test each module
|
|
93
98
|
for (const { name, compiled, prompt } of compiledModules) {
|
|
94
|
-
const runs = await this.runModuleTest(name, compiled, driver);
|
|
99
|
+
const runs = await this.runModuleTest(name, compiled, driver, testCase);
|
|
95
100
|
allResults.push({
|
|
96
101
|
testCase: testCase.name,
|
|
97
102
|
model: modelName,
|
|
@@ -100,6 +105,8 @@ export class ExperimentRunner {
|
|
|
100
105
|
success: r.success,
|
|
101
106
|
elapsed: r.elapsed,
|
|
102
107
|
content: r.queryResult?.content || '',
|
|
108
|
+
toolCalls: r.queryResult?.toolCalls,
|
|
109
|
+
finishReason: r.queryResult?.finishReason,
|
|
103
110
|
error: r.error,
|
|
104
111
|
})),
|
|
105
112
|
});
|
|
@@ -124,7 +131,7 @@ export class ExperimentRunner {
|
|
|
124
131
|
/**
|
|
125
132
|
* Run module test with multiple repetitions
|
|
126
133
|
*/
|
|
127
|
-
async runModuleTest(moduleName, compiled, driver) {
|
|
134
|
+
async runModuleTest(moduleName, compiled, driver, testCase) {
|
|
128
135
|
logger.verbose(`Running ${this.repeatCount} time(s) for module: ${moduleName}`);
|
|
129
136
|
const runs = [];
|
|
130
137
|
for (let i = 0; i < this.repeatCount; i++) {
|
|
@@ -134,9 +141,27 @@ export class ExperimentRunner {
|
|
|
134
141
|
const result = await driver.query(compiled, {
|
|
135
142
|
temperature: 0.7,
|
|
136
143
|
maxTokens: 2048,
|
|
144
|
+
...testCase.queryOptions,
|
|
137
145
|
});
|
|
138
146
|
const elapsed = Date.now() - startTime;
|
|
139
147
|
logger.verbose(`Module ${moduleName} run ${i + 1}: Success (${elapsed}ms)`);
|
|
148
|
+
// Display result summary (思考ブロックはプレビューから除外)
|
|
149
|
+
// パターン: <think>...</think> または 先頭から</think>まで(テンプレートが<think>を付与する場合)
|
|
150
|
+
const displayContent = result.content
|
|
151
|
+
.replace(/<think>[\s\S]*?<\/think>\s*/g, '')
|
|
152
|
+
.replace(/^[\s\S]*?<\/think>\s*/g, '');
|
|
153
|
+
const contentPreview = displayContent.length > 200
|
|
154
|
+
? displayContent.substring(0, 200) + '...'
|
|
155
|
+
: displayContent;
|
|
156
|
+
console.log(` ✅ [${moduleName}] run ${i + 1} (${elapsed}ms) finishReason=${result.finishReason || 'unknown'}`);
|
|
157
|
+
if (result.toolCalls && result.toolCalls.length > 0) {
|
|
158
|
+
for (const tc of result.toolCalls) {
|
|
159
|
+
console.log(` 🔧 toolCall: ${tc.name}(${JSON.stringify(tc.arguments)})`);
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
if (contentPreview.trim()) {
|
|
163
|
+
console.log(` 📝 ${contentPreview}`);
|
|
164
|
+
}
|
|
140
165
|
runs.push({
|
|
141
166
|
success: true,
|
|
142
167
|
elapsed,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"experiment.js","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAKhE,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAE5C,MAAM,OAAO,gBAAgB;IAEjB;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IARV,YACU,SAAoB,EACpB,aAA4B,EAC5B,OAA2B,EAC3B,SAAqB,EACrB,MAAiC,EACjC,WAAmB,EACnB,UAA8B,EAC9B,cAAkD;QAPlD,cAAS,GAAT,SAAS,CAAW;QACpB,kBAAa,GAAb,aAAa,CAAe;QAC5B,YAAO,GAAP,OAAO,CAAoB;QAC3B,cAAS,GAAT,SAAS,CAAY;QACrB,WAAM,GAAN,MAAM,CAA2B;QACjC,gBAAW,GAAX,WAAW,CAAQ;QACnB,eAAU,GAAV,UAAU,CAAoB;QAC9B,mBAAc,GAAd,cAAc,CAAoC;IACzD,CAAC;IAEJ;;;;OAIG;IACH,KAAK,CAAC,GAAG;QACP,MAAM,UAAU,GAAiB,EAAE,CAAC;QACpC,MAAM,kBAAkB,GAAwB,EAAE,CAAC;QAEnD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;gBACzB,OAAO,CAAC,GAAG,CAAC,gBAAgB,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YACtD,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;YAEd,qDAAqD;YACrD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;gBAChD,MAAM,CAAC,OAAO,CAAC,gCAAgC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC9D,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAChD,MAAM,MAAM,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;gBAChD,MAAM,CAAC,OAAO,CAAC,qBAAqB,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;gBAE3E,OAAO;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,QAAQ;oBACR,MAAM;iBACP,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,sCAAsC;YACtC,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,CAAC;YACvC,CAAC;YAED,oDAAoD;YACpD,MAAM,YAAY,GAA6C,QAAQ,CAAC,MAAM;gBAC5E,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/B,IAAI,CAAC,IAAI,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,cAAc,IAAI,wCAAwC,CAAC,CAAC
;wBACzE,OAAO,IAAI,CAAC;oBACd,CAAC;oBACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACxB,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAA6C;gBAChE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC;qBACxB,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;qBACrC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAE/C,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;gBAClE,OAAO,CAAC,GAAG,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,uBAAuB;YACvB,
|
|
1
|
+
{"version":3,"file":"experiment.js","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAKhE,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAE5C,MAAM,OAAO,gBAAgB;IAEjB;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IARV,YACU,SAAoB,EACpB,aAA4B,EAC5B,OAA2B,EAC3B,SAAqB,EACrB,MAAiC,EACjC,WAAmB,EACnB,UAA8B,EAC9B,cAAkD;QAPlD,cAAS,GAAT,SAAS,CAAW;QACpB,kBAAa,GAAb,aAAa,CAAe;QAC5B,YAAO,GAAP,OAAO,CAAoB;QAC3B,cAAS,GAAT,SAAS,CAAY;QACrB,WAAM,GAAN,MAAM,CAA2B;QACjC,gBAAW,GAAX,WAAW,CAAQ;QACnB,eAAU,GAAV,UAAU,CAAoB;QAC9B,mBAAc,GAAd,cAAc,CAAoC;IACzD,CAAC;IAEJ;;;;OAIG;IACH,KAAK,CAAC,GAAG;QACP,MAAM,UAAU,GAAiB,EAAE,CAAC;QACpC,MAAM,kBAAkB,GAAwB,EAAE,CAAC;QAEnD,8BAA8B;QAC9B,IAAI,eAAe,GAAkB,IAAI,CAAC;QAE1C,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;gBACzB,OAAO,CAAC,GAAG,CAAC,gBAAgB,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YACtD,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;YAEd,qDAAqD;YACrD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;gBAChD,MAAM,CAAC,OAAO,CAAC,gCAAgC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC9D,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAChD,MAAM,MAAM,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;gBAChD,MAAM,CAAC,OAAO,CAAC,qBAAqB,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;gBAE3E,OAAO;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,QAAQ;oBACR,MAAM;iBACP,CAAC;YACJ,CAAC,CAAC,CAAC;YAEH,sCAAsC;YACtC,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,CAAC;YACvC,CAAC;YAED,oDAAoD;YACpD,MAAM,YAAY,GAA6C,QAAQ,CAAC,MAAM;gBAC5E,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/B,IAAI,CAAC,IAAI,EAAE,CAAC;wBACV,O
AAO,CAAC,IAAI,CAAC,cAAc,IAAI,wCAAwC,CAAC,CAAC;wBACzE,OAAO,IAAI,CAAC;oBACd,CAAC;oBACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACxB,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAA6C;gBAChE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC;qBACxB,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;qBACrC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAE/C,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;gBAClE,OAAO,CAAC,GAAG,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,uBAAuB;YACvB,KAAK,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,YAAY,EAAE,CAAC;gBAChE,qCAAqC;gBACrC,IAAI,eAAe,IAAI,eAAe,KAAK,SAAS,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC,mBAAmB,eAAe,kBAAkB,SAAS,GAAG,CAAC,CAAC;oBAC9E,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;oBAChD,eAAe,GAAG,IAAI,CAAC;gBACzB,CAAC;gBAED,IAAI,eAAe,KAAK,SAAS,EAAE,CAAC;oBAClC,MAAM,CAAC,OAAO,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAC;gBACpD,CAAC;qBAAM,CAAC;oBACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;gBACjG,CAAC;gBACD,OAAO,CAAC,GAAG,CAAC,mBAAmB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;gBAEvF,sCAAsC;gBACtC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;gBAC1F,eAAe,GAAG,SAAS,CAAC;gBAE5B,mBAAmB;gBACnB,KAAK,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;oBACzD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;oBAExE,UAAU,CAAC,IAAI,CAAC;wBACd,QAAQ,EAAE,QAAQ,CAAC,IAAI;wBACvB,KAAK,EAAE,SAAS;wBAChB,MAAM,EAAE,IAAI;wBACZ,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;4BACnB,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,OAAO,EAAE,CAAC,CAAC,WAAW,EAAE,OAAO,IAAI,EAAE;4BACrC,SAAS,EAAE,CAAC,CAAC,WAAW,EAAE,SAAS;4BACnC,YAAY,EAAE,CAAC,CAAC,WAAW,EAAE,YAAY;4BACzC,KAAK,EAAE,CAAC,CAAC,KAAK;yBACf,CAAC,CAAC;qBACJ,CAAC,CAAC;oBAEH,iDAAiD;oBACjD,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAA
E,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;oBACnD,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC9B,kBAAkB,CAAC,IAAI,CAAC;4BACtB,UAAU,EAAE,IAAI;4BAChB,MAAM;4BACN,IAAI,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,WAAY,EAAE,CAAC,CAAC;yBACjE,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACzE,MAAM,IAAI,CAAC,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;QACpD,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CACzB,UAAkB,EAClB,QAAa,EACb,MAAW,EACX,QAAkB;QAElB,MAAM,CAAC,OAAO,CAAC,WAAW,IAAI,CAAC,WAAW,wBAAwB,UAAU,EAAE,CAAC,CAAC;QAEhF,MAAM,IAAI,GAA4F,EAAE,CAAC;QAEzG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,WAAW,gBAAgB,UAAU,EAAE,CAAC,CAAC;YAE7E,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE;oBAC1C,WAAW,EAAE,GAAG;oBAChB,SAAS,EAAE,IAAI;oBACf,GAAG,QAAQ,CAAC,YAAY;iBACzB,CAAC,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAEvC,MAAM,CAAC,OAAO,CAAC,UAAU,UAAU,QAAQ,CAAC,GAAG,CAAC,cAAc,OAAO,KAAK,CAAC,CAAC;gBAE5E,4CAA4C;gBAC5C,qEAAqE;gBACrE,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO;qBAClC,OAAO,CAAC,8BAA8B,EAAE,EAAE,CAAC;qBAC3C,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;gBACzC,MAAM,cAAc,GAAG,cAAc,CAAC,MAAM,GAAG,GAAG;oBAChD,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK;oBAC1C,CAAC,CAAC,cAAc,CAAC;gBACnB,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,SAAS,CAAC,GAAG,CAAC,KAAK,OAAO,oBAAoB,MAAM,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC,CAAC;gBACjH,IAAI,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACpD,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;wBAClC,OAAO,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;oBAChF,CAAC;gBACH,CAAC;gBACD,IAAI,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,YAAY,cAAc,EAAE,CAAC,CAAC;gBAC5C,CAAC;gBAED,IAAI,CAAC,IAAI,C
AAC;oBACR,OAAO,EAAE,IAAI;oBACb,OAAO;oBACP,WAAW,EAAE,MAAM;iBACpB,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACvC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC5E,MAAM,CAAC,KAAK,CAAC,UAAU,UAAU,QAAQ,CAAC,GAAG,CAAC,YAAY,OAAO,QAAQ,YAAY,EAAE,CAAC,CAAC;gBACzF,IAAI,CAAC,IAAI,CAAC;oBACR,OAAO,EAAE,KAAK;oBACd,OAAO;oBACP,KAAK,EAAE,YAAY;iBACpB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAC9B,kBAAuC;QAEvC,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAe,CAAC,IAAI,CAAC,CAAC;QACvF,MAAM,cAAc,GAAuB,EAAE,CAAC;QAE9C,2CAA2C;QAC3C,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;YACzC,OAAO,CAAC,GAAG,CAAC,kBAAkB,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;YACpD,OAAO,CAAC,GAAG,EAAE,CAAC;YAEd,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,UAAW,EAAE,CAAC;gBACzC,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;gBAClE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,eAAe,CAAC,cAAc,CAAC,cAAc,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAClE,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,eAAwD;QAC7E,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;YAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpD,MAAM,OAAO,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;gBAEnC,IAAI,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;oBACtC,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,CAAC,IAAI,UAAU,OAAO,CAAC,IAAI,iBAAiB,CAAC,CAAC;gBAC5E,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,IAAI,UAAU,OAAO,CAAC,IAAI,UAAU,CAAC,CAAC;oBACrE,MAAM,CAAC,OAAO,CAAC,4BAA4B,CAAC,CAAC;oBAC7C,MAAM,CAAC,OAAO,CAAC,
KAAK,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;oBACpE,MAAM,CAAC,OAAO,CAAC,KAAK,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;oBACpE,MAAM,CAAC,OAAO,CAAC,WAAW,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;oBAEjF,uCAAuC;oBACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;wBAChF,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC5C,MAAM,CAAC,OAAO,CAAC,4BAA4B,CAAC,GAAG,CAAC,CAAC;4BACjD,MAAM,CAAC,OAAO,CAAC,OAAO,OAAO,CAAC,IAAI,KAAK,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;4BAC9F,MAAM,CAAC,OAAO,CAAC,OAAO,OAAO,CAAC,IAAI,KAAK,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;4BAC9F,MAAM;wBACR,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;CACF"}
|
package/dist/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Common type definitions for module comparison experiments
|
|
3
3
|
*/
|
|
4
4
|
import type { PromptModule } from '@modular-prompt/core';
|
|
5
|
-
import type { QueryResult } from '@modular-prompt/driver';
|
|
5
|
+
import type { QueryResult, QueryOptions, ToolCall } from '@modular-prompt/driver';
|
|
6
6
|
/**
|
|
7
7
|
* Test case definition
|
|
8
8
|
*/
|
|
@@ -15,6 +15,8 @@ export interface TestCase {
|
|
|
15
15
|
input: any;
|
|
16
16
|
/** Model names to use for this test case (optional, uses all enabled models if not specified) */
|
|
17
17
|
models?: string[];
|
|
18
|
+
/** Query options for this test case (tools, temperature, etc.) */
|
|
19
|
+
queryOptions?: Partial<QueryOptions>;
|
|
18
20
|
}
|
|
19
21
|
/**
|
|
20
22
|
* Result of a single run
|
|
@@ -23,6 +25,8 @@ export interface RunResult {
|
|
|
23
25
|
success: boolean;
|
|
24
26
|
elapsed: number;
|
|
25
27
|
content: string;
|
|
28
|
+
toolCalls?: ToolCall[];
|
|
29
|
+
finishReason?: string;
|
|
26
30
|
error?: string;
|
|
27
31
|
}
|
|
28
32
|
/**
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAElF;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,KAAK,EAAE,GAAG,CAAC;IACX,iGAAiG;IACjG,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,kEAAkE;IAClE,YAAY,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,CAAC,OAAO,EAAE,GAAG,KAAK,GAAG,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,KAAK,CAAC;QACV,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC9B,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;;;;;OASG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,OAAO,EAAE,iBAAiB,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;CACrE;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;;;;;OASG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC,iBAAiB,CAAC,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAC1B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,
IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GACpD;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAC,iBAAiB,CAAC,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/E;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3C;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,iBAAiB;IAClE,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Tools実験設定
|
|
2
|
+
# MLXドライバーでのtool calling動作確認
|
|
3
|
+
|
|
4
|
+
models:
|
|
5
|
+
qwen3-4b:
|
|
6
|
+
model: "mlx-community/Qwen3-4B-Thinking-2507-heretic-8bit"
|
|
7
|
+
provider: "mlx"
|
|
8
|
+
capabilities: ["local", "tools"]
|
|
9
|
+
priority: 10
|
|
10
|
+
gemma3-12b:
|
|
11
|
+
model: "mlx-community/gemma-3-12b-it-qat-4bit"
|
|
12
|
+
provider: "mlx"
|
|
13
|
+
capabilities: ["local", "tools"]
|
|
14
|
+
priority: 20
|
|
15
|
+
lfm2.5-jp:
|
|
16
|
+
model: LiquidAI/LFM2.5-1.2B-JP-MLX-8bit
|
|
17
|
+
provider: "mlx"
|
|
18
|
+
capabilities: ["local", "fast", "japanese"]
|
|
19
|
+
priority: 20
|
|
20
|
+
lfm2.5-instruct:
|
|
21
|
+
model: LiquidAI/LFM2.5-1.2B-Instruct-MLX-8bit
|
|
22
|
+
provider: "mlx"
|
|
23
|
+
capabilities: ["local", "fast", "japanese"]
|
|
24
|
+
priority: 20
|
|
25
|
+
disabled: true
|
|
26
|
+
lfm2.5-thinking:
|
|
27
|
+
model: LiquidAI/LFM2.5-1.2B-Thinking-MLX-8bit
|
|
28
|
+
provider: "mlx"
|
|
29
|
+
capabilities: ["local", "fast", "japanese"]
|
|
30
|
+
priority: 20
|
|
31
|
+
disabled: true
|
|
32
|
+
|
|
33
|
+
drivers:
|
|
34
|
+
mlx: {}
|
|
35
|
+
|
|
36
|
+
modules:
|
|
37
|
+
- name: tools-test
|
|
38
|
+
path: ./tools-test-module.mjs
|
|
39
|
+
description: "ツール呼び出し実験用"
|
|
40
|
+
|
|
41
|
+
testCases:
|
|
42
|
+
# --- gemma3-12b ---
|
|
43
|
+
- name: "[gemma3] 天気ツール呼び出し"
|
|
44
|
+
description: "get_weatherツールを呼び出すことを期待"
|
|
45
|
+
models: ["gemma3-12b"]
|
|
46
|
+
input:
|
|
47
|
+
question: "東京の天気を教えてください。"
|
|
48
|
+
queryOptions: &tool_weather
|
|
49
|
+
temperature: 0.3
|
|
50
|
+
maxTokens: 512
|
|
51
|
+
tools: &tools_def
|
|
52
|
+
- name: get_weather
|
|
53
|
+
description: "指定された場所の現在の天気を取得する"
|
|
54
|
+
parameters:
|
|
55
|
+
type: object
|
|
56
|
+
properties:
|
|
57
|
+
location:
|
|
58
|
+
type: string
|
|
59
|
+
description: "天気を取得する場所(都市名)"
|
|
60
|
+
required:
|
|
61
|
+
- location
|
|
62
|
+
toolChoice: auto
|
|
63
|
+
|
|
64
|
+
- name: "[gemma3] ツール不要の質問"
|
|
65
|
+
description: "ツールを呼び出さずにテキストで回答することを期待"
|
|
66
|
+
models: ["gemma3-12b"]
|
|
67
|
+
input:
|
|
68
|
+
question: "1 + 1 は何ですか?"
|
|
69
|
+
queryOptions: &tool_math
|
|
70
|
+
temperature: 0.3
|
|
71
|
+
maxTokens: 1024
|
|
72
|
+
tools: *tools_def
|
|
73
|
+
toolChoice: auto
|
|
74
|
+
|
|
75
|
+
# # --- qwen3-4b ---
|
|
76
|
+
# - name: "[qwen3] 天気ツール呼び出し"
|
|
77
|
+
# description: "get_weatherツールを呼び出すことを期待"
|
|
78
|
+
# models: ["qwen3-4b"]
|
|
79
|
+
# input:
|
|
80
|
+
# question: "東京の天気を教えてください。"
|
|
81
|
+
# queryOptions: *tool_weather
|
|
82
|
+
|
|
83
|
+
# - name: "[qwen3] ツール不要の質問"
|
|
84
|
+
# description: "ツールを呼び出さずにテキストで回答することを期待"
|
|
85
|
+
# models: ["qwen3-4b"]
|
|
86
|
+
# input:
|
|
87
|
+
# question: "1 + 1 は何ですか?"
|
|
88
|
+
# queryOptions: *tool_math
|
|
89
|
+
|
|
90
|
+
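# --- lfm2.5-instruct (NOTE: the test cases below target "lfm2.5-instruct", which is marked "disabled: true" in models above; enable it or switch these cases to "lfm2.5-jp" — confirm intent) ---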
|
|
91
|
+
- name: "[lfm2.5] 天気ツール呼び出し"
|
|
92
|
+
description: "get_weatherツールを呼び出すことを期待"
|
|
93
|
+
models: ["lfm2.5-instruct"]
|
|
94
|
+
input:
|
|
95
|
+
question: "東京の天気を教えてください。"
|
|
96
|
+
queryOptions: *tool_weather
|
|
97
|
+
|
|
98
|
+
- name: "[lfm2.5] ツール不要の質問"
|
|
99
|
+
description: "ツールを呼び出さずにテキストで回答することを期待"
|
|
100
|
+
models: ["lfm2.5-instruct"]
|
|
101
|
+
input:
|
|
102
|
+
question: "1 + 1 は何ですか?"
|
|
103
|
+
queryOptions: *tool_math
|
|
104
|
+
|
|
105
|
+
evaluators: []
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tools実験用モジュール
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { compile } from '@modular-prompt/core';
|
|
6
|
+
|
|
7
|
+
export default {
|
|
8
|
+
name: 'tools-test',
|
|
9
|
+
description: 'ツール呼び出し実験用モジュール',
|
|
10
|
+
compile: (context) => {
|
|
11
|
+
return compile({
|
|
12
|
+
objective: [
|
|
13
|
+
'- あなたは利用者からの質問に答えるアシスタントです。',
|
|
14
|
+
],
|
|
15
|
+
instructions: [
|
|
16
|
+
'- 質問の内容に応じて、適切なツールを使ってください。',
|
|
17
|
+
' - ツールの結果が返ってくるまで、推測で答えないでください。',
|
|
18
|
+
'- 必要がない場合は通常の応答を返します。',
|
|
19
|
+
],
|
|
20
|
+
messages: [
|
|
21
|
+
{
|
|
22
|
+
type: 'message',
|
|
23
|
+
role: 'user',
|
|
24
|
+
content: context.question || 'Hello',
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
});
|
|
28
|
+
},
|
|
29
|
+
};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tools実験用モジュール
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { compile } from '@modular-prompt/core';
|
|
6
|
+
|
|
7
|
+
export default {
|
|
8
|
+
name: 'tools-test',
|
|
9
|
+
description: 'ツール呼び出し実験用モジュール',
|
|
10
|
+
compile: (context: any) => {
|
|
11
|
+
return compile({
|
|
12
|
+
objective: [
|
|
13
|
+
'あなたはツールを使って質問に答えるアシスタントです。',
|
|
14
|
+
'必要に応じてツールを呼び出してください。',
|
|
15
|
+
],
|
|
16
|
+
instructions: [
|
|
17
|
+
'質問の内容に応じて、適切なツールを使ってください。',
|
|
18
|
+
'ツールの結果が返ってくるまで、推測で答えないでください。',
|
|
19
|
+
],
|
|
20
|
+
messages: [
|
|
21
|
+
{
|
|
22
|
+
type: 'message' as const,
|
|
23
|
+
role: 'user' as const,
|
|
24
|
+
content: context.question || 'Hello',
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
});
|
|
28
|
+
},
|
|
29
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@modular-prompt/experiment",
|
|
3
|
-
"version": "0.1
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Experiment framework for comparing and evaluating prompt modules",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"files": [
|
|
18
18
|
"dist",
|
|
19
19
|
"examples",
|
|
20
|
+
"skills",
|
|
20
21
|
"README.md"
|
|
21
22
|
],
|
|
22
23
|
"dependencies": {
|
|
@@ -24,9 +25,9 @@
|
|
|
24
25
|
"jiti": "^2.4.2",
|
|
25
26
|
"yaml": "^2.3.4",
|
|
26
27
|
"zod": "^3.22.4",
|
|
27
|
-
"@modular-prompt/core": "0.1.
|
|
28
|
-
"@modular-prompt/driver": "0.
|
|
29
|
-
"@modular-prompt/utils": "0.2.
|
|
28
|
+
"@modular-prompt/core": "0.1.13",
|
|
29
|
+
"@modular-prompt/driver": "0.8.1",
|
|
30
|
+
"@modular-prompt/utils": "0.2.4"
|
|
30
31
|
},
|
|
31
32
|
"devDependencies": {
|
|
32
33
|
"@eslint/js": "^9.34.0",
|
|
@@ -64,7 +65,8 @@
|
|
|
64
65
|
"test": "vitest",
|
|
65
66
|
"test:ui": "vitest --ui",
|
|
66
67
|
"test:run": "vitest run",
|
|
67
|
-
"
|
|
68
|
+
"copy-skills": "mkdir -p skills/experiment && cp ../../skills/experiment/SKILL.md skills/experiment/SKILL.md",
|
|
69
|
+
"clean": "rm -rf dist skills",
|
|
68
70
|
"lint": "eslint src",
|
|
69
71
|
"typecheck": "tsc --noEmit"
|
|
70
72
|
}
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: experiment
|
|
3
|
+
description: modular-promptの実験フレームワーク(@modular-prompt/experiment)の使い方ガイド。プロンプトモジュールの比較・評価実験の設定、実行、評価器の定義を参照する。
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# 実験フレームワーク使い方ガイド
|
|
7
|
+
|
|
8
|
+
## 実験フレームワークとは
|
|
9
|
+
|
|
10
|
+
`@modular-prompt/experiment` は、複数のプロンプトモジュールを同一条件下で比較・評価するためのフレームワーク。YAML設定で実験を定義し、CLIまたはプログラマティックに実行できる。
|
|
11
|
+
|
|
12
|
+
### ユースケース
|
|
13
|
+
|
|
14
|
+
- **プロンプト比較**: 異なるプロンプト構造の効果を比較検証
|
|
15
|
+
- **モジュール分離検証**: モジュール化したプロンプトが同等の出力を生成するか確認
|
|
16
|
+
- **品質評価**: 繰り返し実行による出力の安定性・一貫性の評価
|
|
17
|
+
- **マルチモデルテスト**: 異なるLLMプロバイダーでの動作比較
|
|
18
|
+
|
|
19
|
+
## CLI
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# 設定検証・実行計画表示(まずこれで確認)
|
|
23
|
+
npx modular-experiment config.yaml --dry-run
|
|
24
|
+
|
|
25
|
+
# 実験実行
|
|
26
|
+
npx modular-experiment config.yaml
|
|
27
|
+
|
|
28
|
+
# 評価付き実行
|
|
29
|
+
npx modular-experiment config.yaml --evaluate
|
|
30
|
+
|
|
31
|
+
# 複数回実行(統計用)
|
|
32
|
+
npx modular-experiment config.yaml --repeat 10
|
|
33
|
+
|
|
34
|
+
# 特定モジュール・テストケースのみ
|
|
35
|
+
npx modular-experiment config.yaml --modules my-module --test-case "Basic Test"
|
|
36
|
+
|
|
37
|
+
# 詳細ログ出力
|
|
38
|
+
npx modular-experiment config.yaml --log-file experiment.jsonl --verbose
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### CLIオプション
|
|
42
|
+
|
|
43
|
+
| オプション | 説明 |
|
|
44
|
+
|-----------|------|
|
|
45
|
+
| `<config>` | 設定ファイルパス(YAML or TypeScript) |
|
|
46
|
+
| `--dry-run` | 実行計画のみ表示 |
|
|
47
|
+
| `--evaluate` | 評価フェーズを有効化 |
|
|
48
|
+
| `--repeat <count>` | 繰り返し回数(デフォルト: 1) |
|
|
49
|
+
| `--modules <names>` | カンマ区切りモジュール名フィルター |
|
|
50
|
+
| `--test-case <name>` | テストケース名フィルター |
|
|
51
|
+
| `--model <provider>` | モデルプロバイダーフィルター |
|
|
52
|
+
| `--evaluators <names>` | カンマ区切り評価器名フィルター |
|
|
53
|
+
| `--log-file <path>` | JSONLログファイルパス |
|
|
54
|
+
| `--verbose` | 詳細な内部操作を表示 |
|
|
55
|
+
|
|
56
|
+
## 設定ファイル(YAML)
|
|
57
|
+
|
|
58
|
+
```yaml
|
|
59
|
+
# モデル定義
|
|
60
|
+
models:
|
|
61
|
+
gpt4o:
|
|
62
|
+
provider: openai
|
|
63
|
+
model: gpt-4o
|
|
64
|
+
capabilities: ["streaming", "tools", "structured"]
|
|
65
|
+
enabled: true
|
|
66
|
+
gemini:
|
|
67
|
+
provider: vertexai
|
|
68
|
+
model: gemini-2.0-flash-001
|
|
69
|
+
capabilities: ["tools", "fast"]
|
|
70
|
+
enabled: true
|
|
71
|
+
|
|
72
|
+
# ドライバー認証設定
|
|
73
|
+
drivers:
|
|
74
|
+
openai:
|
|
75
|
+
apiKey: ${OPENAI_API_KEY} # 環境変数
|
|
76
|
+
vertexai:
|
|
77
|
+
projectId: my-gcp-project
|
|
78
|
+
location: us-central1
|
|
79
|
+
|
|
80
|
+
# デフォルトオプション
|
|
81
|
+
defaultOptions:
|
|
82
|
+
temperature: 0.7
|
|
83
|
+
maxTokens: 2048
|
|
84
|
+
|
|
85
|
+
# テスト対象モジュール
|
|
86
|
+
modules:
|
|
87
|
+
- name: baseline
|
|
88
|
+
path: ./modules/baseline.ts # 設定ファイルからの相対パス
|
|
89
|
+
description: ベースラインプロンプト
|
|
90
|
+
- name: optimized
|
|
91
|
+
path: ./modules/optimized.ts
|
|
92
|
+
description: 最適化版プロンプト
|
|
93
|
+
|
|
94
|
+
# テストケース
|
|
95
|
+
testCases:
|
|
96
|
+
- name: 基本テスト
|
|
97
|
+
description: 基本的な動作確認
|
|
98
|
+
input: # module.compile に渡すコンテキスト
|
|
99
|
+
query: "TypeScriptの型推論について説明して"
|
|
100
|
+
models: [gpt4o] # オプション: 未指定時は全有効モデル
|
|
101
|
+
queryOptions: # オプション
|
|
102
|
+
temperature: 0.5
|
|
103
|
+
|
|
104
|
+
- name: ツール呼び出しテスト
|
|
105
|
+
input:
|
|
106
|
+
query: "東京の天気を調べて"
|
|
107
|
+
queryOptions:
|
|
108
|
+
tools:
|
|
109
|
+
- name: get_weather
|
|
110
|
+
description: 天気を取得
|
|
111
|
+
parameters:
|
|
112
|
+
type: object
|
|
113
|
+
properties:
|
|
114
|
+
city: { type: string }
|
|
115
|
+
required: [city]
|
|
116
|
+
|
|
117
|
+
# 評価器
|
|
118
|
+
evaluators:
|
|
119
|
+
- name: structured-output-presence # ビルトイン
|
|
120
|
+
- name: llm-requirement-fulfillment # ビルトイン
|
|
121
|
+
- name: custom-eval # 外部ファイル
|
|
122
|
+
path: ./evaluators/custom-eval.ts
|
|
123
|
+
|
|
124
|
+
# 評価設定
|
|
125
|
+
evaluation:
|
|
126
|
+
enabled: true
|
|
127
|
+
model: gpt4o # 評価に使うモデル
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### パス解決
|
|
131
|
+
|
|
132
|
+
設定ファイル内のパス(modules, evaluators等)は設定ファイルのディレクトリからの相対パスで解決される。`~/` でホームディレクトリ、絶対パスも使用可能。
|
|
133
|
+
|
|
134
|
+
## モジュール定義
|
|
135
|
+
|
|
136
|
+
テスト対象のモジュールファイル:
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
import { compile } from '@modular-prompt/core';
|
|
140
|
+
import { myPromptModule } from './prompts.js';
|
|
141
|
+
|
|
142
|
+
export default {
|
|
143
|
+
name: 'My Module',
|
|
144
|
+
description: 'テスト対象のプロンプトモジュール',
|
|
145
|
+
compile: (context: any) => compile(myPromptModule, context),
|
|
146
|
+
};
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
`compile` 関数はテストケースの `input` をコンテキストとして受け取り、CompiledPrompt を返す。
|
|
150
|
+
|
|
151
|
+
## 評価器
|
|
152
|
+
|
|
153
|
+
### ビルトイン評価器
|
|
154
|
+
|
|
155
|
+
**structured-output-presence** - コード評価器
|
|
156
|
+
- `structuredOutput` の存在と有効性を検証
|
|
157
|
+
- スコア: `(validCount / totalRuns) * 10`
|
|
158
|
+
|
|
159
|
+
**llm-requirement-fulfillment** - プロンプト評価器
|
|
160
|
+
- LLMが要件充足度を包括的に評価
|
|
161
|
+
- 評価基準: 要件充足度、パラメータ正確性、パラメータ完全性、論理的一貫性
|
|
162
|
+
- 評価用モデルの設定が必要(`evaluation.model`)
|
|
163
|
+
|
|
164
|
+
### カスタム評価器(コード)
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
import type { CodeEvaluator, EvaluationContext, EvaluationResult } from '@modular-prompt/experiment';
|
|
168
|
+
|
|
169
|
+
export default {
|
|
170
|
+
name: 'json-validator',
|
|
171
|
+
description: 'JSON構造を検証',
|
|
172
|
+
|
|
173
|
+
async evaluate(context: EvaluationContext): Promise<EvaluationResult> {
|
|
174
|
+
const allValid = context.runs.every(run =>
|
|
175
|
+
run.queryResult.structuredOutput != null
|
|
176
|
+
);
|
|
177
|
+
return {
|
|
178
|
+
evaluator: 'json-validator',
|
|
179
|
+
moduleName: context.moduleName,
|
|
180
|
+
score: allValid ? 10 : 0,
|
|
181
|
+
reasoning: allValid ? '全実行でJSON出力あり' : '一部の実行でJSON出力なし',
|
|
182
|
+
};
|
|
183
|
+
},
|
|
184
|
+
} satisfies CodeEvaluator;
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### カスタム評価器(プロンプト)
|
|
188
|
+
|
|
189
|
+
LLMに評価させる場合:
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
import type { PromptEvaluator, EvaluationContext } from '@modular-prompt/experiment';
|
|
193
|
+
import type { PromptModule } from '@modular-prompt/core';
|
|
194
|
+
|
|
195
|
+
const evaluationModule: PromptModule<EvaluationContext> = {
|
|
196
|
+
createContext: () => ({ moduleName: '', prompt: '', runs: [] }),
|
|
197
|
+
objective: ['出力の品質を0-10で評価する'],
|
|
198
|
+
instructions: [
|
|
199
|
+
'- 明確さ、正確さ、完全性を基準にする',
|
|
200
|
+
(ctx) => `対象モジュール: ${ctx.moduleName}`,
|
|
201
|
+
(ctx) => ctx.runs.map((run, i) =>
|
|
202
|
+
`実行${i + 1}: ${run.queryResult.content.slice(0, 500)}`
|
|
203
|
+
),
|
|
204
|
+
],
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
export default {
|
|
208
|
+
name: 'quality-evaluator',
|
|
209
|
+
description: '出力品質を評価',
|
|
210
|
+
module: evaluationModule, // baseEvaluationModuleと自動マージされる
|
|
211
|
+
} satisfies PromptEvaluator;
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## 主要な型
|
|
215
|
+
|
|
216
|
+
### TestCase
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
interface TestCase {
|
|
220
|
+
name: string;
|
|
221
|
+
description?: string;
|
|
222
|
+
input: any; // module.compileに渡すコンテキスト
|
|
223
|
+
models?: string[]; // 未指定時は全有効モデル
|
|
224
|
+
queryOptions?: Partial<QueryOptions>;
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### EvaluationContext
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
interface EvaluationContext {
|
|
232
|
+
moduleName: string;
|
|
233
|
+
prompt: string; // コンパイル済みプロンプト(文字列化)
|
|
234
|
+
runs: Array<{
|
|
235
|
+
queryResult: QueryResult;
|
|
236
|
+
}>;
|
|
237
|
+
}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### EvaluationResult
|
|
241
|
+
|
|
242
|
+
```typescript
|
|
243
|
+
interface EvaluationResult {
|
|
244
|
+
evaluator: string;
|
|
245
|
+
moduleName: string;
|
|
246
|
+
score?: number; // 0-10
|
|
247
|
+
reasoning?: string;
|
|
248
|
+
details?: Record<string, any>;
|
|
249
|
+
error?: string;
|
|
250
|
+
}
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## プログラマティック使用
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import {
|
|
257
|
+
loadExperimentConfig,
|
|
258
|
+
loadModules,
|
|
259
|
+
loadEvaluators,
|
|
260
|
+
ExperimentRunner,
|
|
261
|
+
DriverManager,
|
|
262
|
+
} from '@modular-prompt/experiment';
|
|
263
|
+
|
|
264
|
+
const { serverConfig, aiService, configDir } = loadExperimentConfig('config.yaml');
|
|
265
|
+
const modules = await loadModules(serverConfig.modules, configDir);
|
|
266
|
+
const evaluators = await loadEvaluators(serverConfig.evaluators, configDir);
|
|
267
|
+
|
|
268
|
+
const driverManager = new DriverManager();
|
|
269
|
+
const runner = new ExperimentRunner(
|
|
270
|
+
aiService,
|
|
271
|
+
driverManager,
|
|
272
|
+
modules,
|
|
273
|
+
serverConfig.testCases,
|
|
274
|
+
serverConfig.models,
|
|
275
|
+
5, // repeat count
|
|
276
|
+
evaluators,
|
|
277
|
+
evaluatorModel // 評価用モデル(設定の evaluation.model に対応する想定。事前に用意しておくこと)
|
|
278
|
+
);
|
|
279
|
+
|
|
280
|
+
const results = await runner.run();
|
|
281
|
+
await driverManager.cleanup();
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## 実験フロー
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
設定ロード → モジュール・評価器ロード → テストケースごとに:
|
|
288
|
+
全モジュールをコンパイル → プロンプト比較 → 各モデルで実行(繰り返し対応)
|
|
289
|
+
→ 評価フェーズ(オプション) → 統計レポート生成 → クリーンアップ
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
DriverManagerはモデル名をキーにドライバーをキャッシュし、同じモデルであればドライバーを再利用する。異なるモデルに切り替わる際には、前のドライバーをcloseして解放できる。これはローカルLLM(MLX等)のメモリ消費を抑えるための設計。
|