@modular-prompt/experiment 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -369
- package/dist/config/dynamic-loader.js +2 -2
- package/dist/config/dynamic-loader.js.map +1 -1
- package/dist/runner/experiment.d.ts +8 -4
- package/dist/runner/experiment.d.ts.map +1 -1
- package/dist/runner/experiment.js +112 -106
- package/dist/runner/experiment.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/examples/tools-experiment.yaml +50 -37
- package/examples/tools-test-module.mjs +18 -24
- package/examples/tools-test-module.ts +19 -23
- package/package.json +8 -5
- package/skills/experiment/SKILL.md +313 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Experiment runner - orchestrates the entire experiment
|
|
3
3
|
*/
|
|
4
|
+
import { compile } from '@modular-prompt/core';
|
|
4
5
|
import { formatCompletionPrompt } from '@modular-prompt/driver';
|
|
6
|
+
import { defaultProcess } from '@modular-prompt/process';
|
|
5
7
|
import { EvaluatorRunner } from './evaluator.js';
|
|
6
8
|
import { logger as baseLogger } from '../logger.js';
|
|
7
9
|
const logger = baseLogger.context('runner');
|
|
@@ -30,35 +32,28 @@ export class ExperimentRunner {
|
|
|
30
32
|
* @returns Array of TestResult
|
|
31
33
|
*/
|
|
32
34
|
async run() {
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
// Phase 1: テスト計画の生成
|
|
36
|
+
const plan = this.buildTestPlan();
|
|
37
|
+
if (plan.length === 0) {
|
|
38
|
+
console.log('No test plan items generated.');
|
|
39
|
+
return [];
|
|
40
|
+
}
|
|
41
|
+
// Phase 2: モデルごとにグループ化して実行
|
|
42
|
+
const { results, evaluationContexts } = await this.executePlan(plan);
|
|
43
|
+
// Phase 3: 評価フェーズ
|
|
44
|
+
if (this.evaluators && this.evaluators.length > 0 && this.evaluatorModel) {
|
|
45
|
+
await this.runEvaluationPhase(evaluationContexts);
|
|
46
|
+
}
|
|
47
|
+
return results;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Build test plan: expand all testCase × model × module combinations
|
|
51
|
+
*/
|
|
52
|
+
buildTestPlan() {
|
|
53
|
+
const plan = [];
|
|
54
|
+
let order = 0;
|
|
37
55
|
for (const testCase of this.testCases) {
|
|
38
|
-
|
|
39
|
-
console.log(`Test Case: ${testCase.name}`);
|
|
40
|
-
if (testCase.description) {
|
|
41
|
-
console.log(`Description: ${testCase.description}`);
|
|
42
|
-
}
|
|
43
|
-
console.log('─'.repeat(80));
|
|
44
|
-
console.log();
|
|
45
|
-
// Compile all modules with testCase.input as context
|
|
46
|
-
const compiledModules = this.modules.map(module => {
|
|
47
|
-
logger.verbose(`Compiling prompt for module: ${module.name}`);
|
|
48
|
-
const compiled = module.compile(testCase.input);
|
|
49
|
-
const prompt = formatCompletionPrompt(compiled);
|
|
50
|
-
logger.verbose(`Prompt length for ${module.name}: ${prompt.length} chars`);
|
|
51
|
-
return {
|
|
52
|
-
name: module.name,
|
|
53
|
-
compiled,
|
|
54
|
-
prompt,
|
|
55
|
-
};
|
|
56
|
-
});
|
|
57
|
-
// Compare prompts if multiple modules
|
|
58
|
-
if (compiledModules.length > 1) {
|
|
59
|
-
this.comparePrompts(compiledModules);
|
|
60
|
-
}
|
|
61
|
-
// Determine which models to test with this testCase
|
|
56
|
+
// テストケースで使うモデルを決定
|
|
62
57
|
const modelsToTest = testCase.models
|
|
63
58
|
? testCase.models.map(name => {
|
|
64
59
|
const spec = this.models[name];
|
|
@@ -71,36 +66,62 @@ export class ExperimentRunner {
|
|
|
71
66
|
: Object.entries(this.models)
|
|
72
67
|
.filter(([_, spec]) => !spec.disabled)
|
|
73
68
|
.map(([name, spec]) => ({ name, spec }));
|
|
74
|
-
if (modelsToTest.length === 0) {
|
|
75
|
-
console.log('⚠️ No models to test for this test case, skipping');
|
|
76
|
-
console.log();
|
|
77
|
-
continue;
|
|
78
|
-
}
|
|
79
|
-
// Test with each model
|
|
80
69
|
for (const { name: modelName, spec: modelSpec } of modelsToTest) {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
70
|
+
for (const module of this.modules) {
|
|
71
|
+
// compile for logging/evaluation purposes
|
|
72
|
+
const compiled = compile(module.module, testCase.input);
|
|
73
|
+
const prompt = formatCompletionPrompt(compiled);
|
|
74
|
+
plan.push({
|
|
75
|
+
order: order++,
|
|
76
|
+
testCase,
|
|
77
|
+
modelName,
|
|
78
|
+
modelSpec,
|
|
79
|
+
module,
|
|
80
|
+
prompt,
|
|
81
|
+
});
|
|
89
82
|
}
|
|
90
|
-
|
|
91
|
-
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
logger.info(`Test plan: ${plan.length} items (${this.testCases.length} test cases × models × ${this.modules.length} modules)`);
|
|
86
|
+
return plan;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Execute test plan grouped by model
|
|
90
|
+
*/
|
|
91
|
+
async executePlan(plan) {
|
|
92
|
+
const allResults = [];
|
|
93
|
+
const allEvalContexts = [];
|
|
94
|
+
// モデルごとにグループ化(出現順を維持)
|
|
95
|
+
const modelGroups = new Map();
|
|
96
|
+
for (const item of plan) {
|
|
97
|
+
const group = modelGroups.get(item.modelName);
|
|
98
|
+
if (group) {
|
|
99
|
+
group.push(item);
|
|
100
|
+
}
|
|
101
|
+
else {
|
|
102
|
+
modelGroups.set(item.modelName, [item]);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
// モデルごとに実行
|
|
106
|
+
for (const [modelName, items] of modelGroups) {
|
|
107
|
+
const modelSpec = items[0].modelSpec;
|
|
108
|
+
console.log('='.repeat(80));
|
|
109
|
+
console.log(`🤖 Model: ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
|
|
110
|
+
console.log('='.repeat(80));
|
|
111
|
+
logger.info(`Creating driver for ${modelName} (${modelSpec.provider}:${modelSpec.model})`);
|
|
112
|
+
const driver = await this.driverManager.getOrCreate(this.aiService, modelName, modelSpec);
|
|
113
|
+
for (const item of items) {
|
|
114
|
+
console.log(` ── ${item.testCase.name} ──`);
|
|
115
|
+
if (item.testCase.description) {
|
|
116
|
+
console.log(` ${item.testCase.description}`);
|
|
92
117
|
}
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
for (const { name, compiled, prompt } of compiledModules) {
|
|
99
|
-
const runs = await this.runModuleTest(name, compiled, driver, testCase);
|
|
100
|
-
allResults.push({
|
|
101
|
-
testCase: testCase.name,
|
|
118
|
+
const runs = await this.runModuleTest(item.module.name, item.module.module, driver, item.testCase);
|
|
119
|
+
allResults.push({
|
|
120
|
+
order: item.order,
|
|
121
|
+
result: {
|
|
122
|
+
testCase: item.testCase.name,
|
|
102
123
|
model: modelName,
|
|
103
|
-
module: name,
|
|
124
|
+
module: item.module.name,
|
|
104
125
|
runs: runs.map(r => ({
|
|
105
126
|
success: r.success,
|
|
106
127
|
elapsed: r.elapsed,
|
|
@@ -109,44 +130,61 @@ export class ExperimentRunner {
|
|
|
109
130
|
finishReason: r.queryResult?.finishReason,
|
|
110
131
|
error: r.error,
|
|
111
132
|
})),
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
133
|
+
},
|
|
134
|
+
});
|
|
135
|
+
// Collect for evaluation
|
|
136
|
+
const successfulRuns = runs.filter(r => r.success);
|
|
137
|
+
if (successfulRuns.length > 0) {
|
|
138
|
+
allEvalContexts.push({
|
|
139
|
+
order: item.order,
|
|
140
|
+
context: {
|
|
141
|
+
moduleName: item.module.name,
|
|
142
|
+
prompt: item.prompt,
|
|
119
143
|
runs: successfulRuns.map(r => ({ queryResult: r.queryResult })),
|
|
120
|
-
}
|
|
121
|
-
}
|
|
144
|
+
},
|
|
145
|
+
});
|
|
122
146
|
}
|
|
123
147
|
}
|
|
148
|
+
// モデルの全テスト完了後にドライバーをクローズ
|
|
149
|
+
logger.info(`Closing driver: ${modelName}`);
|
|
150
|
+
await this.driverManager.close(modelName);
|
|
151
|
+
console.log();
|
|
124
152
|
}
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
153
|
+
// Retire: 元の定義順にソートして返す
|
|
154
|
+
allResults.sort((a, b) => a.order - b.order);
|
|
155
|
+
allEvalContexts.sort((a, b) => a.order - b.order);
|
|
156
|
+
return {
|
|
157
|
+
results: allResults.map(r => r.result),
|
|
158
|
+
evaluationContexts: allEvalContexts.map(e => e.context),
|
|
159
|
+
};
|
|
130
160
|
}
|
|
131
161
|
/**
|
|
132
162
|
* Run module test with multiple repetitions
|
|
133
163
|
*/
|
|
134
|
-
async runModuleTest(moduleName,
|
|
164
|
+
async runModuleTest(moduleName, module, driver, testCase) {
|
|
135
165
|
logger.verbose(`Running ${this.repeatCount} time(s) for module: ${moduleName}`);
|
|
136
166
|
const runs = [];
|
|
137
167
|
for (let i = 0; i < this.repeatCount; i++) {
|
|
138
168
|
logger.verbose(`Run ${i + 1}/${this.repeatCount} for module: ${moduleName}`);
|
|
139
169
|
const startTime = Date.now();
|
|
140
170
|
try {
|
|
141
|
-
const
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
171
|
+
const workflowResult = await defaultProcess(driver, module, testCase.input, {
|
|
172
|
+
queryOptions: {
|
|
173
|
+
temperature: 0.7,
|
|
174
|
+
maxTokens: 2048,
|
|
175
|
+
...testCase.queryOptions,
|
|
176
|
+
},
|
|
145
177
|
});
|
|
146
178
|
const elapsed = Date.now() - startTime;
|
|
179
|
+
// Convert workflow result to QueryResult-like structure
|
|
180
|
+
const result = {
|
|
181
|
+
content: workflowResult.output,
|
|
182
|
+
toolCalls: workflowResult.metadata?.toolCalls,
|
|
183
|
+
finishReason: workflowResult.metadata?.finishReason,
|
|
184
|
+
usage: workflowResult.metadata?.usage,
|
|
185
|
+
};
|
|
147
186
|
logger.verbose(`Module ${moduleName} run ${i + 1}: Success (${elapsed}ms)`);
|
|
148
187
|
// Display result summary (思考ブロックはプレビューから除外)
|
|
149
|
-
// パターン: <think>...</think> または 先頭から</think>まで(テンプレートが<think>を付与する場合)
|
|
150
188
|
const displayContent = result.content
|
|
151
189
|
.replace(/<think>[\s\S]*?<\/think>\s*/g, '')
|
|
152
190
|
.replace(/^[\s\S]*?<\/think>\s*/g, '');
|
|
@@ -204,37 +242,5 @@ export class ExperimentRunner {
|
|
|
204
242
|
// Display all evaluation results
|
|
205
243
|
evaluatorRunner.displayResults(allEvaluations, this.evaluators);
|
|
206
244
|
}
|
|
207
|
-
/**
|
|
208
|
-
* Compare prompts across modules
|
|
209
|
-
*/
|
|
210
|
-
comparePrompts(compiledModules) {
|
|
211
|
-
console.log('📊 Prompt Comparison:');
|
|
212
|
-
for (let i = 0; i < compiledModules.length; i++) {
|
|
213
|
-
const module1 = compiledModules[i];
|
|
214
|
-
for (let j = i + 1; j < compiledModules.length; j++) {
|
|
215
|
-
const module2 = compiledModules[j];
|
|
216
|
-
if (module1.prompt === module2.prompt) {
|
|
217
|
-
console.log(` ✅ [${module1.name}] and [${module2.name}] are identical`);
|
|
218
|
-
}
|
|
219
|
-
else {
|
|
220
|
-
console.log(` ⚠️ [${module1.name}] and [${module2.name}] differ`);
|
|
221
|
-
logger.verbose(`Prompt comparison details:`);
|
|
222
|
-
logger.verbose(` ${module1.name}: ${module1.prompt.length} chars`);
|
|
223
|
-
logger.verbose(` ${module2.name}: ${module2.prompt.length} chars`);
|
|
224
|
-
logger.verbose(` Diff: ${module2.prompt.length - module1.prompt.length} chars`);
|
|
225
|
-
// Find first difference (verbose only)
|
|
226
|
-
for (let k = 0; k < Math.max(module1.prompt.length, module2.prompt.length); k++) {
|
|
227
|
-
if (module1.prompt[k] !== module2.prompt[k]) {
|
|
228
|
-
logger.verbose(` First diff at position ${k}:`);
|
|
229
|
-
logger.verbose(` ${module1.name}: ${JSON.stringify(module1.prompt.substring(k, k + 50))}`);
|
|
230
|
-
logger.verbose(` ${module2.name}: ${JSON.stringify(module2.prompt.substring(k, k + 50))}`);
|
|
231
|
-
break;
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
console.log();
|
|
238
|
-
}
|
|
239
245
|
}
|
|
240
246
|
//# sourceMappingURL=experiment.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"experiment.js","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"experiment.js","sourceRoot":"","sources":["../../src/runner/experiment.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAE/C,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAEhE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAIzD,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,MAAM,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAW5C,MAAM,OAAO,gBAAgB;IAEjB;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IARV,YACU,SAAoB,EACpB,aAA4B,EAC5B,OAA2B,EAC3B,SAAqB,EACrB,MAAiC,EACjC,WAAmB,EACnB,UAA8B,EAC9B,cAAkD;QAPlD,cAAS,GAAT,SAAS,CAAW;QACpB,kBAAa,GAAb,aAAa,CAAe;QAC5B,YAAO,GAAP,OAAO,CAAoB;QAC3B,cAAS,GAAT,SAAS,CAAY;QACrB,WAAM,GAAN,MAAM,CAA2B;QACjC,gBAAW,GAAX,WAAW,CAAQ;QACnB,eAAU,GAAV,UAAU,CAAoB;QAC9B,mBAAc,GAAd,cAAc,CAAoC;IACzD,CAAC;IAEJ;;;;OAIG;IACH,KAAK,CAAC,GAAG;QACP,oBAAoB;QACpB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;YAC7C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,2BAA2B;QAC3B,MAAM,EAAE,OAAO,EAAE,kBAAkB,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAErE,kBAAkB;QAClB,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACzE,MAAM,IAAI,CAAC,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;QACpD,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,aAAa;QACnB,MAAM,IAAI,GAAmB,EAAE,CAAC;QAChC,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtC,kBAAkB;YAClB,MAAM,YAAY,GAA6C,QAAQ,CAAC,MAAM;gBAC5E,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/B,IAAI,CAAC,IAAI,EAAE,CAAC;wBACV,OAAO,CAAC,IAAI,CAAC,cAAc,IAAI,wCAAwC,CAAC,CAAC;wBACzE,OAAO,IAAI,CAAC;oBACd,CAAC;oBACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACxB,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAA6C;gBAChE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC;qBACxB,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;qBACrC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAE/C,KAAK,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,YAAY,EAAE,CAAC;gBAChE,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;oBAClC,0CAA0C;oBAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;oBACxD,MAAM,MAAM,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;oBAEhD,IAAI,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,KAAK,EAAE;wBACd,QAAQ;wBACR,SAAS;wBACT,SAAS;wBACT,MAAM;wBACN,MAAM;qBACP,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,MAAM,WAAW,IAAI,CAAC,SAAS,CAAC,MAAM,0BAA0B,IAAI,CAAC,OAAO,CAAC,MAAM,WAAW,CAAC,CAAC;QAC/H,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,WAAW,CAAC,IAAoB;QAI5C,MAAM,UAAU,GAAiD,EAAE,CAAC;QACpE,MAAM,eAAe,GAAyD,EAAE,CAAC;QAEjF,sBAAsB;QACtB,MAAM,WAAW,GAAG,IAAI,GAAG,EAA0B,CAAC;QACtD,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9C,IAAI,KAAK,EAAE,CAAC;gBACV,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;iBAAM,CAAC;gBACN,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,WAAW;QACX,KAAK,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,IAAI,WAAW,EAAE,CAAC;YAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;YACjF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAE5B,MAAM,CAAC,IAAI,CAAC,uBAAuB,SAAS,KAAK,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;YAC3F,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,CAAC;gBAC7C,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;oBAC9B,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;gBACnD,CAAC;gBAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAEnG,UAAU,CAAC,IAAI,CAAC;oBACd,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE;wBACN,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI;wBAC5B,KAAK,EAAE,SAAS;wBAChB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;wBACxB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;4BACnB,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,OAAO,EAAE,CAAC,CAAC,WAAW,EAAE,OAAO,IAAI,EAAE;4BACrC,SAAS,EAAE,CAAC,CAAC,WAAW,EAAE,SAAS;4BACnC,YAAY,EAAE,CAAC,CAAC,WAAW,EAAE,YAAY;4BACzC,KAAK,EAAE,CAAC,CAAC,KAAK;yBACf,CAAC,CAAC;qBACJ;iBACF,CAAC,CAAC;gBAEH,yBAAyB;gBACzB,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACnD,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9B,eAAe,CAAC,IAAI,CAAC;wBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE;4BACP,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;4BAC5B,MAAM,EAAE,IAAI,CAAC,MAAM;4BACnB,IAAI,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,WAAY,EAAE,CAAC,CAAC;yBACjE;qBACF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,yBAAyB;YACzB,MAAM,CAAC,IAAI,CAAC,mBAAmB,SAAS,EAAE,CAAC,CAAC;YAC5C,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YAC1C,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;QAED,wBAAwB;QACxB,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAElD,OAAO;YACL,OAAO,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;YACtC,kBAAkB,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;SACxD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CACzB,UAAkB,EAClB,MAAyB,EACzB,MAAgB,EAChB,QAAkB;QAElB,MAAM,CAAC,OAAO,CAAC,WAAW,IAAI,CAAC,WAAW,wBAAwB,UAAU,EAAE,CAAC,CAAC;QAEhF,MAAM,IAAI,GAA4F,EAAE,CAAC;QAEzG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,WAAW,gBAAgB,UAAU,EAAE,CAAC,CAAC;YAE7E,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,cAAc,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,KAAK,EAAE;oBAC1E,YAAY,EAAE;wBACZ,WAAW,EAAE,GAAG;wBAChB,SAAS,EAAE,IAAI;wBACf,GAAG,QAAQ,CAAC,YAAY;qBACzB;iBACF,CAAC,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAEvC,wDAAwD;gBACxD,MAAM,MAAM,GAAgB;oBAC1B,OAAO,EAAE,cAAc,CAAC,MAAM;oBAC9B,SAAS,EAAE,cAAc,CAAC,QAAQ,EAAE,SAAgB;oBACpD,YAAY,EAAE,cAAc,CAAC,QAAQ,EAAE,YAAmB;oBAC1D,KAAK,EAAE,cAAc,CAAC,QAAQ,EAAE,KAAY;iBAC7C,CAAC;gBAEF,MAAM,CAAC,OAAO,CAAC,UAAU,UAAU,QAAQ,CAAC,GAAG,CAAC,cAAc,OAAO,KAAK,CAAC,CAAC;gBAE5E,4CAA4C;gBAC5C,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO;qBAClC,OAAO,CAAC,8BAA8B,EAAE,EAAE,CAAC;qBAC3C,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;gBACzC,MAAM,cAAc,GAAG,cAAc,CAAC,MAAM,GAAG,GAAG;oBAChD,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK;oBAC1C,CAAC,CAAC,cAAc,CAAC;gBACnB,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,SAAS,CAAC,GAAG,CAAC,KAAK,OAAO,oBAAoB,MAAM,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC,CAAC;gBACjH,IAAI,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACpD,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;wBAClC,OAAO,CAAC,GAAG,CAAC,sBAAsB,EAAE,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;oBAChF,CAAC;gBACH,CAAC;gBACD,IAAI,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,YAAY,cAAc,EAAE,CAAC,CAAC;gBAC5C,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC;oBACR,OAAO,EAAE,IAAI;oBACb,OAAO;oBACP,WAAW,EAAE,MAAM;iBACpB,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACvC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC5E,MAAM,CAAC,KAAK,CAAC,UAAU,UAAU,QAAQ,CAAC,GAAG,CAAC,YAAY,OAAO,QAAQ,YAAY,EAAE,CAAC,CAAC;gBACzF,IAAI,CAAC,IAAI,CAAC;oBACR,OAAO,EAAE,KAAK;oBACd,OAAO;oBACP,KAAK,EAAE,YAAY;iBACpB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAC9B,kBAAuC;QAEvC,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAe,CAAC,IAAI,CAAC,CAAC;QACvF,MAAM,cAAc,GAAuB,EAAE,CAAC;QAE9C,2CAA2C;QAC3C,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;YACzC,OAAO,CAAC,GAAG,CAAC,kBAAkB,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;YACpD,OAAO,CAAC,GAAG,EAAE,CAAC;YAEd,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,UAAW,EAAE,CAAC;gBACzC,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;gBAClE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,eAAe,CAAC,cAAc,CAAC,cAAc,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAClE,CAAC;CAEF"}
|
package/dist/types.d.ts
CHANGED
|
@@ -54,7 +54,7 @@ export interface ExperimentOptions {
|
|
|
54
54
|
export interface ModuleDefinition {
|
|
55
55
|
name: string;
|
|
56
56
|
description: string;
|
|
57
|
-
|
|
57
|
+
module: PromptModule<any>;
|
|
58
58
|
}
|
|
59
59
|
/**
|
|
60
60
|
* Evaluation context (common for both code and prompt evaluators)
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAElF;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,KAAK,EAAE,GAAG,CAAC;IACX,iGAAiG;IACjG,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,kEAAkE;IAClE,YAAY,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAElF;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,KAAK,EAAE,GAAG,CAAC;IACX,iGAAiG;IACjG,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,kEAAkE;IAClE,YAAY,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,KAAK,CAAC;QACV,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC9B,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;;;;;OASG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,OAAO,EAAE,iBAAiB,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;CACrE;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;;;;;OASG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC,iBAAiB,CAAC,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAC1B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GACpD;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAC,iBAAiB,CAAC,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/E;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3C;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,iBAAiB;IAClE,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB"}
|
|
@@ -12,62 +12,75 @@ models:
|
|
|
12
12
|
provider: "mlx"
|
|
13
13
|
capabilities: ["local", "tools"]
|
|
14
14
|
priority: 20
|
|
15
|
+
disabled: true
|
|
16
|
+
lfm2.5-jp:
|
|
17
|
+
model: LiquidAI/LFM2.5-1.2B-JP-MLX-8bit
|
|
18
|
+
provider: "mlx"
|
|
19
|
+
capabilities: ["local", "fast", "japanese"]
|
|
20
|
+
priority: 20
|
|
21
|
+
lfm2.5-instruct:
|
|
22
|
+
model: LiquidAI/LFM2.5-1.2B-Instruct-MLX-8bit
|
|
23
|
+
provider: "mlx"
|
|
24
|
+
capabilities: ["local", "fast", "japanese"]
|
|
25
|
+
priority: 20
|
|
26
|
+
disabled: true
|
|
27
|
+
lfm2.5-thinking:
|
|
28
|
+
model: LiquidAI/LFM2.5-1.2B-Thinking-MLX-8bit
|
|
29
|
+
provider: "mlx"
|
|
30
|
+
capabilities: ["local", "thinking", "japanese"]
|
|
31
|
+
priority: 20
|
|
32
|
+
# disabled: true
|
|
15
33
|
|
|
16
34
|
drivers:
|
|
17
35
|
mlx: {}
|
|
18
36
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
# 共通定義
|
|
38
|
+
_shared:
|
|
39
|
+
tools: &tools_def
|
|
40
|
+
- name: get_weather
|
|
41
|
+
description: "指定された場所の現在の天気を取得する"
|
|
42
|
+
parameters:
|
|
43
|
+
type: object
|
|
44
|
+
properties:
|
|
45
|
+
location:
|
|
46
|
+
type: string
|
|
47
|
+
description: "天気を取得する場所(都市名)"
|
|
48
|
+
required:
|
|
49
|
+
- location
|
|
50
|
+
queryOptions:
|
|
51
|
+
weather: &tool_weather
|
|
32
52
|
temperature: 0.3
|
|
33
53
|
maxTokens: 512
|
|
34
|
-
tools:
|
|
35
|
-
- name: get_weather
|
|
36
|
-
description: "指定された場所の現在の天気を取得する"
|
|
37
|
-
parameters:
|
|
38
|
-
type: object
|
|
39
|
-
properties:
|
|
40
|
-
location:
|
|
41
|
-
type: string
|
|
42
|
-
description: "天気を取得する場所(都市名)"
|
|
43
|
-
required:
|
|
44
|
-
- location
|
|
54
|
+
tools: *tools_def
|
|
45
55
|
toolChoice: auto
|
|
46
|
-
|
|
47
|
-
- name: "[gemma3] ツール不要の質問"
|
|
48
|
-
description: "ツールを呼び出さずにテキストで回答することを期待"
|
|
49
|
-
models: ["gemma3-12b"]
|
|
50
|
-
input:
|
|
51
|
-
question: "1 + 1 は何ですか?"
|
|
52
|
-
queryOptions: &tool_math
|
|
56
|
+
math: &tool_math
|
|
53
57
|
temperature: 0.3
|
|
54
58
|
maxTokens: 1024
|
|
55
59
|
tools: *tools_def
|
|
56
60
|
toolChoice: auto
|
|
57
61
|
|
|
58
|
-
|
|
59
|
-
- name:
|
|
62
|
+
modules:
|
|
63
|
+
- name: tools-test
|
|
64
|
+
path: ./tools-test-module.mjs
|
|
65
|
+
description: "ツール呼び出し実験用"
|
|
66
|
+
|
|
67
|
+
testCases:
|
|
68
|
+
- name: "天気ツール呼び出し"
|
|
60
69
|
description: "get_weatherツールを呼び出すことを期待"
|
|
61
|
-
models: ["qwen3-4b"]
|
|
62
70
|
input:
|
|
63
71
|
question: "東京の天気を教えてください。"
|
|
64
72
|
queryOptions: *tool_weather
|
|
65
73
|
|
|
66
|
-
- name: "
|
|
74
|
+
- name: "ツール不要の質問"
|
|
67
75
|
description: "ツールを呼び出さずにテキストで回答することを期待"
|
|
68
|
-
models: ["qwen3-4b"]
|
|
69
76
|
input:
|
|
70
77
|
question: "1 + 1 は何ですか?"
|
|
71
78
|
queryOptions: *tool_math
|
|
72
79
|
|
|
73
|
-
evaluators:
|
|
80
|
+
evaluators:
|
|
81
|
+
- name: llm-requirement-fulfillment
|
|
82
|
+
|
|
83
|
+
evaluation:
|
|
84
|
+
enabled: true
|
|
85
|
+
provider: mlx
|
|
86
|
+
model: lfm2.5-thinking
|
|
@@ -2,28 +2,22 @@
|
|
|
2
2
|
* Tools実験用モジュール
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
{
|
|
22
|
-
type: 'message',
|
|
23
|
-
role: 'user',
|
|
24
|
-
content: context.question || 'Hello',
|
|
25
|
-
},
|
|
26
|
-
],
|
|
27
|
-
});
|
|
28
|
-
},
|
|
5
|
+
const module = {
|
|
6
|
+
objective: [
|
|
7
|
+
'- あなたは利用者からの質問に答えるアシスタントです。',
|
|
8
|
+
],
|
|
9
|
+
instructions: [
|
|
10
|
+
'- 質問の内容に応じて、適切なツールを使ってください。',
|
|
11
|
+
' - ツールの結果が返ってくるまで、推測で答えないでください。',
|
|
12
|
+
'- 必要がない場合は通常の応答を返します。',
|
|
13
|
+
],
|
|
14
|
+
messages: [
|
|
15
|
+
(ctx) => ({
|
|
16
|
+
type: 'message',
|
|
17
|
+
role: 'user',
|
|
18
|
+
content: ctx.question || 'Hello',
|
|
19
|
+
}),
|
|
20
|
+
],
|
|
29
21
|
};
|
|
22
|
+
|
|
23
|
+
export default module;
|
|
@@ -2,28 +2,24 @@
|
|
|
2
2
|
* Tools実験用モジュール
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import type { PromptModule } from '@modular-prompt/core';
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
role: 'user' as const,
|
|
24
|
-
content: context.question || 'Hello',
|
|
25
|
-
},
|
|
26
|
-
],
|
|
27
|
-
});
|
|
28
|
-
},
|
|
7
|
+
const module: PromptModule<{ question?: string }> = {
|
|
8
|
+
objective: [
|
|
9
|
+
'あなたはツールを使って質問に答えるアシスタントです。',
|
|
10
|
+
'必要に応じてツールを呼び出してください。',
|
|
11
|
+
],
|
|
12
|
+
instructions: [
|
|
13
|
+
'質問の内容に応じて、適切なツールを使ってください。',
|
|
14
|
+
'ツールの結果が返ってくるまで、推測で答えないでください。',
|
|
15
|
+
],
|
|
16
|
+
messages: [
|
|
17
|
+
(ctx) => ({
|
|
18
|
+
type: 'message' as const,
|
|
19
|
+
role: 'user' as const,
|
|
20
|
+
content: ctx.question || 'Hello',
|
|
21
|
+
}),
|
|
22
|
+
],
|
|
29
23
|
};
|
|
24
|
+
|
|
25
|
+
export default module;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@modular-prompt/experiment",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "Experiment framework for comparing and evaluating prompt modules",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"files": [
|
|
18
18
|
"dist",
|
|
19
19
|
"examples",
|
|
20
|
+
"skills",
|
|
20
21
|
"README.md"
|
|
21
22
|
],
|
|
22
23
|
"dependencies": {
|
|
@@ -24,9 +25,10 @@
|
|
|
24
25
|
"jiti": "^2.4.2",
|
|
25
26
|
"yaml": "^2.3.4",
|
|
26
27
|
"zod": "^3.22.4",
|
|
27
|
-
"@modular-prompt/driver": "0.8.
|
|
28
|
-
"@modular-prompt/core": "0.1.
|
|
29
|
-
"@modular-prompt/
|
|
28
|
+
"@modular-prompt/driver": "0.8.2",
|
|
29
|
+
"@modular-prompt/core": "0.1.13",
|
|
30
|
+
"@modular-prompt/process": "0.1.24",
|
|
31
|
+
"@modular-prompt/utils": "0.2.4"
|
|
30
32
|
},
|
|
31
33
|
"devDependencies": {
|
|
32
34
|
"@eslint/js": "^9.34.0",
|
|
@@ -64,7 +66,8 @@
|
|
|
64
66
|
"test": "vitest",
|
|
65
67
|
"test:ui": "vitest --ui",
|
|
66
68
|
"test:run": "vitest run",
|
|
67
|
-
"
|
|
69
|
+
"copy-skills": "mkdir -p skills/experiment && cp ../../skills/experiment/SKILL.md skills/experiment/SKILL.md",
|
|
70
|
+
"clean": "rm -rf dist skills",
|
|
68
71
|
"lint": "eslint src",
|
|
69
72
|
"typecheck": "tsc --noEmit"
|
|
70
73
|
}
|