@nahisaho/katashiro-evaluation 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +21 -0
  2. package/dist/BenchmarkSuite.d.ts +63 -0
  3. package/dist/BenchmarkSuite.d.ts.map +1 -0
  4. package/dist/BenchmarkSuite.js +152 -0
  5. package/dist/BenchmarkSuite.js.map +1 -0
  6. package/dist/DatasetManager.d.ts +68 -0
  7. package/dist/DatasetManager.d.ts.map +1 -0
  8. package/dist/DatasetManager.js +161 -0
  9. package/dist/DatasetManager.js.map +1 -0
  10. package/dist/ExperimentRunner.d.ts +51 -0
  11. package/dist/ExperimentRunner.d.ts.map +1 -0
  12. package/dist/ExperimentRunner.js +170 -0
  13. package/dist/ExperimentRunner.js.map +1 -0
  14. package/dist/evaluators/CompositeEvaluator.d.ts +66 -0
  15. package/dist/evaluators/CompositeEvaluator.d.ts.map +1 -0
  16. package/dist/evaluators/CompositeEvaluator.js +122 -0
  17. package/dist/evaluators/CompositeEvaluator.js.map +1 -0
  18. package/dist/evaluators/HeuristicEvaluator.d.ts +82 -0
  19. package/dist/evaluators/HeuristicEvaluator.d.ts.map +1 -0
  20. package/dist/evaluators/HeuristicEvaluator.js +233 -0
  21. package/dist/evaluators/HeuristicEvaluator.js.map +1 -0
  22. package/dist/evaluators/LLMJudgeEvaluator.d.ts +93 -0
  23. package/dist/evaluators/LLMJudgeEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/LLMJudgeEvaluator.js +296 -0
  25. package/dist/evaluators/LLMJudgeEvaluator.js.map +1 -0
  26. package/dist/evaluators/RAGASEvaluators.d.ts +128 -0
  27. package/dist/evaluators/RAGASEvaluators.d.ts.map +1 -0
  28. package/dist/evaluators/RAGASEvaluators.js +521 -0
  29. package/dist/evaluators/RAGASEvaluators.js.map +1 -0
  30. package/dist/evaluators/index.d.ts +13 -0
  31. package/dist/evaluators/index.d.ts.map +1 -0
  32. package/dist/evaluators/index.js +12 -0
  33. package/dist/evaluators/index.js.map +1 -0
  34. package/dist/index.d.ts +20 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +24 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/reporting/EvaluationReporter.d.ts +135 -0
  39. package/dist/reporting/EvaluationReporter.d.ts.map +1 -0
  40. package/dist/reporting/EvaluationReporter.js +285 -0
  41. package/dist/reporting/EvaluationReporter.js.map +1 -0
  42. package/dist/reporting/index.d.ts +8 -0
  43. package/dist/reporting/index.d.ts.map +1 -0
  44. package/dist/reporting/index.js +8 -0
  45. package/dist/reporting/index.js.map +1 -0
  46. package/dist/reporting/templates.d.ts +91 -0
  47. package/dist/reporting/templates.d.ts.map +1 -0
  48. package/dist/reporting/templates.js +150 -0
  49. package/dist/reporting/templates.js.map +1 -0
  50. package/dist/types.d.ts +408 -0
  51. package/dist/types.d.ts.map +1 -0
  52. package/dist/types.js +8 -0
  53. package/dist/types.js.map +1 -0
  54. package/package.json +47 -0
@@ -0,0 +1,233 @@
1
+ /**
2
+ * Heuristic Evaluators
3
+ *
4
+ * @requirement REQ-EVAL-002
5
+ * @design DES-KATASHIRO-003-EVAL §3.2
6
+ */
7
/**
 * Length-based evaluator.
 *
 * Scores an output by `input.output.length` (UTF-16 code units) against the
 * optional settings in `config`:
 * - `minLength` (default 0): shorter outputs fail with score `length / minLength`.
 * - `maxLength` (default Infinity): longer outputs fail with score `maxLength / length`.
 * - `optimalLength`: when truthy and the bounds hold, the score decreases
 *   linearly from 1 (exact match) to 0 as the relative deviation reaches
 *   `tolerance` (default 0.2); the result passes when the score is >= 0.5.
 * When none of the above applies the score is 1 and the result passes.
 */
export class LengthEvaluator {
  config;
  name = 'length';

  /**
   * @param {object} [config] - Optional `minLength`, `maxLength`,
   *   `optimalLength` and `tolerance` settings.
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Evaluate the length of `input.output`.
   *
   * @param {{ output: string }} input - Evaluation input; only `output` is read.
   * @returns {Promise<object>} Result with `evaluator`, `score`,
   *   `normalizedScore` (same value as `score`), `passed`, `reasoning` and
   *   `metadata` (`length`, `minLength`, `maxLength`, `optimalLength`).
   */
  async evaluate(input) {
    const length = input.output.length;
    const {
      minLength = 0,
      maxLength = Infinity,
      optimalLength,
      tolerance = 0.2,
    } = this.config;

    let score;
    let reasoning;
    let passed = true;

    if (length < minLength) {
      score = minLength > 0 ? length / minLength : 0;
      reasoning = `出力が短すぎます(${length}文字 < 最小${minLength}文字)`;
      passed = false;
    } else if (length > maxLength) {
      // `length` is finite, so this can never be true for the Infinity default.
      score = maxLength / length;
      reasoning = `出力が長すぎます(${length}文字 > 最大${maxLength}文字)`;
      passed = false;
    } else if (optimalLength) {
      const deviation = Math.abs(length - optimalLength) / optimalLength;
      if (deviation === 0) {
        // Exact match is always a perfect score; avoids 0/0 = NaN when tolerance is 0.
        score = 1;
      } else if (tolerance > 0) {
        // Clamp to [0, 1] so odd configurations cannot produce out-of-range scores.
        score = Math.min(1, Math.max(0, 1 - deviation / tolerance));
      } else {
        // No tolerance at all: any deviation scores 0.
        score = 0;
      }
      reasoning = `理想的な長さ(${optimalLength}文字)からの偏差: ${(deviation * 100).toFixed(1)}%`;
      passed = score >= 0.5;
    } else {
      score = 1;
      reasoning = `適切な長さ(${length}文字)`;
    }

    return {
      evaluator: this.name,
      score,
      normalizedScore: score,
      passed,
      reasoning,
      metadata: { length, minLength, maxLength, optimalLength },
    };
  }
}
52
/**
 * Keyword evaluator.
 *
 * Checks `input.output` for substrings listed in `config.requiredKeywords`
 * and `config.forbiddenKeywords`. Matching is case-insensitive unless
 * `config.caseSensitive` is truthy.
 */
export class KeywordEvaluator {
  config;
  name = 'keyword';

  /**
   * @param {object} [config] - Optional `requiredKeywords`,
   *   `forbiddenKeywords` (arrays of strings) and `caseSensitive`.
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Evaluate keyword presence in `input.output`.
   *
   * The score is the fraction of required keywords found; when forbidden
   * keywords are configured it is averaged with the fraction of forbidden
   * keywords that are absent. The result passes only with a score of 1.
   *
   * @param {{ output: string }} input - Evaluation input; only `output` is read.
   * @returns {Promise<object>} Result with `evaluator`, `score`,
   *   `normalizedScore`, `passed`, `reasoning` and `metadata`
   *   (`foundKeywords`, `missingKeywords`, `foundForbidden`, `requiredScore`,
   *   `forbiddenScore`).
   */
  async evaluate(input) {
    const {
      requiredKeywords = [],
      forbiddenKeywords = [],
      caseSensitive,
    } = this.config;

    // One normaliser shared by the text and every keyword.
    const normalize = caseSensitive ? (s) => s : (s) => s.toLowerCase();
    const text = normalize(input.output);
    const isPresent = (kw) => text.includes(normalize(kw));

    // Partition the required keywords in a single pass.
    const foundKeywords = [];
    const missingKeywords = [];
    for (const kw of requiredKeywords) {
      (isPresent(kw) ? foundKeywords : missingKeywords).push(kw);
    }
    const foundForbidden = forbiddenKeywords.filter((kw) => isPresent(kw));

    const requiredScore =
      requiredKeywords.length > 0 ? foundKeywords.length / requiredKeywords.length : 1;
    const forbiddenScore =
      forbiddenKeywords.length > 0 ? 1 - foundForbidden.length / forbiddenKeywords.length : 1;
    const score =
      forbiddenKeywords.length > 0 ? (requiredScore + forbiddenScore) / 2 : requiredScore;
    const passed = score >= 1.0;

    const reasoning = [
      requiredKeywords.length > 0
        ? `必須キーワード: ${foundKeywords.length}/${requiredKeywords.length}`
        : '',
      forbiddenKeywords.length > 0
        ? `禁止キーワード: ${foundForbidden.length}/${forbiddenKeywords.length}`
        : '',
    ]
      .filter(Boolean)
      .join(', ');

    return {
      evaluator: this.name,
      score,
      normalizedScore: score,
      passed,
      reasoning: reasoning || '評価完了',
      metadata: {
        foundKeywords,
        missingKeywords,
        foundForbidden,
        requiredScore,
        forbiddenScore,
      },
    };
  }
}
101
/**
 * Regular-expression evaluator.
 *
 * Tests `input.output` against every RegExp in `config.patterns`; the score
 * is the fraction of patterns that match (1 when the list is empty).
 */
export class RegexEvaluator {
  config;
  name = 'regex';

  /**
   * @param {{ patterns: RegExp[] }} config - Patterns to test against the output.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Evaluate `input.output` against the configured patterns.
   *
   * @param {{ output: string }} input - Evaluation input; only `output` is read.
   * @returns {Promise<object>} Result with `evaluator`, `score`,
   *   `normalizedScore`, `reasoning` and `metadata.matches` (one
   *   `{ pattern, matched }` entry per pattern). No `passed` field is set.
   */
  async evaluate(input) {
    const { patterns } = this.config;
    const matches = [];
    let matchedCount = 0;

    for (const regex of patterns) {
      // `test()` on a global/sticky RegExp resumes from `lastIndex`, so a
      // pattern reused across calls would otherwise alternate between
      // matching and not matching. Always search from the start and leave
      // the pattern in a clean state afterwards.
      regex.lastIndex = 0;
      const matched = regex.test(input.output);
      regex.lastIndex = 0;

      matches.push({ pattern: regex.source, matched });
      if (matched) {
        matchedCount++;
      }
    }

    const score = patterns.length > 0 ? matchedCount / patterns.length : 1;
    return {
      evaluator: this.name,
      score,
      normalizedScore: score,
      reasoning: `${matchedCount}/${patterns.length} パターン一致`,
      metadata: { matches },
    };
  }
}
131
/**
 * JSON structure evaluator.
 *
 * Parses `input.output` as JSON and checks that the fields listed in
 * `config.requiredFields` exist and that the fields listed in `config.types`
 * (field name -> expected type) have the expected type. The actual type is
 * `'array'` for arrays and the `typeof` result otherwise (so `null` reports
 * as `'object'`).
 */
export class JsonStructureEvaluator {
  config;
  name = 'json-structure';

  /**
   * @param {object} [config] - Optional `requiredFields` (string[]) and
   *   `types` (object mapping field name to expected type name).
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Evaluate the JSON structure of `input.output`.
   *
   * The score is the mean of the required-field score and the type score.
   * The result passes only when nothing is missing and no type mismatches.
   * Any error raised while parsing or checking produces the score-0
   * "invalid JSON" result instead of being thrown.
   *
   * @param {{ output: string }} input - Evaluation input; only `output` is read.
   * @returns {Promise<object>} Result with `evaluator`, `score`,
   *   `normalizedScore`, `passed`, `reasoning` and `metadata`.
   */
  async evaluate(input) {
    const invalidResult = {
      evaluator: this.name,
      score: 0,
      normalizedScore: 0,
      passed: false,
      reasoning: '無効なJSON形式',
      metadata: { error: 'JSON parse failed' },
    };

    try {
      const parsed = JSON.parse(input.output);
      const { requiredFields = [], types = {} } = this.config;
      const typeEntries = Object.entries(types);

      // A primitive or null root cannot hold fields. As before, it is
      // reported as invalid whenever any structural constraint is configured
      // (this used to happen implicitly through a swallowed TypeError).
      const isContainer = typeof parsed === 'object' && parsed !== null;
      if (!isContainer && (requiredFields.length > 0 || typeEntries.length > 0)) {
        return invalidResult;
      }

      // Own properties only: `in` would also accept inherited members such
      // as `constructor` or `toString`.
      const hasField = (field) => isContainer && Object.hasOwn(parsed, field);

      // Required-field check.
      const missingFields = requiredFields.filter((field) => !hasField(field));

      // Type check (fields that are absent are skipped here).
      const typeErrors = [];
      for (const [field, expectedType] of typeEntries) {
        if (!hasField(field)) {
          continue;
        }
        const actualType = Array.isArray(parsed[field]) ? 'array' : typeof parsed[field];
        if (actualType !== expectedType) {
          typeErrors.push(`${field}: expected ${expectedType}, got ${actualType}`);
        }
      }

      const missingScore =
        requiredFields.length > 0 ? 1 - missingFields.length / requiredFields.length : 1;
      const typeScore =
        typeEntries.length > 0 ? 1 - typeErrors.length / typeEntries.length : 1;
      const score = (missingScore + typeScore) / 2;
      const passed = missingFields.length === 0 && typeErrors.length === 0;

      return {
        evaluator: this.name,
        score,
        normalizedScore: score,
        passed,
        reasoning: [
          missingFields.length > 0 ? `欠落フィールド: ${missingFields.join(', ')}` : '',
          typeErrors.length > 0 ? `型エラー: ${typeErrors.join('; ')}` : '',
          passed ? '有効なJSON構造' : '',
        ]
          .filter(Boolean)
          .join('. '),
        metadata: { missingFields, typeErrors },
      };
    } catch {
      return invalidResult;
    }
  }
}
195
/**
 * Similarity evaluator (Jaccard coefficient).
 *
 * Compares the set of tokens in `input.output` with the set of tokens in
 * `input.expected`. Tokens come from `config.tokenize` when provided;
 * otherwise the text is lower-cased and split on whitespace.
 */
export class SimilarityEvaluator {
  config;
  name = 'similarity';

  /**
   * @param {object} [config] - Optional `tokenize(text)` function returning
   *   an iterable of tokens.
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Evaluate the token overlap between `input.output` and `input.expected`.
   *
   * @param {{ output: string, expected?: string }} input - Evaluation input.
   * @returns {Promise<object>} Result with `evaluator`, `score`,
   *   `normalizedScore` and `reasoning`; when `expected` is present the
   *   result also carries `metadata` with the set sizes. A falsy `expected`
   *   yields a score of 0. No `passed` field is set.
   */
  async evaluate(input) {
    if (!input.expected) {
      return {
        evaluator: this.name,
        score: 0,
        normalizedScore: 0,
        reasoning: '期待出力がないため評価できません',
      };
    }

    // `split(/\s+/)` emits '' for leading/trailing whitespace (and for an
    // empty string); those are not real tokens and would distort the ratio.
    const tokenize =
      this.config.tokenize ??
      ((text) => text.toLowerCase().split(/\s+/).filter((token) => token !== ''));

    const outputTokens = new Set(tokenize(input.output));
    const expectedTokens = new Set(tokenize(input.expected));
    const intersection = new Set([...outputTokens].filter((t) => expectedTokens.has(t)));
    const union = new Set([...outputTokens, ...expectedTokens]);
    const score = union.size > 0 ? intersection.size / union.size : 0;

    return {
      evaluator: this.name,
      score,
      normalizedScore: score,
      reasoning: `Jaccard類似度: ${(score * 100).toFixed(1)}% (共通トークン: ${intersection.size}/${union.size})`,
      metadata: {
        intersectionSize: intersection.size,
        unionSize: union.size,
        outputTokenCount: outputTokens.size,
        expectedTokenCount: expectedTokens.size,
      },
    };
  }
}
233
+ //# sourceMappingURL=HeuristicEvaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"HeuristicEvaluator.js","sourceRoot":"","sources":["../../src/evaluators/HeuristicEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAcH;;GAEG;AACH,MAAM,OAAO,eAAe;IAGN;IAFX,IAAI,GAAG,QAAQ,CAAC;IAEzB,YAAoB,SAAgC,EAAE;QAAlC,WAAM,GAAN,MAAM,CAA4B;IAAG,CAAC;IAE1D,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;QACnC,MAAM,EACJ,SAAS,GAAG,CAAC,EACb,SAAS,GAAG,QAAQ,EACpB,aAAa,EACb,SAAS,GAAG,GAAG,GAChB,GAAG,IAAI,CAAC,MAAM,CAAC;QAEhB,IAAI,KAAa,CAAC;QAClB,IAAI,SAAiB,CAAC;QACtB,IAAI,MAAM,GAAG,IAAI,CAAC;QAElB,IAAI,MAAM,GAAG,SAAS,EAAE,CAAC;YACvB,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;YAC/C,SAAS,GAAG,YAAY,MAAM,UAAU,SAAS,KAAK,CAAC;YACvD,MAAM,GAAG,KAAK,CAAC;QACjB,CAAC;aAAM,IAAI,MAAM,GAAG,SAAS,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;YACxD,KAAK,GAAG,SAAS,GAAG,MAAM,CAAC;YAC3B,SAAS,GAAG,YAAY,MAAM,UAAU,SAAS,KAAK,CAAC;YACvD,MAAM,GAAG,KAAK,CAAC;QACjB,CAAC;aAAM,IAAI,aAAa,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,aAAa,CAAC,GAAG,aAAa,CAAC;YACnE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;YAC/C,SAAS,GAAG,UAAU,aAAa,aAAa,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;YAChF,MAAM,GAAG,KAAK,IAAI,GAAG,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,CAAC,CAAC;YACV,SAAS,GAAG,SAAS,MAAM,KAAK,CAAC;QACnC,CAAC;QAED,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,IAAI;YACpB,KAAK;YACL,eAAe,EAAE,KAAK;YACtB,MAAM;YACN,SAAS;YACT,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,aAAa,EAAE;SAC1D,CAAC;IACJ,CAAC;CACF;AAWD;;GAEG;AACH,MAAM,OAAO,gBAAgB;IAGP;IAFX,IAAI,GAAG,SAAS,CAAC;IAE1B,YAAoB,SAAiC,EAAE;QAAnC,WAAM,GAAN,MAAM,CAA6B;IAAG,CAAC;IAE3D,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa;YACpC,CAAC,CAAC,KAAK,CAAC,MAAM;YACd,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC/B,MAAM,EAAE,gBAAgB,GAAG,EAAE,EAAE,iBAAiB,GAAG,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEtE,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CACnD,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC
,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CACjE,CAAC;QACF,MAAM,eAAe,GAAG,gBAAgB,CAAC,MAAM,CAC7C,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAC1E,CAAC;QACF,MAAM,cAAc,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CACrD,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CACjE,CAAC;QAEF,MAAM,aAAa,GACjB,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACnF,MAAM,cAAc,GAClB,iBAAiB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1F,MAAM,KAAK,GAAG,iBAAiB,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,CAAC,aAAa,GAAG,cAAc,CAAC,GAAG,CAAC;YACtC,CAAC,CAAC,aAAa,CAAC;QAElB,MAAM,MAAM,GAAG,KAAK,IAAI,GAAG,CAAC;QAE5B,MAAM,SAAS,GAAG;YAChB,gBAAgB,CAAC,MAAM,GAAG,CAAC;gBACzB,CAAC,CAAC,YAAY,aAAa,CAAC,MAAM,IAAI,gBAAgB,CAAC,MAAM,EAAE;gBAC/D,CAAC,CAAC,EAAE;YACN,iBAAiB,CAAC,MAAM,GAAG,CAAC;gBAC1B,CAAC,CAAC,YAAY,cAAc,CAAC,MAAM,IAAI,iBAAiB,CAAC,MAAM,EAAE;gBACjE,CAAC,CAAC,EAAE;SACP;aACE,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,IAAI;YACpB,KAAK;YACL,eAAe,EAAE,KAAK;YACtB,MAAM;YACN,SAAS,EAAE,SAAS,IAAI,MAAM;YAC9B,QAAQ,EAAE;gBACR,aAAa;gBACb,eAAe;gBACf,cAAc;gBACd,aAAa;gBACb,cAAc;aACf;SACF,CAAC;IACJ,CAAC;CACF;AASD;;GAEG;AACH,MAAM,OAAO,cAAc;IAGL;IAFX,IAAI,GAAG,OAAO,CAAC;IAExB,YAAoB,MAA4B;QAA5B,WAAM,GAAN,MAAM,CAAsB;IAAG,CAAC;IAEpD,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QACjC,MAAM,OAAO,GAAiD,EAAE,CAAC;QAEjE,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACzC,OAAO,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;YACjD,IAAI,OAAO,EAAE,CAAC;gBACZ,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC
,CAAC,CAAC,CAAC;QAEvE,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,IAAI;YACpB,KAAK;YACL,eAAe,EAAE,KAAK;YACtB,SAAS,EAAE,GAAG,YAAY,IAAI,QAAQ,CAAC,MAAM,SAAS;YACtD,QAAQ,EAAE,EAAE,OAAO,EAAE;SACtB,CAAC;IACJ,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,sBAAsB;IAIvB;IAHD,IAAI,GAAG,gBAAgB,CAAC;IAEjC,YACU,SAGJ,EAAE;QAHE,WAAM,GAAN,MAAM,CAGR;IACL,CAAC;IAEJ,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACxC,MAAM,EAAE,cAAc,GAAG,EAAE,EAAE,KAAK,GAAG,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;YAExD,cAAc;YACd,MAAM,aAAa,GAAG,cAAc,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,SAAS,CACjD,CAAC;YAEF,QAAQ;YACR,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,KAAK,EAAE,YAAY,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC1D,IAAI,KAAK,IAAI,MAAM,EAAE,CAAC;oBACpB,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;wBAC7C,CAAC,CAAC,OAAO;wBACT,CAAC,CAAC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;oBACzB,IAAI,UAAU,KAAK,YAAY,EAAE,CAAC;wBAChC,UAAU,CAAC,IAAI,CACb,GAAG,KAAK,cAAc,YAAY,SAAS,UAAU,EAAE,CACxD,CAAC;oBACJ,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,YAAY,GAChB,cAAc,CAAC,MAAM,GAAG,CAAC;gBACvB,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,GAAG,cAAc,CAAC,MAAM;gBAClD,CAAC,CAAC,CAAC,CAAC;YACR,MAAM,SAAS,GACb,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC;gBAC3B,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM;gBACnD,CAAC,CAAC,CAAC,CAAC;YACR,MAAM,KAAK,GAAG,CAAC,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC;YAErE,OAAO;gBACL,SAAS,EAAE,IAAI,CAAC,IAAI;gBACpB,KAAK;gBACL,eAAe,EAAE,KAAK;gBACtB,MAAM;gBACN,SAAS,EAAE;oBACT,aAAa,CAAC,MAAM,GAAG,CAAC;wBACtB,CAAC,CAAC,YAAY,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;wBACxC,CAAC,CAAC,EAAE;oBACN,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;oBAC7D,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE;iBAC1B;qBACE,MAAM,CAAC,OAAO,CAAC;qBACf,IAAI,CAAC,IAAI,CAAC;gBACb,
QAAQ,EAAE,EAAE,aAAa,EAAE,UAAU,EAAE;aACxC,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,SAAS,EAAE,IAAI,CAAC,IAAI;gBACpB,KAAK,EAAE,CAAC;gBACR,eAAe,EAAE,CAAC;gBAClB,MAAM,EAAE,KAAK;gBACb,SAAS,EAAE,WAAW;gBACtB,QAAQ,EAAE,EAAE,KAAK,EAAE,mBAAmB,EAAE;aACzC,CAAC;QACJ,CAAC;IACH,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,mBAAmB;IAIpB;IAHD,IAAI,GAAG,YAAY,CAAC;IAE7B,YACU,SAGJ,EAAE;QAHE,WAAM,GAAN,MAAM,CAGR;IACL,CAAC;IAEJ,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YACpB,OAAO;gBACL,SAAS,EAAE,IAAI,CAAC,IAAI;gBACpB,KAAK,EAAE,CAAC;gBACR,eAAe,EAAE,CAAC;gBAClB,SAAS,EAAE,kBAAkB;aAC9B,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GACZ,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;QAE9E,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;QACrD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;QAEzD,MAAM,YAAY,GAAG,IAAI,GAAG,CAC1B,CAAC,GAAG,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CACvD,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,YAAY,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC;QAE5D,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAElE,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,IAAI;YACpB,KAAK;YACL,eAAe,EAAE,KAAK;YACtB,SAAS,EAAE,eAAe,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,YAAY,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,GAAG;YAClG,QAAQ,EAAE;gBACR,gBAAgB,EAAE,YAAY,CAAC,IAAI;gBACnC,SAAS,EAAE,KAAK,CAAC,IAAI;gBACrB,gBAAgB,EAAE,YAAY,CAAC,IAAI;gBACnC,kBAAkB,EAAE,cAAc,CAAC,IAAI;aACxC;SACF,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * LLM Judge Evaluator
3
+ *
4
+ * LLMを使用してテキスト品質を評価する評価器
5
+ *
6
+ * @requirement REQ-EVAL-101
7
+ * @design DES-KATASHIRO-003-EVAL §3.3
8
+ */
9
+ import type { Evaluator, EvaluationInput, EvaluationCriteria, LLMJudgeEvaluatorConfig, LLMJudgeResult } from '../types.js';
10
+ interface LLMProviderLike { // Structural type of the LLM client accepted by the LLMJudgeEvaluator constructor; not exported from this module.
11
+ generate(request: { // Sends one chat-style request and resolves with the generated text.
12
+ messages: Array<{
13
+ role: string;
14
+ content: string;
15
+ }>;
16
+ temperature?: number;
17
+ responseFormat?: {
18
+ type: string;
19
+ };
20
+ }): Promise<{
21
+ content: string; // Generated text.
22
+ usage?: { // Optional token accounting for the call.
23
+ promptTokens: number;
24
+ completionTokens: number;
25
+ totalTokens: number;
26
+ };
27
+ }>;
28
+ }
29
+ /**
30
+ * Default evaluation criteria.
31
+ */
32
+ export declare const DEFAULT_CRITERIA: EvaluationCriteria[];
33
+ /**
34
+ * Default system prompt (the text itself is Japanese).
35
+ */
36
+ export declare const DEFAULT_SYSTEM_PROMPT = "\u3042\u306A\u305F\u306FLLM\u51FA\u529B\u306E\u54C1\u8CEA\u3092\u8A55\u4FA1\u3059\u308B\u5C02\u9580\u5BB6\u3067\u3059\u3002\n\u4E0E\u3048\u3089\u308C\u305F\u5165\u529B\u3068\u51FA\u529B\u3092\u5206\u6790\u3057\u3001\u6307\u5B9A\u3055\u308C\u305F\u57FA\u6E96\u306B\u57FA\u3065\u3044\u3066\u8A55\u4FA1\u3057\u3066\u304F\u3060\u3055\u3044\u3002\n\u8A55\u4FA1\u306F\u5BA2\u89B3\u7684\u304B\u3064\u4E00\u8CAB\u6027\u306E\u3042\u308B\u57FA\u6E96\u3067\u884C\u3063\u3066\u304F\u3060\u3055\u3044\u3002";
37
+ /**
38
+ * Default evaluation prompt template; contains the {{input}}, {{output}} and {{criteria}} placeholders.
39
+ */
40
+ export declare const DEFAULT_EVALUATION_PROMPT_TEMPLATE = "\u4EE5\u4E0B\u306E\u5165\u529B\u3068\u51FA\u529B\u3092\u8A55\u4FA1\u3057\u3066\u304F\u3060\u3055\u3044\u3002\n\n## \u5165\u529B\n{{input}}\n\n## \u51FA\u529B\n{{output}}\n\n## \u8A55\u4FA1\u57FA\u6E96\n{{criteria}}\n\n## \u8A55\u4FA1\u5F62\u5F0F\n\u4EE5\u4E0B\u306EJSON\u5F62\u5F0F\u3067\u8A55\u4FA1\u7D50\u679C\u3092\u8FD4\u3057\u3066\u304F\u3060\u3055\u3044:\n{\n \"scores\": {\n \"\u57FA\u6E96\u540D\": {\n \"score\": 1-5\u306E\u30B9\u30B3\u30A2,\n \"reasoning\": \"\u30B9\u30B3\u30A2\u306E\u6839\u62E0\"\n }\n },\n \"overallAssessment\": \"\u5168\u4F53\u7684\u306A\u8A55\u4FA1\u30B3\u30E1\u30F3\u30C8\"\n}";
41
+ /**
42
+ * LLM judge evaluator: an Evaluator implementation that takes an LLM provider and an optional configuration.
43
+ */
44
+ export declare class LLMJudgeEvaluator implements Evaluator {
45
+ readonly name: string;
46
+ private llmProvider;
47
+ private criteria;
48
+ private scale;
49
+ private systemPrompt;
50
+ private evaluationPromptTemplate;
51
+ private maxRetries;
52
+ private temperature;
53
+ private forceJsonOutput;
54
+ constructor(llmProvider: LLMProviderLike, config?: LLMJudgeEvaluatorConfig);
55
+ /**
56
+ * Run the evaluation.
57
+ */
58
+ evaluate(input: EvaluationInput): Promise<LLMJudgeResult>;
59
+ /**
60
+ * Build the evaluation prompt.
61
+ */
62
+ private buildEvaluationPrompt;
63
+ /**
64
+ * Parse the LLM response.
65
+ */
66
+ private parseResponse;
67
+ /**
68
+ * Extract scores from the legacy response format.
69
+ */
70
+ private extractScoresFromLegacy;
71
+ /**
72
+ * Extract scores from free text (fallback).
73
+ */
74
+ private extractScoresFromText;
75
+ /**
76
+ * Compute the normalized score.
77
+ */
78
+ private calculateNormalizedScore;
79
+ /**
80
+ * Build the reasoning text.
81
+ */
82
+ private buildReasoning;
83
+ /**
84
+ * Get the evaluation criteria.
85
+ */
86
+ getCriteria(): EvaluationCriteria[];
87
+ /**
88
+ * Set the evaluation criteria.
89
+ */
90
+ setCriteria(criteria: EvaluationCriteria[]): void;
91
+ }
92
+ export {};
93
+ //# sourceMappingURL=LLMJudgeEvaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LLMJudgeEvaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/LLMJudgeEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,kBAAkB,EAClB,uBAAuB,EACvB,cAAc,EACf,MAAM,aAAa,CAAC;AAGrB,UAAU,eAAe;IACvB,QAAQ,CAAC,OAAO,EAAE;QAChB,QAAQ,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACnD,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,cAAc,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;KACnC,GAAG,OAAO,CAAC;QACV,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE;YACN,YAAY,EAAE,MAAM,CAAC;YACrB,gBAAgB,EAAE,MAAM,CAAC;YACzB,WAAW,EAAE,MAAM,CAAC;SACrB,CAAC;KACH,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,eAAO,MAAM,gBAAgB,EAAE,kBAAkB,EAqChD,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,qBAAqB,kfAER,CAAC;AAE3B;;GAEG;AACH,eAAO,MAAM,kCAAkC,ynBAqB7C,CAAC;AAEH;;GAEG;AACH,qBAAa,iBAAkB,YAAW,SAAS;IACjD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,QAAQ,CAAuB;IACvC,OAAO,CAAC,KAAK,CAA+B;IAC5C,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,wBAAwB,CAAS;IACzC,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,eAAe,CAAU;gBAG/B,WAAW,EAAE,eAAe,EAC5B,MAAM,GAAE,uBAAwD;IAclE;;OAEG;IACG,QAAQ,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC;IA0D/D;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAqB7B;;OAEG;IACH,OAAO,CAAC,aAAa;IAuBrB;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAuB/B;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA8B7B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAqBhC;;OAEG;IACH,OAAO,CAAC,cAAc;IActB;;OAEG;IACH,WAAW,IAAI,kBAAkB,EAAE;IAInC;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,kBAAkB,EAAE,GAAG,IAAI;CAGlD"}
@@ -0,0 +1,296 @@
1
+ /**
2
+ * LLM Judge Evaluator
3
+ *
4
+ * LLMを使用してテキスト品質を評価する評価器
5
+ *
6
+ * @requirement REQ-EVAL-101
7
+ * @design DES-KATASHIRO-003-EVAL §3.3
8
+ */
9
+ /**
10
+ * Default evaluation criteria: relevance, coherence and helpfulness, each on a 1-5 scale with one rubric entry per score.
11
+ */
12
+ export const DEFAULT_CRITERIA = [
13
+ {
14
+ name: 'relevance',
15
+ description: '回答が質問や入力に対して関連性があるか',
16
+ scale: { min: 1, max: 5 },
17
+ rubric: {
18
+ 1: '全く関連がない',
19
+ 2: '部分的に関連があるが、主要な点を外している',
20
+ 3: '概ね関連があるが、改善の余地がある',
21
+ 4: '非常に関連性が高い',
22
+ 5: '完全に関連性があり、質問に的確に答えている',
23
+ },
24
+ },
25
+ {
26
+ name: 'coherence',
27
+ description: '回答が論理的で一貫性があるか',
28
+ scale: { min: 1, max: 5 },
29
+ rubric: {
30
+ 1: '論理的でなく、理解困難',
31
+ 2: '部分的に論理的だが、矛盾がある',
32
+ 3: '概ね論理的だが、一部不明瞭な部分がある',
33
+ 4: '論理的で理解しやすい',
34
+ 5: '非常に論理的で明確、流れが自然',
35
+ },
36
+ },
37
+ {
38
+ name: 'helpfulness',
39
+ description: '回答がユーザーにとって有用か',
40
+ scale: { min: 1, max: 5 },
41
+ rubric: {
42
+ 1: '全く役に立たない',
43
+ 2: '限定的に役立つが、不十分',
44
+ 3: '概ね役立つ',
45
+ 4: '非常に役立つ',
46
+ 5: '極めて有用で、期待以上の価値がある',
47
+ },
48
+ },
49
+ ];
50
+ /**
51
+ * Default system prompt (Japanese text); used by LLMJudgeEvaluator when config.systemPrompt is not provided.
52
+ */
53
+ export const DEFAULT_SYSTEM_PROMPT = `あなたはLLM出力の品質を評価する専門家です。
54
+ 与えられた入力と出力を分析し、指定された基準に基づいて評価してください。
55
+ 評価は客観的かつ一貫性のある基準で行ってください。`;
56
+ /**
57
+ * Default evaluation prompt template (Japanese text); its {{input}}, {{output}} and {{criteria}} placeholders are filled in by LLMJudgeEvaluator.buildEvaluationPrompt.
58
+ */
59
+ export const DEFAULT_EVALUATION_PROMPT_TEMPLATE = `以下の入力と出力を評価してください。
60
+
61
+ ## 入力
62
+ {{input}}
63
+
64
+ ## 出力
65
+ {{output}}
66
+
67
+ ## 評価基準
68
+ {{criteria}}
69
+
70
+ ## 評価形式
71
+ 以下のJSON形式で評価結果を返してください:
72
+ {
73
+ "scores": {
74
+ "基準名": {
75
+ "score": 1-5のスコア,
76
+ "reasoning": "スコアの根拠"
77
+ }
78
+ },
79
+ "overallAssessment": "全体的な評価コメント"
80
+ }`;
81
/**
 * LLM judge evaluator.
 *
 * Asks an LLM (via `llmProvider.generate`) to rate an output against a list
 * of criteria, parses the reply into per-criterion scores and reduces them
 * to a single weighted score in the range 0-1.
 */
export class LLMJudgeEvaluator {
  name;
  llmProvider;
  criteria;
  scale;
  systemPrompt;
  evaluationPromptTemplate;
  maxRetries;
  temperature;
  forceJsonOutput;

  /**
   * @param {object} llmProvider - Object exposing an async `generate(request)`
   *   method that resolves with `{ content, usage? }`.
   * @param {object} [config] - Optional overrides: `name`, `criteria`
   *   (an empty list falls back to the defaults), `scale`, `systemPrompt`,
   *   `evaluationPromptTemplate`, `maxRetries` (default 3), `temperature`
   *   (default 0.1) and `forceJsonOutput` (default true).
   */
  constructor(llmProvider, config = { criteria: DEFAULT_CRITERIA }) {
    this.llmProvider = llmProvider;
    this.name = config.name ?? 'llm-judge';
    this.criteria =
      config.criteria && config.criteria.length > 0 ? config.criteria : DEFAULT_CRITERIA;
    this.scale = config.scale ?? { min: 1, max: 5 };
    this.systemPrompt = config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
    this.evaluationPromptTemplate =
      config.evaluationPromptTemplate ?? DEFAULT_EVALUATION_PROMPT_TEMPLATE;
    this.maxRetries = config.maxRetries ?? 3;
    this.temperature = config.temperature ?? 0.1;
    this.forceJsonOutput = config.forceJsonOutput ?? true;
  }

  /**
   * Run the evaluation.
   *
   * Calls the provider up to `maxRetries` times; the first attempt that
   * completes without throwing is returned. The result passes when the
   * normalized score is >= 0.6. If every attempt throws, a score-0 failure
   * result carrying the last error message is returned instead of rejecting.
   *
   * @param {{ input?: string, output: string, expected?: string }} input
   * @returns {Promise<object>} Evaluation result including `criteriaScores`,
   *   `rawLLMOutput`, `tokenUsage` and `metadata`.
   */
  async evaluate(input) {
    const prompt = this.buildEvaluationPrompt(input);
    let lastError = null;

    for (let attempt = 0; attempt < this.maxRetries; attempt++) {
      try {
        const response = await this.llmProvider.generate({
          messages: [
            { role: 'system', content: this.systemPrompt },
            { role: 'user', content: prompt },
          ],
          temperature: this.temperature,
          ...(this.forceJsonOutput && { responseFormat: { type: 'json_object' } }),
        });
        const parsed = this.parseResponse(response.content);
        const normalizedScore = this.calculateNormalizedScore(parsed.scores);
        return {
          evaluator: this.name,
          score: normalizedScore,
          normalizedScore,
          passed: normalizedScore >= 0.6,
          reasoning: parsed.overallAssessment ?? this.buildReasoning(parsed.scores),
          criteriaScores: parsed.scores,
          rawLLMOutput: response.content,
          tokenUsage: response.usage,
          metadata: {
            criteria: this.criteria.map((c) => c.name),
            scale: this.scale,
            attempt: attempt + 1,
          },
        };
      } catch (error) {
        // Remember the failure; the loop condition decides whether to retry.
        lastError = error instanceof Error ? error : new Error(String(error));
      }
    }

    // Every attempt failed.
    return {
      evaluator: this.name,
      score: 0,
      normalizedScore: 0,
      passed: false,
      reasoning: `評価に失敗しました: ${lastError?.message ?? 'Unknown error'}`,
      criteriaScores: {},
      rawLLMOutput: undefined,
      metadata: {
        error: lastError?.message,
        retries: this.maxRetries,
      },
    };
  }

  /**
   * Build the evaluation prompt by filling the `{{input}}`, `{{output}}`,
   * `{{criteria}}` and `{{expected}}` placeholders of the template.
   *
   * Substitution happens in a single pass with a replacer function, so the
   * inserted text is taken literally: `$&`-style sequences are not
   * interpreted and placeholder-looking text inside a value is not expanded
   * again. Every occurrence of a placeholder is replaced.
   *
   * @param {{ input?: string, output: string, expected?: string }} input
   * @returns {string} The prompt sent as the user message.
   */
  buildEvaluationPrompt(input) {
    const criteriaText = this.criteria
      .map((c) => {
        let text = `### ${c.name}\n${c.description}`;
        if (c.rubric) {
          text += '\n評価基準:';
          for (const [score, desc] of Object.entries(c.rubric)) {
            text += `\n  ${score}: ${desc}`;
          }
        }
        return text;
      })
      .join('\n\n');

    const values = {
      input: input.input ?? '(入力なし)',
      output: input.output,
      criteria: criteriaText,
      expected: input.expected ?? '(期待出力なし)',
    };
    return this.evaluationPromptTemplate.replace(
      /\{\{(input|output|criteria|expected)\}\}/g,
      (_placeholder, key) => values[key],
    );
  }

  /**
   * Parse the LLM response.
   *
   * Accepts raw JSON or JSON wrapped in a Markdown code fence. When the
   * parsed object has no `scores` member the legacy flat layout is tried;
   * when JSON parsing fails the scores are scraped from the text.
   *
   * @param {string} content - Raw LLM reply.
   * @returns {{ scores: object, overallAssessment?: string }}
   */
  parseResponse(content) {
    let jsonStr = content;
    const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (jsonMatch) {
      jsonStr = jsonMatch[1].trim();
    }
    try {
      const parsed = JSON.parse(jsonStr);
      return {
        scores: parsed.scores ?? this.extractScoresFromLegacy(parsed),
        overallAssessment: parsed.overallAssessment,
      };
    } catch {
      // Not usable JSON: fall back to scraping scores from the text.
      return this.extractScoresFromText(content);
    }
  }

  /**
   * Extract scores from the legacy layout, where each criterion name is a
   * top-level key holding either a number or a `{ score, reasoning }` object.
   * Criteria without such a key are omitted.
   *
   * @param {object} parsed - Parsed JSON reply.
   * @returns {object} Map of criterion name to `{ score, reasoning }`.
   */
  extractScoresFromLegacy(parsed) {
    const scores = {};
    for (const criterion of this.criteria) {
      const key = criterion.name;
      const value = parsed[key];
      if (typeof value === 'number') {
        scores[key] = { score: value, reasoning: '' };
      } else if (typeof value === 'object' && value !== null) {
        scores[key] = {
          score: typeof value.score === 'number' ? value.score : this.scale.min,
          reasoning: typeof value.reasoning === 'string' ? value.reasoning : '',
        };
      }
    }
    return scores;
  }

  /**
   * Extract scores from free text (fallback). Looks for "<name>: 4" or
   * "<name>: 4/5" per criterion; a criterion that cannot be found gets the
   * rounded midpoint of the evaluator scale.
   *
   * @param {string} text - Raw LLM reply.
   * @returns {{ scores: object }}
   */
  extractScoresFromText(text) {
    const scores = {};
    for (const criterion of this.criteria) {
      // Escape the name so characters such as "+" or "(" are matched
      // literally instead of breaking (or altering) the pattern.
      const escapedName = criterion.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const pattern = new RegExp(`${escapedName}[:\\s]+(\\d+)(?:\\/\\d+)?`, 'i');
      const match = text.match(pattern);
      if (match) {
        scores[criterion.name] = {
          score: Number.parseInt(match[1], 10),
          reasoning: '',
        };
      } else {
        scores[criterion.name] = {
          score: Math.round((this.scale.min + this.scale.max) / 2),
          reasoning: 'スコアを抽出できませんでした',
        };
      }
    }
    return { scores };
  }

  /**
   * Compute the weighted mean of the per-criterion scores, each mapped onto
   * 0-1 using the criterion's own scale (or the evaluator scale).
   *
   * Scores outside the scale are clamped to 0-1. Entries whose score is not
   * a finite number, or whose scale has no positive width, are skipped so a
   * single malformed entry cannot turn the whole result into NaN/Infinity.
   *
   * @param {object} scores - Map of criterion name to `{ score, ... }`.
   * @returns {number} Value in [0, 1]; 0 when nothing could be scored.
   */
  calculateNormalizedScore(scores) {
    const criteriaMap = new Map(this.criteria.map((c) => [c.name, c]));
    let weightedSum = 0;
    let totalWeight = 0;
    for (const [name, result] of Object.entries(scores)) {
      const criterion = criteriaMap.get(name);
      const weight = criterion?.weight ?? 1;
      const scale = criterion?.scale ?? this.scale;
      const range = scale.max - scale.min;
      const rawScore = Number(result?.score);
      if (!Number.isFinite(rawScore) || !(range > 0)) {
        continue;
      }
      const normalized = Math.min(1, Math.max(0, (rawScore - scale.min) / range));
      weightedSum += normalized * weight;
      totalWeight += weight;
    }
    return totalWeight > 0 ? weightedSum / totalWeight : 0;
  }

  /**
   * Build the reasoning text: "name: score/max - reasoning" per criterion,
   * joined with "; ".
   *
   * @param {object} scores - Map of criterion name to `{ score, reasoning }`.
   * @returns {string}
   */
  buildReasoning(scores) {
    const parts = [];
    for (const [name, result] of Object.entries(scores)) {
      const criterion = this.criteria.find((c) => c.name === name);
      const maxScore = criterion?.scale?.max ?? this.scale.max;
      parts.push(
        `${name}: ${result.score}/${maxScore}${result.reasoning ? ` - ${result.reasoning}` : ''}`,
      );
    }
    return parts.join('; ');
  }

  /**
   * Get the evaluation criteria.
   *
   * @returns {object[]} A shallow copy of the criteria list.
   */
  getCriteria() {
    return [...this.criteria];
  }

  /**
   * Set the evaluation criteria. The given array is stored as-is.
   *
   * @param {object[]} criteria - New criteria list.
   */
  setCriteria(criteria) {
    this.criteria = criteria;
  }
}
296
+ //# sourceMappingURL=LLMJudgeEvaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LLMJudgeEvaluator.js","sourceRoot":"","sources":["../../src/evaluators/LLMJudgeEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA0BH;;GAEG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAyB;IACpD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,qBAAqB;QAClC,KAAK,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;QACzB,MAAM,EAAE;YACN,CAAC,EAAE,SAAS;YACZ,CAAC,EAAE,uBAAuB;YAC1B,CAAC,EAAE,mBAAmB;YACtB,CAAC,EAAE,WAAW;YACd,CAAC,EAAE,uBAAuB;SAC3B;KACF;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,gBAAgB;QAC7B,KAAK,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;QACzB,MAAM,EAAE;YACN,CAAC,EAAE,aAAa;YAChB,CAAC,EAAE,iBAAiB;YACpB,CAAC,EAAE,qBAAqB;YACxB,CAAC,EAAE,YAAY;YACf,CAAC,EAAE,iBAAiB;SACrB;KACF;IACD;QACE,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,gBAAgB;QAC7B,KAAK,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;QACzB,MAAM,EAAE;YACN,CAAC,EAAE,UAAU;YACb,CAAC,EAAE,cAAc;YACjB,CAAC,EAAE,OAAO;YACV,CAAC,EAAE,QAAQ;YACX,CAAC,EAAE,mBAAmB;SACvB;KACF;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG;;0BAEX,CAAC;AAE3B;;GAEG;AACH,MAAM,CAAC,MAAM,kCAAkC,GAAG;;;;;;;;;;;;;;;;;;;;;EAqBhD,CAAC;AAEH;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACnB,IAAI,CAAS;IACd,WAAW,CAAkB;IAC7B,QAAQ,CAAuB;IAC/B,KAAK,CAA+B;IACpC,YAAY,CAAS;IACrB,wBAAwB,CAAS;IACjC,UAAU,CAAS;IACnB,WAAW,CAAS;IACpB,eAAe,CAAU;IAEjC,YACE,WAA4B,EAC5B,SAAkC,EAAE,QAAQ,EAAE,gBAAgB,EAAE;QAEhE,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,IAAI,WAAW,CAAC;QACvC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,gBAAgB,CAAC;QACnG,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QAChD,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,qBAAqB,CAAC;QACjE,IAAI,CAAC,wBAAwB;YAC3B,MAAM,CAAC,wBAAwB,IAAI,kCAAkC,CAAC;QACxE,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;QACzC,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,CAAC;QAC7C,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,IAAI,CAAC;IACxD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAsB;QACnC,MAAM,MAAM,GAAG,IAA
I,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAEjD,IAAI,SAAS,GAAiB,IAAI,CAAC;QACnC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YAC3D,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC;oBAC/C,QAAQ,EAAE;wBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;wBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;qBAClC;oBACD,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,GAAG,CAAC,IAAI,CAAC,eAAe,IAAI,EAAE,cAAc,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,EAAE,CAAC;iBACzE,CAAC,CAAC;gBAEH,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACpD,MAAM,eAAe,GAAG,IAAI,CAAC,wBAAwB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBAErE,OAAO;oBACL,SAAS,EAAE,IAAI,CAAC,IAAI;oBACpB,KAAK,EAAE,eAAe;oBACtB,eAAe;oBACf,MAAM,EAAE,eAAe,IAAI,GAAG;oBAC9B,SAAS,EAAE,MAAM,CAAC,iBAAiB,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,MAAM,CAAC;oBACzE,cAAc,EAAE,MAAM,CAAC,MAAM;oBAC7B,YAAY,EAAE,QAAQ,CAAC,OAAO;oBAC9B,UAAU,EAAE,QAAQ,CAAC,KAAK;oBAC1B,QAAQ,EAAE;wBACR,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;wBAC1C,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE,OAAO,GAAG,CAAC;qBACrB;iBACF,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACtE,eAAe;gBACf,IAAI,OAAO,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;oBAClC,SAAS;gBACX,CAAC;YACH,CAAC;QACH,CAAC;QAED,UAAU;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,IAAI;YACpB,KAAK,EAAE,CAAC;YACR,eAAe,EAAE,CAAC;YAClB,MAAM,EAAE,KAAK;YACb,SAAS,EAAE,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE;YAChE,cAAc,EAAE,EAAE;YAClB,YAAY,EAAE,SAAS;YACvB,QAAQ,EAAE;gBACR,KAAK,EAAE,SAAS,EAAE,OAAO;gBACzB,OAAO,EAAE,IAAI,CAAC,UAAU;aACzB;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,KAAsB;QAClD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ;aAC/B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACT,IAAI,IAAI,GAAG,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7C,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;gBACb,IAAI,IAAI,SAAS,CAAC;gBAClB,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,
CAAC;oBACrD,IAAI,IAAI,OAAO,KAAK,KAAK,IAAI,EAAE,CAAC;gBAClC,CAAC;YACH,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;aACD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,OAAO,IAAI,CAAC,wBAAwB;aACjC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,KAAK,IAAI,QAAQ,CAAC;aAC7C,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC;aACnC,OAAO,CAAC,cAAc,EAAE,YAAY,CAAC;aACrC,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC,QAAQ,IAAI,UAAU,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,OAAe;QAInC,0BAA0B;QAC1B,IAAI,OAAO,GAAG,OAAO,CAAC;QACtB,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAChE,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;QACjC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,OAAO;gBACL,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,uBAAuB,CAAC,MAAM,CAAC;gBAC7D,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;aAC5C,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,0BAA0B;YAC1B,OAAO,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED;;OAEG;IACK,uBAAuB,CAC7B,MAA+B;QAE/B,MAAM,MAAM,GAAyD,EAAE,CAAC;QAExE,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC;YAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;YAE1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC9B,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;YAChD,CAAC;iBAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBACvD,MAAM,GAAG,GAAG,KAAgC,CAAC;gBAC7C,MAAM,CAAC,GAAG,CAAC,GAAG;oBACZ,KAAK,EAAE,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG;oBACjE,SAAS,EAAE,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;iBAClE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAC3B,IAAY;QAEZ,MAAM,MAAM,GAAyD,EAAE,CAAC;QAExE,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACtC,8BAA8B;YAC9B,MAAM,OAAO,GAAG,IAAI,MAAM,CACxB,GAAG,SAAS,CAAC,IAAI,2BAA2B,EAC5C,GAAG,CACJ,CAAC;YACF,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAElC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG;oBACvB,KAAK,EAAE,QA
AQ,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC;oBAC9B,SAAS,EAAE,EAAE;iBACd,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,mBAAmB;gBACnB,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG;oBACvB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACxD,SAAS,EAAE,gBAAgB;iBAC5B,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,wBAAwB,CAC9B,MAA4D;QAE5D,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACnE,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,CAAC,CAAC;YACtC,MAAM,KAAK,GAAG,SAAS,EAAE,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC;YAE7C,cAAc;YACd,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;YACxE,WAAW,IAAI,UAAU,GAAG,MAAM,CAAC;YACnC,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,OAAO,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC;IAED;;OAEG;IACK,cAAc,CACpB,MAA4D;QAE5D,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;YAC7D,MAAM,QAAQ,GAAG,SAAS,EAAE,KAAK,EAAE,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;YACzD,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,KAAK,MAAM,CAAC,KAAK,IAAI,QAAQ,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACxG,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,QAA8B;QACxC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;CACF"}