@nahisaho/katashiro-evaluation 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +21 -0
  2. package/dist/BenchmarkSuite.d.ts +63 -0
  3. package/dist/BenchmarkSuite.d.ts.map +1 -0
  4. package/dist/BenchmarkSuite.js +152 -0
  5. package/dist/BenchmarkSuite.js.map +1 -0
  6. package/dist/DatasetManager.d.ts +68 -0
  7. package/dist/DatasetManager.d.ts.map +1 -0
  8. package/dist/DatasetManager.js +161 -0
  9. package/dist/DatasetManager.js.map +1 -0
  10. package/dist/ExperimentRunner.d.ts +51 -0
  11. package/dist/ExperimentRunner.d.ts.map +1 -0
  12. package/dist/ExperimentRunner.js +170 -0
  13. package/dist/ExperimentRunner.js.map +1 -0
  14. package/dist/evaluators/CompositeEvaluator.d.ts +66 -0
  15. package/dist/evaluators/CompositeEvaluator.d.ts.map +1 -0
  16. package/dist/evaluators/CompositeEvaluator.js +122 -0
  17. package/dist/evaluators/CompositeEvaluator.js.map +1 -0
  18. package/dist/evaluators/HeuristicEvaluator.d.ts +82 -0
  19. package/dist/evaluators/HeuristicEvaluator.d.ts.map +1 -0
  20. package/dist/evaluators/HeuristicEvaluator.js +233 -0
  21. package/dist/evaluators/HeuristicEvaluator.js.map +1 -0
  22. package/dist/evaluators/LLMJudgeEvaluator.d.ts +93 -0
  23. package/dist/evaluators/LLMJudgeEvaluator.d.ts.map +1 -0
  24. package/dist/evaluators/LLMJudgeEvaluator.js +296 -0
  25. package/dist/evaluators/LLMJudgeEvaluator.js.map +1 -0
  26. package/dist/evaluators/RAGASEvaluators.d.ts +128 -0
  27. package/dist/evaluators/RAGASEvaluators.d.ts.map +1 -0
  28. package/dist/evaluators/RAGASEvaluators.js +521 -0
  29. package/dist/evaluators/RAGASEvaluators.js.map +1 -0
  30. package/dist/evaluators/index.d.ts +13 -0
  31. package/dist/evaluators/index.d.ts.map +1 -0
  32. package/dist/evaluators/index.js +12 -0
  33. package/dist/evaluators/index.js.map +1 -0
  34. package/dist/index.d.ts +20 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +24 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/reporting/EvaluationReporter.d.ts +135 -0
  39. package/dist/reporting/EvaluationReporter.d.ts.map +1 -0
  40. package/dist/reporting/EvaluationReporter.js +285 -0
  41. package/dist/reporting/EvaluationReporter.js.map +1 -0
  42. package/dist/reporting/index.d.ts +8 -0
  43. package/dist/reporting/index.d.ts.map +1 -0
  44. package/dist/reporting/index.js +8 -0
  45. package/dist/reporting/index.js.map +1 -0
  46. package/dist/reporting/templates.d.ts +91 -0
  47. package/dist/reporting/templates.d.ts.map +1 -0
  48. package/dist/reporting/templates.js +150 -0
  49. package/dist/reporting/templates.js.map +1 -0
  50. package/dist/types.d.ts +408 -0
  51. package/dist/types.d.ts.map +1 -0
  52. package/dist/types.js +8 -0
  53. package/dist/types.js.map +1 -0
  54. package/package.json +47 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../../src/reporting/templates.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAEjE;;GAEG;AACH,MAAM,MAAM,gBAAgB,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,GAAG,IAAI,KAAK,MAAM,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,iBAAiB,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,gBAAgB,CAAC;IACzB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAC7C,OAAO,EAAE,gBAAgB,CAAC,mBAAmB,CAAC,CAAC;IAC/C,SAAS,EAAE,gBAAgB,CAAC,qBAAqB,CAAC,CAAC;IACnD,MAAM,EAAE,gBAAgB,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtE;AAED;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,gBAAgB,CAAC,kBAAkB,CAoBtE,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,sBAAsB,EAAE,gBAAgB,CAAC,mBAAmB,CAiCxE,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,gBAAgB,CAAC,qBAAqB,CAM5E,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,gBAAgB,CAAC;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAqBA,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,gBAAgB,EAAE,kBAK9B,CAAC;AAEF;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,MAAM,CAIrE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,UAAU;;;CAA2B,GACpC,MAAM,CAQR;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,EAAE,iBAAiB,EAC1B,QAAQ,GAAE,IAAI,GAAG,IAAW,GAC3B,MAAM,CA4BR;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,gBAAgB,EAAE,EAC3B,OAAO,SAAK,GACX,MAAM,CA6BR"}
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Markdown Templates for Evaluation Reports
3
+ *
4
+ * レポートテンプレートのカスタマイズ用モジュール
5
+ *
6
+ * @requirement REQ-EVAL-103
7
+ */
8
/**
 * Default header template.
 *
 * Renders the report title as a level-1 Markdown heading, followed by the
 * optional description and the optional dataset name, each separated from
 * the previous part by a blank line.
 *
 * @param {{title: string, description?: string, datasetName?: string}} data - Header fields.
 * @param {string} language - `'ja'` selects the Japanese dataset label; any other value selects English.
 * @returns {string} The header as newline-joined Markdown.
 */
export const defaultHeaderTemplate = (data, language) => {
  const parts = [`# ${data.title}`];

  // Falsy (missing or empty) optional fields are skipped entirely.
  if (data.description) {
    parts.push('');
    parts.push(data.description);
  }

  if (data.datasetName) {
    let datasetLine;
    if (language === 'ja') {
      datasetLine = `**データセット**: ${data.datasetName}`;
    } else {
      datasetLine = `**Dataset**: ${data.datasetName}`;
    }
    parts.push('');
    parts.push(datasetLine);
  }

  return parts.join('\n');
};
23
/**
 * Default summary template.
 *
 * Renders a "Summary" section: a heading, a progress bar with the pass rate
 * as a percentage, and a two-column Markdown table with the total count,
 * passed / failed counts and the average score.
 *
 * @param {{stats: {passRate: number, total: number, passed: number, failed: number, avgScore: number}}} data
 *   Object whose `stats` property carries the values read below.
 * @param {string} language - `'ja'` selects Japanese labels; any other value selects English.
 * @returns {string} The summary section as newline-joined Markdown.
 */
export const defaultSummaryTemplate = (data, language) => {
  const summary = data.stats;
  const japanese = language === 'ja';

  const heading = japanese ? '## 📊 サマリー' : '## 📊 Summary';

  // Progress bar followed by the pass rate with one decimal place.
  const bar = generateProgressBar(summary.passRate);
  const barLine = `${bar} ${(summary.passRate * 100).toFixed(1)}%`;

  const tableHead = japanese ? '| 指標 | 値 |' : '| Metric | Value |';
  const totalRow = japanese
    ? `| 総件数 | ${summary.total} |`
    : `| Total | ${summary.total} |`;
  const countsRow = japanese
    ? `| 合格 / 不合格 | ${summary.passed} / ${summary.failed} |`
    : `| Passed / Failed | ${summary.passed} / ${summary.failed} |`;
  const averageRow = japanese
    ? `| 平均スコア | ${summary.avgScore.toFixed(3)} |`
    : `| Avg Score | ${summary.avgScore.toFixed(3)} |`;

  return [
    heading,
    '',
    barLine,
    '',
    tableHead,
    '|------|-----|',
    totalRow,
    countsRow,
    averageRow,
  ].join('\n');
};
47
/**
 * Default result-row template.
 *
 * Renders one Markdown table row with five cells: the row index, the
 * evaluator name, the score and the normalized score (both with three
 * decimals), and a pass/fail icon.
 *
 * @param {{index: number, result: {evaluator: string, score: number, normalizedScore: number}, passed: boolean}} data
 *   Row data; only the properties listed here are read.
 * @returns {string} A single Markdown table row.
 */
export const defaultResultRowTemplate = (data) => {
  const evaluation = data.result;
  const score = evaluation.score.toFixed(3);
  const normalized = evaluation.normalizedScore.toFixed(3);

  let icon;
  if (data.passed) {
    icon = '✅';
  } else {
    icon = '❌';
  }

  return `| ${data.index} | ${evaluation.evaluator} | ${score} | ${normalized} | ${icon} |`;
};
55
/**
 * Default footer template.
 *
 * Renders a horizontal rule, the generation timestamp, the run duration in
 * milliseconds (only when `durationMs` is not `undefined`, so `0` is still
 * printed), and a fixed "Powered by" line.
 *
 * @param {{timestamp: string, durationMs?: number}} data - Footer fields.
 * @param {string} language - `'ja'` selects Japanese labels; any other value selects English.
 * @returns {string} The footer as newline-joined Markdown.
 */
export const defaultFooterTemplate = (data, language) => {
  const japanese = language === 'ja';
  const { timestamp, durationMs } = data;

  const footer = [
    '---',
    '',
    japanese ? `_生成日時: ${timestamp}_` : `_Generated at: ${timestamp}_`,
  ];

  if (durationMs !== undefined) {
    footer.push(japanese ? `_実行時間: ${durationMs}ms_` : `_Duration: ${durationMs}ms_`);
  }

  footer.push('');
  footer.push('_Powered by KATASHIRO Evaluation Framework_');

  return footer.join('\n');
};
72
+ /**
73
+ * Default template collection: maps each report section key (header, summary, resultRow, footer) to the default template function defined in this module.
74
+ */
75
+ export const defaultTemplates = {
76
+ header: defaultHeaderTemplate,
77
+ summary: defaultSummaryTemplate,
78
+ resultRow: defaultResultRowTemplate,
79
+ footer: defaultFooterTemplate,
80
+ };
81
/**
 * Generate a fixed-width text progress bar.
 *
 * @param {number} ratio - Completed fraction. Values below 0 are treated as 0,
 *   values above 1 as 1, and values that are not a number as 0.
 * @param {number} [width=20] - Total number of cells in the bar.
 * @returns {string} `[`, then filled cells (`█`), then empty cells (`░`), then `]`.
 */
export function generateProgressBar(ratio, width = 20) {
  // String.prototype.repeat throws RangeError for a negative count, so an
  // out-of-range ratio (e.g. 1.03 or -0.1) must be clamped before the cell
  // counts are derived from it.
  const bounded = Math.min(1, Math.max(0, ratio));
  const clamped = Number.isNaN(bounded) ? 0 : bounded;

  const filled = Math.round(clamped * width);
  const empty = width - filled;
  return `[${'█'.repeat(filled)}${'░'.repeat(empty)}]`;
}
89
/**
 * Generate a Markdown image that points at a shields.io score badge.
 *
 * The badge text is the score as a whole-number percentage; the colour is
 * `brightgreen` at or above `thresholds.good`, `yellow` at or above
 * `thresholds.fair`, and `red` otherwise.
 *
 * @param {number} score - Score to display; it is multiplied by 100 for the badge text.
 * @param {{good: number, fair: number}} [thresholds={ good: 0.8, fair: 0.5 }] - Colour cut-offs.
 * @returns {string} Markdown image syntax for the badge.
 */
export function generateScoreBadge(score, thresholds = { good: 0.8, fair: 0.5 }) {
  let color = 'red';
  if (score >= thresholds.good) {
    color = 'brightgreen';
  } else if (score >= thresholds.fair) {
    color = 'yellow';
  }

  // `%25` is the URL-encoded percent sign that follows the number.
  const percent = (score * 100).toFixed(0);
  return `![Score](https://img.shields.io/badge/score-${percent}%25-${color})`;
}
103
/**
 * Generate a Markdown comparison table for two sets of summary statistics.
 *
 * The table has one row for the pass rate (shown as a percentage) and one
 * for the average score, each with baseline, current and difference columns.
 *
 * @param {{passRate: number, avgScore: number}} baseline - Reference statistics.
 * @param {{passRate: number, avgScore: number}} current - Statistics being compared against the baseline.
 * @param {string} [language='en'] - `'ja'` selects Japanese labels; any other value selects English.
 * @returns {string} The comparison section as newline-joined Markdown.
 */
export function generateComparisonTable(baseline, current, language = 'en') {
  const isJa = language === 'ja';

  /**
   * Format a signed difference with three decimals and a trend icon.
   * The unit is placed directly after the number (before the icon), and the
   * direction is decided from the rounded value so that floating-point noise
   * such as 5.5e-17 is reported as "no change" instead of "+0.000 📈".
   */
  const formatDiff = (diff, unit = '') => {
    const text = diff.toFixed(3);
    const rounded = Number(text);
    if (rounded > 0) {
      return `+${text}${unit} 📈`;
    }
    if (rounded < 0) {
      return `${text}${unit} 📉`;
    }
    return `0.000${unit} ➡️`;
  };

  const lines = [];
  lines.push(isJa ? '## 📈 比較' : '## 📈 Comparison');
  lines.push('');
  lines.push(isJa
    ? '| 指標 | ベースライン | 今回 | 差分 |'
    : '| Metric | Baseline | Current | Diff |');
  lines.push('|------|------------|-------|------|');

  // Pass rates are fractions; scale by 100 to show percentages and a
  // percentage-point difference.
  const passRateDiff = formatDiff((current.passRate - baseline.passRate) * 100, '%');
  lines.push(`| ${isJa ? '合格率' : 'Pass Rate'} | ${(baseline.passRate * 100).toFixed(1)}% | ${(current.passRate * 100).toFixed(1)}% | ${passRateDiff} |`);
  lines.push(`| ${isJa ? '平均スコア' : 'Avg Score'} | ${baseline.avgScore.toFixed(3)} | ${current.avgScore.toFixed(3)} | ${formatDiff(current.avgScore - baseline.avgScore)} |`);

  return lines.join('\n');
}
126
+ /**
127
+ * ヒートマップ生成(テキストベース)
128
+ */
129
+ export function generateScoreHeatmap(results, buckets = 10) {
130
+ const lines = [];
131
+ // スコア分布を計算
132
+ const distribution = new Array(buckets).fill(0);
133
+ for (const result of results) {
134
+ const bucket = Math.min(Math.floor(result.normalizedScore * buckets), buckets - 1);
135
+ distribution[bucket]++;
136
+ }
137
+ const max = Math.max(...distribution);
138
+ lines.push('```');
139
+ lines.push('Score Distribution:');
140
+ lines.push('');
141
+ for (let i = buckets - 1; i >= 0; i--) {
142
+ const label = `${((i / buckets) * 100).toFixed(0).padStart(3)}%-${(((i + 1) / buckets) * 100).toFixed(0).padStart(3)}%`;
143
+ const bar = '█'.repeat(Math.round((distribution[i] / max) * 20));
144
+ const count = distribution[i];
145
+ lines.push(`${label} | ${bar} (${count})`);
146
+ }
147
+ lines.push('```');
148
+ return lines.join('\n');
149
+ }
150
+ //# sourceMappingURL=templates.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"templates.js","sourceRoot":"","sources":["../../src/reporting/templates.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA8CH;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAyC,CACzE,IAAI,EACJ,QAAQ,EACR,EAAE;IACF,MAAM,KAAK,GAAa,CAAC,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;IAE5C,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CACR,EAAE,EACF,QAAQ,KAAK,IAAI;YACf,CAAC,CAAC,eAAe,IAAI,CAAC,WAAW,EAAE;YACnC,CAAC,CAAC,gBAAgB,IAAI,CAAC,WAAW,EAAE,CACvC,CAAC;IACJ,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAA0C,CAC3E,IAAI,EACJ,QAAQ,EACR,EAAE;IACF,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IACvB,MAAM,IAAI,GAAG,QAAQ,KAAK,IAAI,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IAClD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,YAAY;IACZ,MAAM,WAAW,GAAG,mBAAmB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACxD,KAAK,CAAC,IAAI,CAAC,GAAG,WAAW,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACnE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;IACvD,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC7B,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,CAAC,CAAC,WAAW,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,aAAa,KAAK,CAAC,KAAK,IAAI,CACjE,CAAC;IACF,KAAK,CAAC,IAAI,CACR,IAAI;QACF,CAAC,CAAC,gBAAgB,KAAK,CAAC,MAAM,MAAM,KAAK,CAAC,MAAM,IAAI;QACpD,CAAC,CAAC,uBAAuB,KAAK,CAAC,MAAM,MAAM,KAAK,CAAC,MAAM,IAAI,CAC9D,CAAC;IACF,KAAK,CAAC,IAAI,CACR,IAAI;QACF,CAAC,CAAC,aAAa,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;QAC5C,CAAC,CAAC,iBAAiB,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CACnD,CAAC;IAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAA4C,CAC/E,IAAI,EACJ,EAAE;IACF,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,
GAAG,CAAC;IACtC,OAAO,KAAK,KAAK,MAAM,MAAM,CAAC,SAAS,MAAM,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,UAAU,IAAI,CAAC;AAClI,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAG7B,CAAC,IAAI,EAAE,QAAQ,EAAE,EAAE;IACtB,MAAM,IAAI,GAAG,QAAQ,KAAK,IAAI,CAAC;IAC/B,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAE1B,KAAK,CAAC,IAAI,CACR,IAAI;QACF,CAAC,CAAC,UAAU,IAAI,CAAC,SAAS,GAAG;QAC7B,CAAC,CAAC,kBAAkB,IAAI,CAAC,SAAS,GAAG,CACxC,CAAC;IAEF,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QAClC,KAAK,CAAC,IAAI,CACR,IAAI;YACF,CAAC,CAAC,UAAU,IAAI,CAAC,UAAU,KAAK;YAChC,CAAC,CAAC,cAAc,IAAI,CAAC,UAAU,KAAK,CACvC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,6CAA6C,CAAC,CAAC;IAE9D,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAuB;IAClD,MAAM,EAAE,qBAAqB;IAC7B,OAAO,EAAE,sBAAsB;IAC/B,SAAS,EAAE,wBAAwB;IACnC,MAAM,EAAE,qBAAqB;CAC9B,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAa,EAAE,KAAK,GAAG,EAAE;IAC3D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC;IACzC,MAAM,KAAK,GAAG,KAAK,GAAG,MAAM,CAAC;IAC7B,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,KAAa,EACb,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE;IAErC,IAAI,KAAK,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;QAC7B,OAAO,+CAA+C,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,kBAAkB,CAAC;IACnG,CAAC;SAAM,IAAI,KAAK,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;QACpC,OAAO,+CAA+C,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC;IAC9F,CAAC;SAAM,CAAC;QACN,OAAO,+CAA+C,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC;IAC3F,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACrC,QAA2B,EAC3B,OAA0B,EAC1B,WAAwB,IAAI;IAE5B,MAAM,IAAI,GAAG,QAAQ,KAAK,IAAI,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC;IACnD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,KAAK,CAAC,IAAI,CACR,IAAI;QACF,CAAC,CAAC,2BAA2B;QAC7B,CAAC,CAAC,
wCAAwC,CAC7C,CAAC;IACF,KAAK,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAEnD,MAAM,UAAU,GAAG,CAAC,IAAY,EAAE,EAAE;QAClC,IAAI,IAAI,GAAG,CAAC;YAAE,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QAC9C,IAAI,IAAI,GAAG,CAAC;YAAE,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QAC7C,OAAO,UAAU,CAAC;IACpB,CAAC,CAAC;IAEF,KAAK,CAAC,IAAI,CACR,KAAK,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,MAAM,CAAC,QAAQ,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,UAAU,CAAC,CAAC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,KAAK,CACxL,CAAC;IACF,KAAK,CAAC,IAAI,CACR,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,MAAM,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,UAAU,CAAC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAC/J,CAAC;IAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,OAA2B,EAC3B,OAAO,GAAG,EAAE;IAEZ,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,WAAW;IACX,MAAM,YAAY,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CACrB,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,eAAe,GAAG,OAAO,CAAC,EAC5C,OAAO,GAAG,CAAC,CACZ,CAAC;QACF,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;IACzB,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;IAEtC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAClC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,KAAK,IAAI,CAAC,GAAG,OAAO,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;QACxH,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACjE,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,G
AAG,KAAK,MAAM,GAAG,KAAK,KAAK,GAAG,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
@@ -0,0 +1,408 @@
1
+ /**
2
+ * Evaluation Types
3
+ *
4
+ * @requirement REQ-EVAL-001, REQ-EVAL-002, REQ-EVAL-003, REQ-EVAL-004, REQ-EVAL-005
5
+ * @design DES-KATASHIRO-003-EVAL §3
6
+ */
7
+ /**
8
+ * Evaluation input
9
+ */
10
+ export interface EvaluationInput {
11
+ /** Output to be evaluated */
12
+ output: string;
13
+ /** Input (context) */
14
+ input?: string;
15
+ /** Expected output (for comparison) */
16
+ expected?: string;
17
+ /** Additional context */
18
+ context?: Record<string, unknown>;
19
+ }
20
+ /**
21
+ * Evaluation result
22
+ */
23
+ export interface EvaluationResult {
24
+ /** Evaluator name */
25
+ evaluator: string;
26
+ /** Score */
27
+ score: number;
28
+ /** Normalized score (0-1) */
29
+ normalizedScore: number;
30
+ /** Pass/fail */
31
+ passed?: boolean;
32
+ /** Reasoning */
33
+ reasoning: string;
34
+ /** Raw scores (when multiple trials are run) */
35
+ rawScores?: number[];
36
+ /** Metadata */
37
+ metadata?: EvaluationMetadata;
38
+ }
39
+ /**
40
+ * Evaluation metadata
41
+ */
42
+ export interface EvaluationMetadata {
43
+ /** Optimal length (LengthEvaluator) */
44
+ optimalLength?: number;
45
+ /** Found keywords (KeywordEvaluator) */
46
+ foundKeywords?: string[];
47
+ /** Missing keywords (KeywordEvaluator) */
48
+ missingKeywords?: string[];
49
+ /** Missing fields (JsonStructureEvaluator) */
50
+ missingFields?: string[];
51
+ /** Component scores (CompositeEvaluator) */
52
+ componentScores?: Array<{
53
+ evaluator: string;
54
+ score: number;
55
+ }>;
56
+ /** Other metadata */
57
+ [key: string]: unknown;
58
+ }
59
+ /**
60
+ * Evaluator interface
61
+ */
62
+ export interface Evaluator {
63
+ /** Evaluator name */
64
+ readonly name: string;
65
+ /** Run the evaluation */
66
+ evaluate(input: EvaluationInput): Promise<EvaluationResult>;
67
+ }
68
+ /**
69
+ * Evaluation scale
70
+ */
71
+ export interface EvaluationScale {
72
+ min: number;
73
+ max: number;
74
+ descriptions?: Record<number, string>;
75
+ }
76
+ /**
77
+ * Dataset
78
+ */
79
+ export interface Dataset {
80
+ /** Dataset ID */
81
+ id: string;
82
+ /** Name */
83
+ name: string;
84
+ /** Description */
85
+ description?: string;
86
+ /** Number of data items */
87
+ size: number;
88
+ /** Tags */
89
+ tags?: string[];
90
+ /** Creation timestamp */
91
+ createdAt: string;
92
+ /** Last-updated timestamp */
93
+ updatedAt: string;
94
+ }
95
+ /**
96
+ * Dataset item
97
+ */
98
+ export interface DatasetItem {
99
+ /** Item ID */
100
+ id: string;
101
+ /** Input */
102
+ input: string;
103
+ /** Expected output */
104
+ expected?: string;
105
+ /** Metadata */
106
+ metadata?: Record<string, unknown>;
107
+ }
108
+ /**
109
+ * Experiment configuration
110
+ */
111
+ export interface ExperimentConfig {
112
+ /** Experiment name */
113
+ name: string;
114
+ /** Description */
115
+ description?: string;
116
+ /** Dataset ID */
117
+ datasetId: string;
118
+ /** Evaluators to use */
119
+ evaluators: string[];
120
+ /** Tags */
121
+ tags?: string[];
122
+ }
123
+ /**
124
+ * Experiment result
125
+ */
126
+ export interface ExperimentResult {
127
+ /** Experiment ID */
128
+ id: string;
129
+ /** Experiment name */
130
+ name: string;
131
+ /** Execution timestamp */
132
+ timestamp: string;
133
+ /** Dataset used */
134
+ datasetId: string;
135
+ /** Summary of evaluation results */
136
+ summary: ExperimentSummary;
137
+ /** Detailed results */
138
+ details: ExperimentDetailResult[];
139
+ /** Execution time (milliseconds) */
140
+ durationMs: number;
141
+ /** Metadata */
142
+ metadata?: Record<string, unknown>;
143
+ }
144
+ /**
145
+ * Experiment result summary
146
+ */
147
+ export interface ExperimentSummary {
148
+ /** Average score per evaluator */
149
+ averageScores: Record<string, number>;
150
+ /** Standard deviation per evaluator */
151
+ stdDevs: Record<string, number>;
152
+ /** Overall average score */
153
+ overallScore: number;
154
+ /** Number of evaluated items */
155
+ totalItems: number;
156
+ /** Number of successes */
157
+ successCount: number;
158
+ /** Number of errors */
159
+ errorCount: number;
160
+ }
161
+ /**
162
+ * Experiment detail result
163
+ */
164
+ export interface ExperimentDetailResult {
165
+ /** Item ID */
166
+ itemId: string;
167
+ /** Input */
168
+ input: string;
169
+ /** Output */
170
+ output: string;
171
+ /** Expected output */
172
+ expected?: string;
173
+ /** Evaluation results */
174
+ evaluations: EvaluationResult[];
175
+ /** Success flag */
176
+ success: boolean;
177
+ /** Error message */
178
+ error?: string;
179
+ }
180
+ /**
181
+ * A/B test configuration
182
+ */
183
+ export interface ABTestConfig {
184
+ /** Test name */
185
+ name: string;
186
+ /** Description */
187
+ description?: string;
188
+ /** Variant A configuration */
189
+ variantA: VariantConfig;
190
+ /** Variant B configuration */
191
+ variantB: VariantConfig;
192
+ /** Dataset ID */
193
+ datasetId: string;
194
+ /** Evaluators */
195
+ evaluators: string[];
196
+ /** Statistical significance level */
197
+ significanceLevel?: number;
198
+ }
199
+ /**
200
+ * Variant configuration
201
+ */
202
+ export interface VariantConfig {
203
+ /** Variant name */
204
+ name: string;
205
+ /** Generator function */
206
+ generator: (input: string) => Promise<string>;
207
+ }
208
+ /**
209
+ * A/B test result
210
+ */
211
+ export interface ABTestResult {
212
+ /** Test ID */
213
+ id: string;
214
+ /** Test name */
215
+ name: string;
216
+ /** Execution timestamp */
217
+ timestamp: string;
218
+ /** Variant A result */
219
+ variantA: VariantResult;
220
+ /** Variant B result */
221
+ variantB: VariantResult;
222
+ /** Statistical analysis result */
223
+ analysis: ABAnalysis;
224
+ /** Winner */
225
+ winner: 'A' | 'B' | 'tie';
226
+ /** Conclusion */
227
+ conclusion: string;
228
+ }
229
+ /**
230
+ * Variant result
231
+ */
232
+ export interface VariantResult {
233
+ /** Variant name */
234
+ name: string;
235
+ /** Average score per evaluator */
236
+ averageScores: Record<string, number>;
237
+ /** Standard deviation per evaluator */
238
+ stdDevs: Record<string, number>;
239
+ /** Sample size */
240
+ sampleSize: number;
241
+ }
242
+ /**
243
+ * A/B test variant type
244
+ */
245
+ export type ABTestVariant = 'A' | 'B';
246
+ /**
247
+ * A/B analysis result
248
+ */
249
+ export interface ABAnalysis {
250
+ /** t-test results per evaluator */
251
+ tTests: Record<string, {
252
+ tStatistic: number;
253
+ pValue: number;
254
+ significant: boolean;
255
+ effectSize: number;
256
+ }>;
257
+ /** Overall significance */
258
+ overallSignificant: boolean;
259
+ /** Confidence intervals */
260
+ confidenceIntervals: Record<string, {
261
+ lower: number;
262
+ upper: number;
263
+ }>;
264
+ }
265
+ /**
266
+ * Benchmark configuration
267
+ */
268
+ export interface BenchmarkConfig {
269
+ /** Benchmark name */
270
+ name: string;
271
+ /** Number of iterations */
272
+ iterations?: number;
273
+ /** Number of warm-up iterations */
274
+ warmupIterations?: number;
275
+ /** Timeout (milliseconds) */
276
+ timeout?: number;
277
+ }
278
+ /**
279
+ * Benchmark result
280
+ */
281
+ export interface BenchmarkResult {
282
+ /** Benchmark name */
283
+ name: string;
284
+ /** Mean execution time (milliseconds) */
285
+ meanMs: number;
286
+ /** Standard deviation */
287
+ stdDevMs: number;
288
+ /** Minimum execution time */
289
+ minMs: number;
290
+ /** Maximum execution time */
291
+ maxMs: number;
292
+ /** Percentiles */
293
+ percentiles: {
294
+ p50: number;
295
+ p90: number;
296
+ p99: number;
297
+ };
298
+ /** Number of iterations */
299
+ iterations: number;
300
+ /** Execution timestamp */
301
+ timestamp: string;
302
+ }
303
+ /**
304
+ * Evaluation criterion definition
305
+ * @requirement REQ-EVAL-101
306
+ */
307
+ export interface EvaluationCriteria {
308
+ /** Criterion name */
309
+ name: string;
310
+ /** Description */
311
+ description: string;
312
+ /** Score range (default: 1-5) */
313
+ scale?: {
314
+ min: number;
315
+ max: number;
316
+ };
317
+ /** Description of each score (optional) */
318
+ rubric?: Record<number, string>;
319
+ /** Weight (for composite evaluation) */
320
+ weight?: number;
321
+ }
322
+ /**
323
+ * LLMJudge evaluator configuration
324
+ * @requirement REQ-EVAL-101
325
+ */
326
+ export interface LLMJudgeEvaluatorConfig {
327
+ /** Evaluator name */
328
+ name?: string;
329
+ /** List of evaluation criteria */
330
+ criteria: EvaluationCriteria[];
331
+ /** Evaluation scale (default: 1-5) */
332
+ scale?: {
333
+ min: number;
334
+ max: number;
335
+ };
336
+ /** System prompt (for customization) */
337
+ systemPrompt?: string;
338
+ /** Evaluation prompt template */
339
+ evaluationPromptTemplate?: string;
340
+ /** Retry count (on parse failure) */
341
+ maxRetries?: number;
342
+ /** Temperature parameter */
343
+ temperature?: number;
344
+ /** Force JSON output of evaluation results */
345
+ forceJsonOutput?: boolean;
346
+ }
347
+ /**
348
+ * LLMJudge evaluation result
349
+ * @requirement REQ-EVAL-101
350
+ */
351
+ export interface LLMJudgeResult extends EvaluationResult {
352
+ /** Score for each criterion */
353
+ criteriaScores: Record<string, {
354
+ score: number;
355
+ reasoning: string;
356
+ }>;
357
+ /** Raw LLM output */
358
+ rawLLMOutput?: string;
359
+ /** Number of tokens used */
360
+ tokenUsage?: {
361
+ promptTokens: number;
362
+ completionTokens: number;
363
+ totalTokens: number;
364
+ };
365
+ }
366
+ /**
367
+ * RAG evaluation input
368
+ * @requirement REQ-EVAL-102
369
+ */
370
+ export interface RAGEvaluationInput extends EvaluationInput {
371
+ /** User query (question) */
372
+ query: string;
373
+ /** Retrieved contexts */
374
+ retrievedContexts: string[];
375
+ /** Generated answer */
376
+ generatedAnswer: string;
377
+ /** Ground truth (optional) */
378
+ groundTruth?: string;
379
+ }
380
+ /**
381
+ * RAGAS evaluation result
382
+ * @requirement REQ-EVAL-102
383
+ */
384
+ export interface RAGASEvaluationResult extends EvaluationResult {
385
+ /** Score for each metric */
386
+ metrics: {
387
+ /** Faithfulness: whether the answer is grounded in the context */
388
+ faithfulness?: number;
389
+ /** Context Relevancy: relevance of the retrieved contexts */
390
+ contextRelevancy?: number;
391
+ /** Answer Relevancy: whether the answer is relevant to the query */
392
+ answerRelevancy?: number;
393
+ /** Context Recall: whether the contexts cover the ground truth */
394
+ contextRecall?: number;
395
+ /** Context Precision: precision of the relevant contexts */
396
+ contextPrecision?: number;
397
+ };
398
+ /** Detailed analysis */
399
+ analysis?: {
400
+ /** Statements extracted from the answer */
401
+ statements?: string[];
402
+ /** Statements supported by the context */
403
+ supportedStatements?: string[];
404
+ /** Statements not supported by the context */
405
+ unsupportedStatements?: string[];
406
+ };
407
+ }
408
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,aAAa;IACb,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,gBAAgB;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU;IACV,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS;IACT,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS;IACT,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB;IACnB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY;IACZ,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,2BAA2B;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gCAAgC;IAChC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,gCAAgC;IAChC,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,qCAAqC;IACrC,eAAe,CAAC,EAAE,KAAK,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC9D,gBAAgB;IAChB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,WAAW;IACX,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,WAAW;IACX,QAAQ,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;CAC7D;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACvC;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,eAAe;IACf,EAAE,EAAE,MAAM,CAAC;IACX,SAAS;IACT,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,aAAa;IACb,EAAE,EAAE,MAAM,CAAC;IACX,SAAS;IACT,KAAK,EAAE,MAAM,CAAC;IACd,WAAW;IACX,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,UAAU;IACV,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe;IACf,SAAS,EA
AE,MAAM,CAAC;IAClB,YAAY;IACZ,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,SAAS;IACT,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,WAAW;IACX,EAAE,EAAE,MAAM,CAAC;IACX,UAAU;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe;IACf,OAAO,EAAE,iBAAiB,CAAC;IAC3B,WAAW;IACX,OAAO,EAAE,sBAAsB,EAAE,CAAC;IAClC,gBAAgB;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,kBAAkB;IAClB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,iBAAiB;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,eAAe;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW;IACX,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY;IACZ,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,aAAa;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS;IACT,KAAK,EAAE,MAAM,CAAC;IACd,SAAS;IACT,MAAM,EAAE,MAAM,CAAC;IACf,WAAW;IACX,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW;IACX,WAAW,EAAE,gBAAgB,EAAE,CAAC;IAChC,YAAY;IACZ,OAAO,EAAE,OAAO,CAAC;IACjB,eAAe;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,WAAW;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe;IACf,QAAQ,EAAE,aAAa,CAAC;IACxB,eAAe;IACf,QAAQ,EAAE,aAAa,CAAC;IACxB,eAAe;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU;IACV,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa;IACb,IAAI,EAAE,MAAM,CAAC;IACb,WAAW;IACX,SAAS,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,YAAY;IACZ,EAAE,EAAE,MAAM,CAAC;IACX,WAAW;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe;IACf,QAAQ,EAAE,aAAa,CAAC;IACxB,eAAe;IACf,QAAQ,EAAE,aAAa,CAAC;IACxB,aAAa;IACb,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS;IACT,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,KAAK,CAAC;IAC1B,SAAS;IACT,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB;IAClB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,
MAAM,CAAC,CAAC;IACtC,iBAAiB;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,YAAY;IACZ,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,GAAG,GAAG,GAAG,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,kBAAkB;IAClB,MAAM,EAAE,MAAM,CACZ,MAAM,EACN;QACE,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,WAAW,EAAE,OAAO,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;KACpB,CACF,CAAC;IACF,aAAa;IACb,kBAAkB,EAAE,OAAO,CAAC;IAC5B,WAAW;IACX,mBAAmB,EAAE,MAAM,CACzB,MAAM,EACN;QACE,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,CACF,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,cAAc;IACd,IAAI,EAAE,MAAM,CAAC;IACb,WAAW;IACX,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB;IAChB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,kBAAkB;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,cAAc;IACd,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa;IACb,KAAK,EAAE,MAAM,CAAC;IACd,aAAa;IACb,KAAK,EAAE,MAAM,CAAC;IACd,cAAc;IACd,WAAW,EAAE;QACX,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,WAAW;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW;IACX,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU;IACV,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,WAAW,EAAE,MAAM,CAAC;IACpB,yBAAyB;IACzB,KAAK,CAAC,EAAE;QACN,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,gBAAgB;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,MAAM,WAAW,uBAAuB;IACtC,WAAW;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,cAAc;IACd,QAAQ,EAAE,kBAAkB,EAAE,CAAC;IAC/B,yBAAyB;IACzB,KAAK,CAAC,EAAE;QACN,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,yBAAyB;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oBAAoB;IACpB,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC,qBAAqB;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qBAAqB;IACrB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAe,SAAQ,gBAAgB;IACtD,gBAAgB;IAChB,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE;QAC7B,KAAK,EAAE,MAAM,C
AAC;QACd,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC,CAAC;IACH,eAAe;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc;IACd,UAAU,CAAC,EAAE;QACX,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAMD;;;GAGG;AACH,MAAM,WAAW,kBAAmB,SAAQ,eAAe;IACzD,kBAAkB;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB;IAClB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,cAAc;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,wBAAwB;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAsB,SAAQ,gBAAgB;IAC7D,iBAAiB;IACjB,OAAO,EAAE;QACP,sCAAsC;QACtC,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,sCAAsC;QACtC,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,uCAAuC;QACvC,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,iDAAiD;QACjD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,qCAAqC;QACrC,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,YAAY;IACZ,QAAQ,CAAC,EAAE;QACT,uBAAuB;QACvB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,8BAA8B;QAC9B,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;QAC/B,wBAAwB;QACxB,qBAAqB,CAAC,EAAE,MAAM,EAAE,CAAC;KAClC,CAAC;CACH"}
package/dist/types.js ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Evaluation Types
3
+ *
4
+ * @requirement REQ-EVAL-001, REQ-EVAL-002, REQ-EVAL-003, REQ-EVAL-004, REQ-EVAL-005
5
+ * @design DES-KATASHIRO-003-EVAL §3
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@nahisaho/katashiro-evaluation",
3
+ "version": "2.0.0",
4
+ "description": "KATASHIRO Evaluation - 品質評価・A/Bテスト・実験管理",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "default": "./dist/index.js"
13
+ }
14
+ },
15
+ "files": [
16
+ "dist"
17
+ ],
18
+ "keywords": [
19
+ "katashiro",
20
+ "evaluation",
21
+ "testing",
22
+ "benchmark",
23
+ "llm-as-judge"
24
+ ],
25
+ "author": "nahisaho",
26
+ "license": "MIT",
27
+ "repository": {
28
+ "type": "git",
29
+ "url": "https://github.com/nahisaho/katashiro.git",
30
+ "directory": "packages/evaluation"
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "dependencies": {
36
+ "@nahisaho/katashiro-core": "2.0.0"
37
+ },
38
+ "devDependencies": {
39
+ "typescript": "^5.3.0"
40
+ },
41
+ "scripts": {
42
+ "build": "tsc",
43
+ "clean": "rm -rf dist",
44
+ "test": "vitest run",
45
+ "test:watch": "vitest"
46
+ }
47
+ }