rag-eval-node-ts 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +204 -0
- package/dist/__tests__/evaluate.test.d.ts +2 -0
- package/dist/__tests__/evaluate.test.d.ts.map +1 -0
- package/dist/__tests__/evaluate.test.js +130 -0
- package/dist/__tests__/evaluate.test.js.map +1 -0
- package/dist/__tests__/evaluator.test.d.ts +2 -0
- package/dist/__tests__/evaluator.test.d.ts.map +1 -0
- package/dist/__tests__/evaluator.test.js +92 -0
- package/dist/__tests__/evaluator.test.js.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts +2 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.js +89 -0
- package/dist/__tests__/heuristic/ngrams.test.js.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts +2 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.js +57 -0
- package/dist/__tests__/heuristic/tfidf.test.js.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts +2 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.js +40 -0
- package/dist/__tests__/heuristic/token-f1.test.js.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts +2 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.js +66 -0
- package/dist/__tests__/metrics/faithfulness.test.js.map +1 -0
- package/dist/__tests__/types.test.d.ts +2 -0
- package/dist/__tests__/types.test.d.ts.map +1 -0
- package/dist/__tests__/types.test.js +531 -0
- package/dist/__tests__/types.test.js.map +1 -0
- package/dist/evaluate.d.ts +14 -0
- package/dist/evaluate.d.ts.map +1 -0
- package/dist/evaluate.js +208 -0
- package/dist/evaluate.js.map +1 -0
- package/dist/evaluator.d.ts +10 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +39 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/heuristic/ngrams.d.ts +22 -0
- package/dist/heuristic/ngrams.d.ts.map +1 -0
- package/dist/heuristic/ngrams.js +70 -0
- package/dist/heuristic/ngrams.js.map +1 -0
- package/dist/heuristic/sentences.d.ts +13 -0
- package/dist/heuristic/sentences.d.ts.map +1 -0
- package/dist/heuristic/sentences.js +23 -0
- package/dist/heuristic/sentences.js.map +1 -0
- package/dist/heuristic/tfidf.d.ts +21 -0
- package/dist/heuristic/tfidf.d.ts.map +1 -0
- package/dist/heuristic/tfidf.js +87 -0
- package/dist/heuristic/tfidf.js.map +1 -0
- package/dist/heuristic/token-f1.d.ts +12 -0
- package/dist/heuristic/token-f1.d.ts.map +1 -0
- package/dist/heuristic/token-f1.js +41 -0
- package/dist/heuristic/token-f1.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics/answer-correctness.d.ts +7 -0
- package/dist/metrics/answer-correctness.d.ts.map +1 -0
- package/dist/metrics/answer-correctness.js +51 -0
- package/dist/metrics/answer-correctness.js.map +1 -0
- package/dist/metrics/answer-relevance.d.ts +6 -0
- package/dist/metrics/answer-relevance.d.ts.map +1 -0
- package/dist/metrics/answer-relevance.js +37 -0
- package/dist/metrics/answer-relevance.js.map +1 -0
- package/dist/metrics/context-precision.d.ts +6 -0
- package/dist/metrics/context-precision.d.ts.map +1 -0
- package/dist/metrics/context-precision.js +57 -0
- package/dist/metrics/context-precision.js.map +1 -0
- package/dist/metrics/context-recall.d.ts +7 -0
- package/dist/metrics/context-recall.d.ts.map +1 -0
- package/dist/metrics/context-recall.js +66 -0
- package/dist/metrics/context-recall.js.map +1 -0
- package/dist/metrics/context-relevance.d.ts +6 -0
- package/dist/metrics/context-relevance.d.ts.map +1 -0
- package/dist/metrics/context-relevance.js +48 -0
- package/dist/metrics/context-relevance.js.map +1 -0
- package/dist/metrics/faithfulness.d.ts +6 -0
- package/dist/metrics/faithfulness.d.ts.map +1 -0
- package/dist/metrics/faithfulness.js +64 -0
- package/dist/metrics/faithfulness.js.map +1 -0
- package/dist/metrics/hallucination-rate.d.ts +7 -0
- package/dist/metrics/hallucination-rate.d.ts.map +1 -0
- package/dist/metrics/hallucination-rate.js +65 -0
- package/dist/metrics/hallucination-rate.js.map +1 -0
- package/dist/metrics/index.d.ts +14 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +40 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/types.d.ts +169 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/package.json +53 -0
package/dist/evaluate.js
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Core evaluate() and evaluateBatch() functions.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.evaluate = evaluate;
|
|
7
|
+
exports.evaluateBatch = evaluateBatch;
|
|
8
|
+
const index_1 = require("./metrics/index");
|
|
9
|
+
/** Regression tolerance used by evaluateBatch() when options.regressionThreshold is nullish. */
const DEFAULT_REGRESSION_THRESHOLD = 0.05;
/** Worker count used by evaluateBatch() when options.concurrency is nullish. */
const DEFAULT_CONCURRENCY = 4;
/** Composite pass mark used by evaluate() when options.compositeThreshold is nullish. */
const DEFAULT_COMPOSITE_THRESHOLD = 0.6;
/** Metric ids that are run when the caller does not pass an explicit metric list. */
const ALL_METRICS = [
    'faithfulness',
    'answerRelevance',
    'contextPrecision',
    'contextRecall',
    'contextRelevance',
    'answerCorrectness',
    'hallucinationRate',
];
|
|
21
|
+
/**
 * Score one EvalSample with every requested metric and derive a composite verdict.
 * Only heuristic mode is implemented; LLM/hybrid options are accepted but ignored.
 *
 * @param {object} sample - The sample handed unchanged to each metric.
 * @param {string[]} [metrics] - Metric ids to run; ALL_METRICS when nullish.
 * @param {object} [options] - Reads `heuristic`, `thresholds`, `compositeThreshold`
 *   and `compositeWeights`; every one of them is optional.
 * @returns {Promise<object>} `{ sample, metrics, compositeScore, passed, cost,
 *   durationMs, evaluatedAt }`. `compositeScore` is null when no metric produced a
 *   score, in which case `passed` is false.
 */
async function evaluate(sample, metrics, options) {
    const startedAt = Date.now();
    const metricIds = metrics ?? ALL_METRICS;
    const opts = options ?? {};
    const thresholdOverrides = opts.thresholds ?? {};
    const minComposite = opts.compositeThreshold ?? DEFAULT_COMPOSITE_THRESHOLD;
    const weights = opts.compositeWeights ?? {};

    // Phase 1: run the metrics one after another, re-judging pass/fail wherever
    // the caller supplied a threshold for that metric.
    const perMetric = {};
    for (const id of metricIds) {
        const computed = await (0, index_1.computeMetric)(id, sample, opts.heuristic);
        const override = thresholdOverrides[id];
        if (override === undefined) {
            perMetric[id] = computed;
            continue;
        }
        perMetric[id] = {
            ...computed,
            threshold: override,
            // A null score cannot be judged, so the verdict stays null as well.
            passed: computed.score === null ? null : computed.score >= override,
        };
    }

    // Phase 2: weighted average over the metrics that produced a score
    // (weight defaults to 1); any explicit `passed === false` vetoes the sample.
    let weightedTotal = 0;
    let weightTotal = 0;
    let anyFailed = false;
    for (const id of metricIds) {
        const entry = perMetric[id];
        if (!entry) {
            continue;
        }
        if (entry.passed === false) {
            anyFailed = true;
        }
        if (entry.score === null) {
            continue;
        }
        const weight = weights[id] ?? 1;
        weightedTotal += entry.score * weight;
        weightTotal += weight;
    }
    const compositeScore = weightTotal > 0 ? weightedTotal / weightTotal : null;
    const compositeOk = compositeScore === null ? false : compositeScore >= minComposite;

    return {
        sample,
        metrics: perMetric,
        compositeScore,
        passed: !anyFailed && compositeOk,
        // No LLM is called in heuristic mode, so the cost report is all zeros.
        cost: {
            llmCalls: 0,
            estimatedInputTokens: 0,
            estimatedOutputTokens: 0,
            parseFailures: 0,
        },
        durationMs: Date.now() - startedAt,
        evaluatedAt: new Date().toISOString(),
    };
}
|
|
84
|
+
/**
 * Summarise one metric across a batch of per-sample results.
 *
 * @param {string} metricId - Key looked up in each result's `metrics` map.
 * @param {Array<object>} results - Per-sample results; entries lacking the metric
 *   are skipped for the statistics but still count in the rate denominators.
 * @param {number} _threshold - Accepted for signature compatibility; not read.
 * @returns {object} `{ metricId, mean, median, min, max, stdDev, passRate, nullRate }`.
 *   The statistics are all 0 when no sample produced a non-null score; `stdDev`
 *   is the population standard deviation.
 */
function computeAggregate(metricId, results, _threshold) {
    const values = [];
    let missingScores = 0;
    let passes = 0;
    for (const { metrics } of results) {
        const entry = metrics[metricId];
        if (!entry) {
            continue;
        }
        if (entry.score !== null) {
            values.push(entry.score);
        }
        else {
            missingScores += 1;
        }
        if (entry.passed === true) {
            passes += 1;
        }
    }

    // Rates are relative to the whole batch, not just to samples that had the metric.
    const sampleCount = results.length;
    const hasSamples = sampleCount > 0;
    const nullRate = hasSamples ? missingScores / sampleCount : 0;
    const passRate = hasSamples ? passes / sampleCount : 0;

    const count = values.length;
    if (count === 0) {
        return { metricId, mean: 0, median: 0, min: 0, max: 0, stdDev: 0, passRate, nullRate };
    }

    let sum = 0;
    for (const value of values) {
        sum += value;
    }
    const mean = sum / count;

    const ascending = values.slice().sort((x, y) => x - y);
    const mid = Math.floor(count / 2);
    const median = count % 2 === 1
        ? ascending[mid]
        : (ascending[mid - 1] + ascending[mid]) / 2;

    let squaredDeviation = 0;
    for (const value of values) {
        squaredDeviation += (value - mean) ** 2;
    }

    return {
        metricId,
        mean,
        median,
        min: ascending[0],
        max: ascending[count - 1],
        stdDev: Math.sqrt(squaredDeviation / count),
        passRate,
        nullRate,
    };
}
|
|
118
|
+
/**
 * Summarise the composite scores of a batch.
 *
 * @param {Array<object>} results - Per-sample results carrying `compositeScore`
 *   (null entries are left out of the statistics) and `passed`.
 * @returns {object} `{ mean, median, min, max, stdDev, passRate }`; everything is 0
 *   when no sample has a non-null composite score. `stdDev` is the population
 *   standard deviation and `passRate` is relative to all results.
 */
function computeCompositeAggregate(results) {
    const values = [];
    results.forEach((result) => {
        const score = result.compositeScore;
        if (score !== null) {
            values.push(score);
        }
    });

    const count = values.length;
    if (count === 0) {
        return { mean: 0, median: 0, min: 0, max: 0, stdDev: 0, passRate: 0 };
    }

    let sum = 0;
    for (const value of values) {
        sum += value;
    }
    const mean = sum / count;

    const ascending = values.slice().sort((x, y) => x - y);
    const mid = Math.floor(count / 2);
    const median = count % 2 === 1
        ? ascending[mid]
        : (ascending[mid - 1] + ascending[mid]) / 2;

    let squaredDeviation = 0;
    for (const value of values) {
        squaredDeviation += (value - mean) ** 2;
    }

    const passedCount = results.reduce((n, result) => (result.passed ? n + 1 : n), 0);

    return {
        mean,
        median,
        min: ascending[0],
        max: ascending[count - 1],
        stdDev: Math.sqrt(squaredDeviation / count),
        passRate: passedCount / results.length,
    };
}
|
|
135
|
+
/**
 * Evaluate a batch of EvalSamples with concurrency control, then aggregate the run.
 *
 * @param {Array<object>} samples - Samples to evaluate; `results[i]` belongs to `samples[i]`.
 * @param {string[]} [metrics] - Metric ids to run; ALL_METRICS when nullish.
 * @param {object} [options] - Read here: `concurrency`, `regressionThreshold`,
 *   `onProgress(completed, total)`, `thresholds` and `baselineResult`. The same
 *   object is forwarded to evaluate() for every sample.
 * @returns {Promise<object>} `{ results, aggregates, compositeAggregate, passed,
 *   totalCost, totalDurationMs, regressions, evaluatedAt }`. `regressions` stays
 *   undefined unless `options.baselineResult` is given.
 */
async function evaluateBatch(samples, metrics, options) {
    const start = Date.now();
    // Normalise the worker count to an integer >= 1. Without this, 0, a negative
    // number or NaN would start no worker at all and leave `results` full of holes,
    // and a fractional value would address fractional sample indices.
    const requestedConcurrency = Number(options?.concurrency ?? DEFAULT_CONCURRENCY);
    const concurrency = requestedConcurrency >= 1 ? Math.floor(requestedConcurrency) : 1;
    const regressionThreshold = options?.regressionThreshold ?? DEFAULT_REGRESSION_THRESHOLD;
    const onProgress = options?.onProgress;
    const results = new Array(samples.length);
    let completed = 0;
    // Shared cursor: each worker claims the next unclaimed index, so every sample
    // is evaluated exactly once whatever the worker count is.
    let nextIndex = 0;
    async function runWorker() {
        while (nextIndex < samples.length) {
            const i = nextIndex++;
            results[i] = await evaluate(samples[i], metrics, options);
            completed++;
            onProgress?.(completed, samples.length);
        }
    }
    const workerCount = Math.min(concurrency, samples.length);
    const workers = [];
    for (let w = 0; w < workerCount; w++) {
        workers.push(runWorker());
    }
    await Promise.all(workers);
    const activeMetrics = metrics ?? ALL_METRICS;
    // Build aggregates
    const thresholds = options?.thresholds ?? {};
    const aggregates = {};
    for (const metricId of activeMetrics) {
        // Explicit override first, then the threshold reported by the first result, then 0.7.
        const defaultThreshold = results[0]?.metrics[metricId]?.threshold ?? 0.7;
        const threshold = thresholds[metricId] ?? defaultThreshold;
        aggregates[metricId] = computeAggregate(metricId, results, threshold);
    }
    const compositeAggregate = computeCompositeAggregate(results);
    // Compute total cost by summing the per-sample cost reports field by field.
    const totalCost = {
        llmCalls: 0,
        estimatedInputTokens: 0,
        estimatedOutputTokens: 0,
        parseFailures: 0,
    };
    for (const result of results) {
        for (const field of Object.keys(totalCost)) {
            totalCost[field] += result.cost?.[field] ?? 0;
        }
    }
    // Regression detection: compare each metric's mean with the baseline's mean.
    let regressions;
    if (options?.baselineResult) {
        regressions = [];
        const baseline = options.baselineResult;
        for (const metricId of activeMetrics) {
            const baselineAgg = baseline.aggregates?.[metricId];
            const currentAgg = aggregates[metricId];
            if (!baselineAgg || !currentAgg) {
                continue;
            }
            const delta = currentAgg.mean - baselineAgg.mean;
            regressions.push({
                metricId,
                baselineMean: baselineAgg.mean,
                currentMean: currentAgg.mean,
                delta,
                // Only a drop larger than the tolerance counts as a regression.
                regressed: delta < -regressionThreshold,
            });
        }
    }
    // Every sample must pass and at least half of them must pass overall.
    const passed = results.every(r => r.passed) && compositeAggregate.passRate >= 0.5;
    return {
        results,
        aggregates,
        compositeAggregate,
        passed,
        totalCost,
        totalDurationMs: Date.now() - start,
        regressions,
        evaluatedAt: new Date().toISOString(),
    };
}
|
|
208
|
+
//# sourceMappingURL=evaluate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluate.js","sourceRoot":"","sources":["../src/evaluate.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAkCH,4BAkEC;AAiED,sCAkFC;AAzOD,2CAAgD;AAEhD,MAAM,WAAW,GAAe;IAC9B,cAAc;IACd,iBAAiB;IACjB,kBAAkB;IAClB,eAAe;IACf,kBAAkB;IAClB,mBAAmB;IACnB,mBAAmB;CACpB,CAAC;AAEF,MAAM,2BAA2B,GAAG,GAAG,CAAC;AACxC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAC9B,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C;;;GAGG;AACI,KAAK,UAAU,QAAQ,CAC5B,MAAkB,EAClB,OAAoB,EACpB,OAAyB;IAEzB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,aAAa,GAAG,OAAO,IAAI,WAAW,CAAC;IAC7C,MAAM,gBAAgB,GAAG,OAAO,EAAE,SAAS,CAAC;IAC5C,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,EAAE,CAAC;IAC7C,MAAM,kBAAkB,GAAG,OAAO,EAAE,kBAAkB,IAAI,2BAA2B,CAAC;IACtF,MAAM,gBAAgB,GAAG,OAAO,EAAE,gBAAgB,IAAI,EAAE,CAAC;IAEzD,MAAM,aAAa,GAA4C,EAAE,CAAC;IAElE,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,MAAM,MAAM,GAAG,MAAM,IAAA,qBAAa,EAAC,QAAQ,EAAE,MAAM,EAAE,gBAAgB,CAAC,CAAC;QACvE,uCAAuC;QACvC,MAAM,iBAAiB,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC/C,IAAI,iBAAiB,KAAK,SAAS,EAAE,CAAC;YACpC,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,IAAI,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC;YACxF,aAAa,CAAC,QAAQ,CAAC,GAAG;gBACxB,GAAG,MAAM;gBACT,SAAS,EAAE,iBAAiB;gBAC5B,MAAM,EAAE,cAAc;aACvB,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,aAAa,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC;QACnC,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,SAAS,GAAG,IAAI,CAAC;IAErB,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;QAClC,IAAI,CAAC,CAAC;YAAE,SAAS;QACjB,IAAI,CAAC,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACrB,MAAM,CAAC,GAAG,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC1C,WAAW,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC;YAC3B,WAAW,IAAI,CAAC,CAAC;QACnB,CAAC;QACD,IAAI,CAAC,CAAC,MAAM,KAAK,KAAK;YAAE,SAAS,GAAG,KAAK,CAAC;IAC5C,CAAC;IAED,MAAM,cAAc,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1E,MAAM,eAAe,GAAG,cAAc,KAAK,IAAI,CAAC,CAAC,CAAC,cAAc,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,CAAC;IAC/F,MAAM,MAAM,GAAG,SAAS,IAAI,eAAe,CAAC;IAE5C,MAAM,
IAAI,GAAgB;QACxB,QAAQ,EAAE,CAAC;QACX,oBAAoB,EAAE,CAAC;QACvB,qBAAqB,EAAE,CAAC;QACxB,aAAa,EAAE,CAAC;KACjB,CAAC;IAEF,OAAO;QACL,MAAM;QACN,OAAO,EAAE,aAA+C;QACxD,cAAc;QACd,MAAM;QACN,IAAI;QACJ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;QAC9B,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACtC,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAkB,EAAE,OAAqB,EAAE,UAAkB;IACrF,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC/B,IAAI,CAAC,EAAE;YAAE,SAAS;QAClB,IAAI,EAAE,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACtB,SAAS,EAAE,CAAC;QACd,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;QACxB,CAAC;QACD,IAAI,EAAE,CAAC,MAAM,KAAK,IAAI;YAAE,SAAS,EAAE,CAAC;IACtC,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;IACzF,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAC/D,MAAM,MAAM,GACV,MAAM,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC;QACrB,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QACjE,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;
IACrF,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAEnC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,yBAAyB,CAAC,OAAqB;IACtD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAEzF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;IACxE,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAC/D,MAAM,MAAM,GACV,MAAM,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC;QACrB,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QACjE,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACrF,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAEvE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AACtD,CAAC;AAED;;GAEG;AACI,KAAK,UAAU,aAAa,CACjC,OAAqB,EACrB,OAAoB,EACpB,OAA8B;IAE9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,mBAAmB,CAAC;IAChE,MAAM,mBAAmB,GAAG,OAAO,EAAE,mBAAmB,IAAI,4BAA4B,CAAC;IACzF,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,CAAC;IAEvC,MAAM,OAAO,GAAiB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,mCAAmC;IACnC,KAAK,UAAU,YAAY,CAAC,QAAgB;QA
C1C,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;YAC5D,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YAC1D,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,CAAC,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QAC1C,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAoB,EAAE,CAAC;IACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/D,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,MAAM,aAAa,GAAG,OAAO,IAAI,WAAW,CAAC;IAE7C,mBAAmB;IACnB,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,EAAE,CAAC;IAC7C,MAAM,UAAU,GAA+C,EAAE,CAAC;IAClE,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,MAAM,gBAAgB,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE,SAAS,IAAI,GAAG,CAAC;QACzE,MAAM,SAAS,GAAG,UAAU,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC;QAC3D,UAAU,CAAC,QAAQ,CAAC,GAAG,gBAAgB,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;IACxE,CAAC;IAED,MAAM,kBAAkB,GAAG,yBAAyB,CAAC,OAAO,CAAC,CAAC;IAE9D,qBAAqB;IACrB,MAAM,SAAS,GAAgB;QAC7B,QAAQ,EAAE,CAAC;QACX,oBAAoB,EAAE,CAAC;QACvB,qBAAqB,EAAE,CAAC;QACxB,aAAa,EAAE,CAAC;KACjB,CAAC;IAEF,uBAAuB;IACvB,IAAI,WAA2C,CAAC;IAChD,IAAI,OAAO,EAAE,cAAc,EAAE,CAAC;QAC5B,WAAW,GAAG,EAAE,CAAC;QACjB,MAAM,QAAQ,GAAG,OAAO,CAAC,cAAc,CAAC;QACxC,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;YACrC,MAAM,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAClD,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;YACxC,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU;gBAAE,SAAS;YAC1C,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;YACjD,MAAM,SAAS,GAAG,KAAK,GAAG,CAAC,mBAAmB,CAAC;YAC/C,WAAW,CAAC,IAAI,CAAC;gBACf,QAAQ;gBACR,YAAY,EAAE,WAAW,CAAC,IAAI;gBAC9B,WAAW,EAAE,UAAU,CAAC,IAAI;gBAC5B,KAAK;gBACL,SAAS;aACV,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,kBAAkB,CAAC,QAAQ,IAAI,GAAG,CAAC;IAElF,OAAO;QACL,OAAO;QACP,UAAU,EAAE,UAA+C;QAC3D,kBAAkB;QAClB,MAAM;QACN,SAAS;QACT,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;QACnC,WAAW;QACX,WAAW,E
AAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACtC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* createEvaluator() factory: returns a pre-configured Evaluator instance.
|
|
3
|
+
*/
|
|
4
|
+
import type { EvaluatorConfig, Evaluator } from './types';
|
|
5
|
+
/**
|
|
6
|
+
* Create a pre-configured Evaluator bound to the given config.
|
|
7
|
+
* Per-call options are merged with config, with per-call options taking precedence.
|
|
8
|
+
*/
|
|
9
|
+
export declare function createEvaluator(config: EvaluatorConfig): Evaluator;
|
|
10
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,eAAe,EACf,SAAS,EAOV,MAAM,SAAS,CAAC;AAGjB;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,eAAe,GAAG,SAAS,CAqClE"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* createEvaluator() factory: returns a pre-configured Evaluator instance.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.createEvaluator = createEvaluator;
|
|
7
|
+
const evaluate_1 = require("./evaluate");
|
|
8
|
+
/**
 * Build an Evaluator whose calls are pre-loaded with `config`.
 *
 * Options passed to an individual call win over `config`. The nested `thresholds`,
 * `compositeWeights` and `heuristic` maps are merged key by key rather than
 * replaced wholesale. When a call passes no options, `config` itself is forwarded.
 *
 * @param {object} config - Evaluator configuration; `config.metrics` is captured
 *   once here and used whenever a call does not name its own metrics.
 * @returns {object} `{ config, evaluate, evaluateBatch }`.
 */
function createEvaluator(config) {
    const defaultMetrics = config.metrics;
    const mergeOptions = (perCallOptions) => (perCallOptions
        ? {
            ...config,
            ...perCallOptions,
            thresholds: { ...config.thresholds, ...perCallOptions.thresholds },
            compositeWeights: { ...config.compositeWeights, ...perCallOptions.compositeWeights },
            heuristic: { ...config.heuristic, ...perCallOptions.heuristic },
        }
        : config);
    const evaluator = {
        config,
        evaluate: async (sample, metrics, options) => (0, evaluate_1.evaluate)(
            sample,
            metrics ?? defaultMetrics,
            mergeOptions(options),
        ),
        evaluateBatch: async (samples, metrics, options) => (0, evaluate_1.evaluateBatch)(
            samples,
            metrics ?? defaultMetrics,
            mergeOptions(options),
        ),
    };
    return evaluator;
}
|
|
39
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAkBH,0CAqCC;AA3CD,yCAAqD;AAErD;;;GAGG;AACH,SAAgB,eAAe,CAAC,MAAuB;IACrD,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC;IAEtC,SAAS,YAAY,CAA4B,cAAkB;QACjE,IAAI,CAAC,cAAc;YAAE,OAAO,MAAsB,CAAC;QACnD,OAAO;YACL,GAAG,MAAM;YACT,GAAG,cAAc;YACjB,UAAU,EAAE,EAAE,GAAG,MAAM,CAAC,UAAU,EAAE,GAAG,cAAc,CAAC,UAAU,EAAE;YAClE,gBAAgB,EAAE,EAAE,GAAG,MAAM,CAAC,gBAAgB,EAAE,GAAG,cAAc,CAAC,gBAAgB,EAAE;YACpF,SAAS,EAAE,EAAE,GAAG,MAAM,CAAC,SAAS,EAAE,GAAG,cAAc,CAAC,SAAS,EAAE;SAChE,CAAC;IACJ,CAAC;IAED,OAAO;QACL,MAAM;QAEN,KAAK,CAAC,QAAQ,CACZ,MAAkB,EAClB,OAAoB,EACpB,OAAyB;YAEzB,MAAM,aAAa,GAAG,OAAO,IAAI,cAAc,CAAC;YAChD,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;YACrC,OAAO,IAAA,mBAAQ,EAAC,MAAM,EAAE,aAAa,EAAE,MAAM,CAAC,CAAC;QACjD,CAAC;QAED,KAAK,CAAC,aAAa,CACjB,OAAqB,EACrB,OAAoB,EACpB,OAA8B;YAE9B,MAAM,aAAa,GAAG,OAAO,IAAI,cAAc,CAAC;YAChD,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;YACrC,OAAO,IAAA,wBAAa,EAAC,OAAO,EAAE,aAAa,EAAE,MAAM,CAAC,CAAC;QACvD,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* N-gram utilities for heuristic evaluation.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Lowercase, split on whitespace and punctuation, filter empty tokens.
|
|
6
|
+
*/
|
|
7
|
+
export declare function tokenize(text: string): string[];
|
|
8
|
+
/**
|
|
9
|
+
* Returns all n-grams from a token list as joined strings ("word1 word2").
|
|
10
|
+
*/
|
|
11
|
+
export declare function getNgrams(tokens: string[], n: number): string[];
|
|
12
|
+
/**
|
|
13
|
+
* Jaccard-style n-gram overlap: |intersection| / |union| using set semantics.
|
|
14
|
+
* Default n=1 (unigram).
|
|
15
|
+
*/
|
|
16
|
+
export declare function ngramOverlap(a: string, b: string, n?: number): number;
|
|
17
|
+
/**
|
|
18
|
+
* Weighted average of ngramOverlap for multiple n-gram sizes.
|
|
19
|
+
* Default: n=[1,2], weights=[0.7, 0.3].
|
|
20
|
+
*/
|
|
21
|
+
export declare function weightedNgramOverlap(a: string, b: string, ngramSizes?: number[], weights?: number[]): number;
|
|
22
|
+
//# sourceMappingURL=ngrams.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ngrams.d.ts","sourceRoot":"","sources":["../../src/heuristic/ngrams.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAK/C;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAO/D;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,SAAI,GAAG,MAAM,CAgBhE;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,CAAC,EAAE,MAAM,EACT,CAAC,EAAE,MAAM,EACT,UAAU,GAAE,MAAM,EAAW,EAC7B,OAAO,GAAE,MAAM,EAAe,GAC7B,MAAM,CAcR"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* N-gram utilities for heuristic evaluation.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.tokenize = tokenize;
|
|
7
|
+
exports.getNgrams = getNgrams;
|
|
8
|
+
exports.ngramOverlap = ngramOverlap;
|
|
9
|
+
exports.weightedNgramOverlap = weightedNgramOverlap;
|
|
10
|
+
/**
 * Turn text into lowercase word tokens.
 *
 * The text is lowercased and cut at every run of whitespace and/or Unicode
 * punctuation (`\p{P}`); empty pieces left at the edges are discarded.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(text) {
    const tokens = [];
    for (const piece of text.toLowerCase().split(/[\s\p{P}]+/u)) {
        if (piece.length > 0) {
            tokens.push(piece);
        }
    }
    return tokens;
}
|
|
19
|
+
/**
 * Returns all n-grams from a token list as joined strings ("word1 word2").
 *
 * @param {string[]} tokens - Tokens in reading order.
 * @param {number} n - N-gram size; must be a positive integer.
 * @returns {string[]} One space-joined string per window of `n` consecutive tokens,
 *   in order. Empty when there are fewer than `n` tokens, and also when `n` is not
 *   a positive integer (a size of 0 or below has no meaningful window and used to
 *   yield empty-string "n-grams").
 */
function getNgrams(tokens, n) {
    if (!Number.isInteger(n) || n < 1 || tokens.length < n) {
        return [];
    }
    const windowCount = tokens.length - n + 1;
    return Array.from({ length: windowCount }, (_, start) => tokens.slice(start, start + n).join(' '));
}
|
|
31
|
+
/**
 * Jaccard-style similarity between the n-gram sets of two texts:
 * |intersection| / |union|, with duplicates collapsed by set semantics.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @param {number} [n=1] - N-gram size (unigrams by default).
 * @returns {number} 1.0 when neither text yields an n-gram, 0.0 when only one of
 *   them does, otherwise the intersection-over-union ratio.
 */
function ngramOverlap(a, b, n = 1) {
    const left = new Set(getNgrams(tokenize(a), n));
    const right = new Set(getNgrams(tokenize(b), n));
    if (left.size === 0 || right.size === 0) {
        // Two empty sets are treated as identical; one empty set shares nothing.
        return left.size === right.size ? 1.0 : 0.0;
    }
    const shared = [...left].filter((gram) => right.has(gram)).length;
    return shared / (left.size + right.size - shared);
}
|
|
52
|
+
/**
 * Blend ngramOverlap() scores for several n-gram sizes into one weighted average.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @param {number[]} [ngramSizes=[1, 2]] - N-gram sizes to score.
 * @param {number[]} [weights=[0.7, 0.3]] - Weight per size, matched by position;
 *   a size without a matching weight gets weight 1.
 * @returns {number} Weighted mean of the overlaps, or 0 when the weights sum to 0
 *   (which includes an empty `ngramSizes`).
 */
function weightedNgramOverlap(a, b, ngramSizes = [1, 2], weights = [0.7, 0.3]) {
    let numerator = 0;
    let denominator = 0;
    for (const [position, size] of ngramSizes.entries()) {
        const weight = weights[position] ?? 1;
        numerator += ngramOverlap(a, b, size) * weight;
        denominator += weight;
    }
    return denominator === 0 ? 0 : numerator / denominator;
}
|
|
70
|
+
//# sourceMappingURL=ngrams.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ngrams.js","sourceRoot":"","sources":["../../src/heuristic/ngrams.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAKH,4BAKC;AAKD,8BAOC;AAMD,oCAgBC;AAMD,oDAmBC;AAnED;;GAEG;AACH,SAAgB,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,aAAa,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,SAAgB,SAAS,CAAC,MAAgB,EAAE,CAAS;IACnD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAgB,YAAY,CAAC,CAAS,EAAE,CAAS,EAAE,CAAC,GAAG,CAAC;IACtD,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAE/C,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IACzD,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAEzD,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,iBAAiB,EAAE,CAAC;IAC3C,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,GAAG,iBAAiB,CAAC;IACnE,OAAO,iBAAiB,GAAG,UAAU,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,SAAgB,oBAAoB,CAClC,CAAS,EACT,CAAS,EACT,aAAuB,CAAC,CAAC,EAAE,CAAC,CAAC,EAC7B,UAAoB,CAAC,GAAG,EAAE,GAAG,CAAC;IAE9B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1B,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QACtC,WAAW,IAAI,OAAO,GAAG,CAAC,CAAC;QAC3B,WAAW,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,
IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,OAAO,WAAW,GAAG,WAAW,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sentence segmentation utilities for heuristic evaluation.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Split text into sentences on . ! ? followed by whitespace+uppercase or end of string.
|
|
6
|
+
* Filters empty strings.
|
|
7
|
+
*/
|
|
8
|
+
export declare function splitSentences(text: string): string[];
|
|
9
|
+
/**
|
|
10
|
+
* Keep sentences with at least 3 words (simple heuristic for factual content).
|
|
11
|
+
*/
|
|
12
|
+
export declare function filterFactualSentences(sentences: string[]): string[];
|
|
13
|
+
//# sourceMappingURL=sentences.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentences.d.ts","sourceRoot":"","sources":["../../src/heuristic/sentences.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAIrD;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAEpE"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Sentence segmentation utilities for heuristic evaluation.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.splitSentences = splitSentences;
|
|
7
|
+
exports.filterFactualSentences = filterFactualSentences;
|
|
8
|
+
/**
 * Split text into sentences.
 *
 * A boundary is a run of whitespace that follows `.`, `!` or `?` and is
 * followed by an uppercase letter, or any trailing whitespace after a final
 * terminator. Each piece is trimmed and empty pieces are dropped.
 *
 * Uppercase detection uses the Unicode `Lu` category instead of ASCII
 * `[A-Z]`, so sentences that begin with letters such as "É" or "Ü" are
 * split too. Results for ASCII-only text are unchanged.
 *
 * @param {string} text - Text to segment.
 * @returns {string[]} Trimmed, non-empty sentences in their original order.
 */
function splitSentences(text) {
    // The lookbehind keeps the terminator attached to the sentence it ends;
    // the lookahead leaves the capital at the start of the next sentence.
    const boundary = /(?<=[.!?])\s+(?=\p{Lu})|(?<=[.!?])\s*$/u;
    return text
        .split(boundary)
        .map((piece) => piece.trim())
        .filter((piece) => piece.length > 0);
}
|
|
17
|
+
/**
 * Keep sentences that contain at least `minWords` whitespace-separated words
 * (a simple heuristic for factual content).
 *
 * A "word" is any maximal run of non-whitespace characters, so leading,
 * trailing and repeated whitespace never inflate the count.
 *
 * @param {string[]} sentences - Candidate sentences.
 * @param {number} [minWords=3] - Minimum number of words a sentence must
 *   have to be kept. Defaults to 3.
 * @returns {string[]} A new array with the qualifying sentences, in order.
 */
function filterFactualSentences(sentences, minWords = 3) {
    return sentences.filter((sentence) => {
        // match() returns null when there is no non-whitespace run at all.
        const words = sentence.match(/\S+/g) ?? [];
        return words.length >= minWords;
    });
}
|
|
23
|
+
//# sourceMappingURL=sentences.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentences.js","sourceRoot":"","sources":["../../src/heuristic/sentences.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAMH,wCAIC;AAKD,wDAEC;AAfD;;;GAGG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,yFAAyF;IACzF,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAChE,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB,CAAC,SAAmB;IACxD,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;AACrF,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TF-IDF vectorization and cosine similarity utilities for heuristic evaluation.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Build TF-IDF vectors for a query and a set of documents.
|
|
6
|
+
* TF = count/total, IDF = log((N+1)/(df+1))+1 (sklearn-style smooth IDF).
|
|
7
|
+
*/
|
|
8
|
+
export declare function buildTfIdfVectors(query: string, documents: string[]): {
|
|
9
|
+
queryVec: Map<string, number>;
|
|
10
|
+
docVecs: Map<string, number>[];
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Cosine similarity between two TF-IDF vectors.
|
|
14
|
+
* Returns 0 if either vector is zero.
|
|
15
|
+
*/
|
|
16
|
+
export declare function cosineSimilarity(a: Map<string, number>, b: Map<string, number>): number;
|
|
17
|
+
/**
|
|
18
|
+
* Convenience: compute TF-IDF cosine similarity between a query and a document.
|
|
19
|
+
*/
|
|
20
|
+
export declare function tfidfSimilarity(query: string, document: string): number;
|
|
21
|
+
//# sourceMappingURL=tfidf.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.d.ts","sourceRoot":"","sources":["../../src/heuristic/tfidf.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;GAGG;AACH,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EAAE,GAClB;IAAE,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAAC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;CAAE,CAmDnE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAC9B,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACtB,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACrB,MAAM,CAgBR;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAGvE"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TF-IDF vectorization and cosine similarity utilities for heuristic evaluation.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.buildTfIdfVectors = buildTfIdfVectors;
|
|
7
|
+
exports.cosineSimilarity = cosineSimilarity;
|
|
8
|
+
exports.tfidfSimilarity = tfidfSimilarity;
|
|
9
|
+
const ngrams_1 = require("./ngrams");
|
|
10
|
+
/**
 * Build TF-IDF vectors for a query and a set of documents.
 *
 * The query and the documents together form the corpus, so
 * N = documents.length + 1.
 *   TF  = term count / number of tokens in that text
 *   IDF = log((N + 1) / (df + 1)) + 1   (sklearn-style smooth IDF)
 * A text that tokenizes to nothing yields an empty vector.
 *
 * @param {string} query - Query text; becomes `queryVec`.
 * @param {string[]} documents - Document texts; `docVecs[i]` corresponds to
 *   `documents[i]`.
 * @returns {{ queryVec: Map<string, number>, docVecs: Map<string, number>[] }}
 *   Sparse term -> weight vectors.
 */
function buildTfIdfVectors(query, documents) {
    const tokenLists = [query, ...documents].map((text) => (0, ngrams_1.tokenize)(text));
    const corpusSize = tokenLists.length;

    // Document frequency: count each term once per text. De-duplicating with
    // a Set means every token list is scanned once, instead of once per
    // vocabulary term.
    const docFreq = new Map();
    for (const tokens of tokenLists) {
        for (const term of new Set(tokens)) {
            docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
        }
    }

    // Smooth IDF: log((N + 1) / (df + 1)) + 1
    const idf = new Map();
    for (const [term, freq] of docFreq) {
        idf.set(term, Math.log((corpusSize + 1) / (freq + 1)) + 1);
    }

    // Turn one token list into a sparse TF-IDF vector.
    function buildVec(tokens) {
        const vec = new Map();
        if (tokens.length === 0) {
            return vec;
        }
        const counts = new Map();
        for (const token of tokens) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        for (const [term, count] of counts) {
            const tf = count / tokens.length;
            vec.set(term, tf * (idf.get(term) ?? 1));
        }
        return vec;
    }

    const [queryTokens, ...docTokenLists] = tokenLists;
    return {
        queryVec: buildVec(queryTokens),
        docVecs: docTokenLists.map((tokens) => buildVec(tokens)),
    };
}
|
|
60
|
+
/**
 * Cosine similarity between two sparse vectors stored as term -> weight maps.
 *
 * Terms missing from a map count as weight 0. Returns 0 when either vector
 * has zero magnitude (including an empty map).
 *
 * The quotient is clamped to [-1, 1]: rounding in the square roots can
 * otherwise produce values such as 1.0000000000000002 for a vector compared
 * with itself, which breaks callers that treat the result as a bounded score.
 *
 * @param {Map<string, number>} a - First vector.
 * @param {Map<string, number>} b - Second vector.
 * @returns {number} Cosine of the angle between `a` and `b`, in [-1, 1].
 */
function cosineSimilarity(a, b) {
    let dot = 0;
    let normA = 0;
    for (const [term, weight] of a) {
        normA += weight * weight;
        // Only terms present in both maps contribute to the dot product.
        dot += weight * (b.get(term) ?? 0);
    }

    let normB = 0;
    for (const weight of b.values()) {
        normB += weight * weight;
    }

    if (normA === 0 || normB === 0) {
        return 0;
    }
    const cosine = dot / (Math.sqrt(normA) * Math.sqrt(normB));
    return Math.min(1, Math.max(-1, cosine));
}
|
|
80
|
+
/**
 * Convenience wrapper: TF-IDF cosine similarity between one query and one
 * document. The vectors are built over a corpus consisting of just these two
 * texts.
 *
 * @param {string} query - Query text.
 * @param {string} document - Document text to compare against the query.
 * @returns {number} Cosine similarity of the two TF-IDF vectors.
 */
function tfidfSimilarity(query, document) {
    const vectors = buildTfIdfVectors(query, [document]);
    const documentVec = vectors.docVecs[0];
    return cosineSimilarity(vectors.queryVec, documentVec);
}
|
|
87
|
+
//# sourceMappingURL=tfidf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.js","sourceRoot":"","sources":["../../src/heuristic/tfidf.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAQH,8CAsDC;AAMD,4CAmBC;AAKD,0CAGC;AA7FD,qCAAoC;AAEpC;;;GAGG;AACH,SAAgB,iBAAiB,CAC/B,KAAa,EACb,SAAmB;IAEnB,MAAM,QAAQ,GAAG,CAAC,KAAK,EAAE,GAAG,SAAS,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAA,iBAAQ,EAAC,CAAC,CAAC,CAAC,CAAC;IAElD,mBAAmB;IACnB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,CAAC,IAAI,MAAM;YAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;IAE1B,+BAA+B;IAC/B,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;YAChC,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;gBAAE,KAAK,EAAE,CAAC;QACrC,CAAC;QACD,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IACtB,CAAC;IAED,sDAAsD;IACtD,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,uCAAuC;IACvC,SAAS,QAAQ,CAAC,MAAgB;QAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;QACtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAEpC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YACnC,MAAM,EAAE,GAAG,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;YACjC,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,GAAG,OAAO,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;IA
E5D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;AAC/B,CAAC;AAED;;;GAGG;AACH,SAAgB,gBAAgB,CAC9B,CAAsB,EACtB,CAAsB;IAEtB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,KAAK,IAAI,GAAG,GAAG,GAAG,CAAC;QACnB,MAAM,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,GAAG,IAAI,GAAG,GAAG,IAAI,CAAC;IACpB,CAAC;IACD,KAAK,MAAM,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,KAAK,IAAI,GAAG,GAAG,GAAG,CAAC;IACrB,CAAC;IAED,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;GAEG;AACH,SAAgB,eAAe,CAAC,KAAa,EAAE,QAAgB;IAC7D,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,iBAAiB,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnE,OAAO,gBAAgB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-level F1 score for answer correctness evaluation.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Compute token-level F1 using multiset intersection (counting duplicates).
|
|
6
|
+
* precision = |common| / |hypothesis tokens|
|
|
7
|
+
* recall = |common| / |reference tokens|
|
|
8
|
+
* F1 = 2 * precision * recall / (precision + recall)
|
|
9
|
+
* Returns 0 if either input is empty.
|
|
10
|
+
*/
|
|
11
|
+
export declare function tokenF1(reference: string, hypothesis: string): number;
|
|
12
|
+
//# sourceMappingURL=token-f1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-f1.d.ts","sourceRoot":"","sources":["../../src/heuristic/token-f1.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;;;;GAMG;AACH,wBAAgB,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM,CA8BrE"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Token-level F1 score for answer correctness evaluation.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.tokenF1 = tokenF1;
|
|
7
|
+
const ngrams_1 = require("./ngrams");
|
|
8
|
+
/**
 * Token-level F1 between a reference and a hypothesis, using multiset
 * intersection so repeated tokens are counted as often as they occur in both.
 *
 *   precision = |common| / |hypothesis tokens|
 *   recall    = |common| / |reference tokens|
 *   F1        = 2 * precision * recall / (precision + recall)
 *
 * @param {string} reference - Ground-truth text.
 * @param {string} hypothesis - Text being scored.
 * @returns {number} F1 score; 0 if either text has no tokens or the texts
 *   share no tokens.
 */
function tokenF1(reference, hypothesis) {
    // Count how many times each token occurs in a list.
    const tally = (tokens) => {
        const freq = new Map();
        tokens.forEach((token) => {
            freq.set(token, (freq.get(token) ?? 0) + 1);
        });
        return freq;
    };

    const refTokens = (0, ngrams_1.tokenize)(reference);
    const hypTokens = (0, ngrams_1.tokenize)(hypothesis);
    if (refTokens.length === 0 || hypTokens.length === 0) {
        return 0;
    }

    const refFreq = tally(refTokens);
    const hypFreq = tally(hypTokens);

    // Multiset intersection: each shared token contributes the smaller count.
    let shared = 0;
    refFreq.forEach((refCount, token) => {
        shared += Math.min(refCount, hypFreq.get(token) ?? 0);
    });
    if (shared === 0) {
        return 0;
    }

    const precision = shared / hypTokens.length;
    const recall = shared / refTokens.length;
    return (2 * precision * recall) / (precision + recall);
}
|
|
41
|
+
//# sourceMappingURL=token-f1.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-f1.js","sourceRoot":"","sources":["../../src/heuristic/token-f1.ts"],"names":[],"mappings":";AAAA;;GAEG;;AAWH,0BA8BC;AAvCD,qCAAoC;AAEpC;;;;;;GAMG;AACH,SAAgB,OAAO,CAAC,SAAiB,EAAE,UAAkB;IAC3D,MAAM,SAAS,GAAG,IAAA,iBAAQ,EAAC,SAAS,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,IAAA,iBAAQ,EAAC,UAAU,CAAC,CAAC;IAEvC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAE/D,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,8BAA8B;IAC9B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,SAAS,EAAE,CAAC;QACzC,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,WAAW,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED,IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEhC,MAAM,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC;IACjD,MAAM,MAAM,GAAG,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC;IAE9C,OAAO,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACzD,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type { MetricId, EvaluationMode, EvalSample, EvalSignal, MetricResult, CostTracker, EvalResult, MetricAggregate, MetricRegression, BatchEvalResult, JudgeFn, PromptOverrides, MetricThresholds, HeuristicOptions, EvaluateOptions, BatchEvaluateOptions, EvaluatorConfig, Evaluator, } from './types';
|
|
2
|
+
export { evaluate, evaluateBatch } from './evaluate';
|
|
3
|
+
export { createEvaluator } from './evaluator';
|
|
4
|
+
export { scoreFaithfulness, scoreAnswerRelevance, scoreContextPrecision, scoreContextRecall, scoreContextRelevance, scoreAnswerCorrectness, scoreHallucinationRate, computeMetric, } from './metrics/index';
|
|
5
|
+
export { splitSentences, filterFactualSentences } from './heuristic/sentences';
|
|
6
|
+
export { tokenize, getNgrams, ngramOverlap, weightedNgramOverlap } from './heuristic/ngrams';
|
|
7
|
+
export { buildTfIdfVectors, cosineSimilarity, tfidfSimilarity } from './heuristic/tfidf';
|
|
8
|
+
export { tokenF1 } from './heuristic/token-f1';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,YAAY,EACV,QAAQ,EACR,cAAc,EACd,UAAU,EACV,UAAU,EACV,YAAY,EACZ,WAAW,EACX,UAAU,EACV,eAAe,EACf,gBAAgB,EAChB,eAAe,EACf,OAAO,EACP,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,oBAAoB,EACpB,eAAe,EACf,SAAS,GACV,MAAM,SAAS,CAAC;AAGjB,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAGrD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAG9C,OAAO,EACL,iBAAiB,EACjB,oBAAoB,EACpB,qBAAqB,EACrB,kBAAkB,EAClB,qBAAqB,EACrB,sBAAsB,EACtB,sBAAsB,EACtB,aAAa,GACd,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,cAAc,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/E,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC7F,OAAO,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC"}
|