rag-eval-node-ts 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +204 -0
- package/dist/__tests__/evaluate.test.d.ts +2 -0
- package/dist/__tests__/evaluate.test.d.ts.map +1 -0
- package/dist/__tests__/evaluate.test.js +130 -0
- package/dist/__tests__/evaluate.test.js.map +1 -0
- package/dist/__tests__/evaluator.test.d.ts +2 -0
- package/dist/__tests__/evaluator.test.d.ts.map +1 -0
- package/dist/__tests__/evaluator.test.js +92 -0
- package/dist/__tests__/evaluator.test.js.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts +2 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.js +89 -0
- package/dist/__tests__/heuristic/ngrams.test.js.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts +2 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.js +57 -0
- package/dist/__tests__/heuristic/tfidf.test.js.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts +2 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.js +40 -0
- package/dist/__tests__/heuristic/token-f1.test.js.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts +2 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.js +66 -0
- package/dist/__tests__/metrics/faithfulness.test.js.map +1 -0
- package/dist/__tests__/types.test.d.ts +2 -0
- package/dist/__tests__/types.test.d.ts.map +1 -0
- package/dist/__tests__/types.test.js +531 -0
- package/dist/__tests__/types.test.js.map +1 -0
- package/dist/evaluate.d.ts +14 -0
- package/dist/evaluate.d.ts.map +1 -0
- package/dist/evaluate.js +208 -0
- package/dist/evaluate.js.map +1 -0
- package/dist/evaluator.d.ts +10 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +39 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/heuristic/ngrams.d.ts +22 -0
- package/dist/heuristic/ngrams.d.ts.map +1 -0
- package/dist/heuristic/ngrams.js +70 -0
- package/dist/heuristic/ngrams.js.map +1 -0
- package/dist/heuristic/sentences.d.ts +13 -0
- package/dist/heuristic/sentences.d.ts.map +1 -0
- package/dist/heuristic/sentences.js +23 -0
- package/dist/heuristic/sentences.js.map +1 -0
- package/dist/heuristic/tfidf.d.ts +21 -0
- package/dist/heuristic/tfidf.d.ts.map +1 -0
- package/dist/heuristic/tfidf.js +87 -0
- package/dist/heuristic/tfidf.js.map +1 -0
- package/dist/heuristic/token-f1.d.ts +12 -0
- package/dist/heuristic/token-f1.d.ts.map +1 -0
- package/dist/heuristic/token-f1.js +41 -0
- package/dist/heuristic/token-f1.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics/answer-correctness.d.ts +7 -0
- package/dist/metrics/answer-correctness.d.ts.map +1 -0
- package/dist/metrics/answer-correctness.js +51 -0
- package/dist/metrics/answer-correctness.js.map +1 -0
- package/dist/metrics/answer-relevance.d.ts +6 -0
- package/dist/metrics/answer-relevance.d.ts.map +1 -0
- package/dist/metrics/answer-relevance.js +37 -0
- package/dist/metrics/answer-relevance.js.map +1 -0
- package/dist/metrics/context-precision.d.ts +6 -0
- package/dist/metrics/context-precision.d.ts.map +1 -0
- package/dist/metrics/context-precision.js +57 -0
- package/dist/metrics/context-precision.js.map +1 -0
- package/dist/metrics/context-recall.d.ts +7 -0
- package/dist/metrics/context-recall.d.ts.map +1 -0
- package/dist/metrics/context-recall.js +66 -0
- package/dist/metrics/context-recall.js.map +1 -0
- package/dist/metrics/context-relevance.d.ts +6 -0
- package/dist/metrics/context-relevance.d.ts.map +1 -0
- package/dist/metrics/context-relevance.js +48 -0
- package/dist/metrics/context-relevance.js.map +1 -0
- package/dist/metrics/faithfulness.d.ts +6 -0
- package/dist/metrics/faithfulness.d.ts.map +1 -0
- package/dist/metrics/faithfulness.js +64 -0
- package/dist/metrics/faithfulness.js.map +1 -0
- package/dist/metrics/hallucination-rate.d.ts +7 -0
- package/dist/metrics/hallucination-rate.d.ts.map +1 -0
- package/dist/metrics/hallucination-rate.js +65 -0
- package/dist/metrics/hallucination-rate.js.map +1 -0
- package/dist/metrics/index.d.ts +14 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +40 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/types.d.ts +169 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-f1.test.d.ts","sourceRoot":"","sources":["../../../src/__tests__/heuristic/token-f1.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const token_f1_1 = require("../../heuristic/token-f1");
|
|
5
|
+
(0, vitest_1.describe)('tokenF1', () => {
|
|
6
|
+
(0, vitest_1.it)('returns 1.0 for a perfect match', () => {
|
|
7
|
+
const text = 'the cat sat on the mat';
|
|
8
|
+
(0, vitest_1.expect)((0, token_f1_1.tokenF1)(text, text)).toBeCloseTo(1.0);
|
|
9
|
+
});
|
|
10
|
+
(0, vitest_1.it)('returns 0.0 for completely disjoint texts', () => {
|
|
11
|
+
(0, vitest_1.expect)((0, token_f1_1.tokenF1)('alpha beta gamma', 'delta epsilon zeta')).toBe(0.0);
|
|
12
|
+
});
|
|
13
|
+
(0, vitest_1.it)('returns 0.0 when reference is empty', () => {
|
|
14
|
+
(0, vitest_1.expect)((0, token_f1_1.tokenF1)('', 'hello world')).toBe(0.0);
|
|
15
|
+
});
|
|
16
|
+
(0, vitest_1.it)('returns 0.0 when hypothesis is empty', () => {
|
|
17
|
+
(0, vitest_1.expect)((0, token_f1_1.tokenF1)('hello world', '')).toBe(0.0);
|
|
18
|
+
});
|
|
19
|
+
(0, vitest_1.it)('returns partial score for partial overlap', () => {
|
|
20
|
+
// reference: [a, b, c], hypothesis: [a, b, d]
|
|
21
|
+
// common: a, b (2 tokens)
|
|
22
|
+
// precision = 2/3, recall = 2/3
|
|
23
|
+
// F1 = 2*(2/3)*(2/3) / (2/3 + 2/3) = 2/3
|
|
24
|
+
const score = (0, token_f1_1.tokenF1)('a b c', 'a b d');
|
|
25
|
+
(0, vitest_1.expect)(score).toBeCloseTo(2 / 3, 2);
|
|
26
|
+
});
|
|
27
|
+
(0, vitest_1.it)('handles duplicate tokens correctly using multiset intersection', () => {
|
|
28
|
+
// reference: [a, a, b], hypothesis: [a, b, c]
|
|
29
|
+
// common: a(min(2,1)=1), b(min(1,1)=1) = 2
|
|
30
|
+
// precision = 2/3, recall = 2/3
|
|
31
|
+
const score = (0, token_f1_1.tokenF1)('a a b', 'a b c');
|
|
32
|
+
(0, vitest_1.expect)(score).toBeCloseTo(2 / 3, 2);
|
|
33
|
+
});
|
|
34
|
+
(0, vitest_1.it)('returns a score between 0 and 1', () => {
|
|
35
|
+
const score = (0, token_f1_1.tokenF1)('RAG is a technique for grounded generation', 'RAG combines retrieval with generation');
|
|
36
|
+
(0, vitest_1.expect)(score).toBeGreaterThan(0);
|
|
37
|
+
(0, vitest_1.expect)(score).toBeLessThanOrEqual(1);
|
|
38
|
+
});
|
|
39
|
+
});
|
|
40
|
+
//# sourceMappingURL=token-f1.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-f1.test.js","sourceRoot":"","sources":["../../../src/__tests__/heuristic/token-f1.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,uDAAmD;AAEnD,IAAA,iBAAQ,EAAC,SAAS,EAAE,GAAG,EAAE;IACvB,IAAA,WAAE,EAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,IAAI,GAAG,wBAAwB,CAAC;QACtC,IAAA,eAAM,EAAC,IAAA,kBAAO,EAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,IAAA,eAAM,EAAC,IAAA,kBAAO,EAAC,kBAAkB,EAAE,oBAAoB,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,IAAA,eAAM,EAAC,IAAA,kBAAO,EAAC,EAAE,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,IAAA,eAAM,EAAC,IAAA,kBAAO,EAAC,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,8CAA8C;QAC9C,0BAA0B;QAC1B,gCAAgC;QAChC,yCAAyC;QACzC,MAAM,KAAK,GAAG,IAAA,kBAAO,EAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxC,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,gEAAgE,EAAE,GAAG,EAAE;QACxE,8CAA8C;QAC9C,2CAA2C;QAC3C,gCAAgC;QAChC,MAAM,KAAK,GAAG,IAAA,kBAAO,EAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxC,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,KAAK,GAAG,IAAA,kBAAO,EAAC,4CAA4C,EAAE,wCAAwC,CAAC,CAAC;QAC9G,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACjC,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"faithfulness.test.d.ts","sourceRoot":"","sources":["../../../src/__tests__/metrics/faithfulness.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const faithfulness_1 = require("../../metrics/faithfulness");
|
|
5
|
+
(0, vitest_1.describe)('scoreFaithfulness', () => {
|
|
6
|
+
(0, vitest_1.it)('returns a high score when the answer closely matches the context', async () => {
|
|
7
|
+
const sample = {
|
|
8
|
+
question: 'What is RAG?',
|
|
9
|
+
answer: 'RAG combines retrieval with generation to produce grounded answers.',
|
|
10
|
+
contexts: [
|
|
11
|
+
'Retrieval-augmented generation combines retrieval with generation to produce grounded answers.',
|
|
12
|
+
],
|
|
13
|
+
};
|
|
14
|
+
const result = await (0, faithfulness_1.scoreFaithfulness)(sample);
|
|
15
|
+
(0, vitest_1.expect)(result.metricId).toBe('faithfulness');
|
|
16
|
+
(0, vitest_1.expect)(result.score).not.toBeNull();
|
|
17
|
+
(0, vitest_1.expect)(result.score).toBeGreaterThan(0.5);
|
|
18
|
+
(0, vitest_1.expect)(result.mode).toBe('heuristic');
|
|
19
|
+
(0, vitest_1.expect)(result.llmCalls).toBe(0);
|
|
20
|
+
(0, vitest_1.expect)(result.durationMs).toBeGreaterThanOrEqual(0);
|
|
21
|
+
});
|
|
22
|
+
(0, vitest_1.it)('returns a low score when the answer is not in the context', async () => {
|
|
23
|
+
const sample = {
|
|
24
|
+
question: 'What is the capital of France?',
|
|
25
|
+
answer: 'The president signed an executive order today regarding trade policy.',
|
|
26
|
+
contexts: [
|
|
27
|
+
'Paris is the capital of France and a major European city.',
|
|
28
|
+
],
|
|
29
|
+
};
|
|
30
|
+
const result = await (0, faithfulness_1.scoreFaithfulness)(sample);
|
|
31
|
+
(0, vitest_1.expect)(result.score).toBeLessThan(0.5);
|
|
32
|
+
});
|
|
33
|
+
(0, vitest_1.it)('emits WARNING signals for unsupported sentences', async () => {
|
|
34
|
+
const sample = {
|
|
35
|
+
question: 'What is the sky made of?',
|
|
36
|
+
answer: 'The sky is filled with nitrogen oxygen and argon. Additionally dinosaurs went extinct 66 million years ago.',
|
|
37
|
+
contexts: ['The atmosphere contains nitrogen oxygen and trace gases.'],
|
|
38
|
+
};
|
|
39
|
+
const result = await (0, faithfulness_1.scoreFaithfulness)(sample);
|
|
40
|
+
const warnings = result.signals.filter(s => s.severity === 'warning');
|
|
41
|
+
(0, vitest_1.expect)(warnings.length).toBeGreaterThan(0);
|
|
42
|
+
});
|
|
43
|
+
(0, vitest_1.it)('returns score 0 when there are no contexts', async () => {
|
|
44
|
+
const sample = {
|
|
45
|
+
question: 'Q?',
|
|
46
|
+
answer: 'Some answer.',
|
|
47
|
+
contexts: [],
|
|
48
|
+
};
|
|
49
|
+
const result = await (0, faithfulness_1.scoreFaithfulness)(sample);
|
|
50
|
+
(0, vitest_1.expect)(result.score).toBe(0);
|
|
51
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
52
|
+
});
|
|
53
|
+
(0, vitest_1.it)('passes threshold check when score >= 0.7', async () => {
|
|
54
|
+
const sample = {
|
|
55
|
+
question: 'What is machine learning?',
|
|
56
|
+
answer: 'Machine learning is a field of artificial intelligence that trains models on data.',
|
|
57
|
+
contexts: [
|
|
58
|
+
'Machine learning is a field of artificial intelligence. It trains models on data to make predictions.',
|
|
59
|
+
],
|
|
60
|
+
};
|
|
61
|
+
const result = await (0, faithfulness_1.scoreFaithfulness)(sample);
|
|
62
|
+
(0, vitest_1.expect)(result.threshold).toBe(0.7);
|
|
63
|
+
(0, vitest_1.expect)(typeof result.passed).toBe('boolean');
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
//# sourceMappingURL=faithfulness.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"faithfulness.test.js","sourceRoot":"","sources":["../../../src/__tests__/metrics/faithfulness.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,6DAA+D;AAG/D,IAAA,iBAAQ,EAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,IAAA,WAAE,EAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAe;YACzB,QAAQ,EAAE,cAAc;YACxB,MAAM,EAAE,qEAAqE;YAC7E,QAAQ,EAAE;gBACR,gGAAgG;aACjG;SACF,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,gCAAiB,EAAC,MAAM,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7C,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAA,eAAM,EAAC,MAAM,CAAC,KAAe,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACpD,IAAA,eAAM,EAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,IAAA,eAAM,EAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,IAAA,eAAM,EAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;QACzE,MAAM,MAAM,GAAe;YACzB,QAAQ,EAAE,gCAAgC;YAC1C,MAAM,EAAE,uEAAuE;YAC/E,QAAQ,EAAE;gBACR,2DAA2D;aAC5D;SACF,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,gCAAiB,EAAC,MAAM,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,MAAM,CAAC,KAAe,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,MAAM,GAAe;YACzB,QAAQ,EAAE,0BAA0B;YACpC,MAAM,EAAE,6GAA6G;YACrH,QAAQ,EAAE,CAAC,0DAA0D,CAAC;SACvE,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,gCAAiB,EAAC,MAAM,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC;QACtE,IAAA,eAAM,EAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAe;YACzB,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,cAAc;YACtB,QAAQ,EAAE,EAAE;SACb,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,gCAAiB,EAAC,MAAM,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAA,eAAM,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAe;YACzB,QAAQ,EAAE,2BAA2B;YACrC,MAAM,EAAE,oFAAoF;YAC5F,QAAQ,EAAE;gBACR,uGAAuG;aACxG;SACF,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,gCAAiB,EAAC,MAAM,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/types.test.ts"],"names":[],"mappings":""}
|