rag-eval-node-ts 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +204 -0
- package/dist/__tests__/evaluate.test.d.ts +2 -0
- package/dist/__tests__/evaluate.test.d.ts.map +1 -0
- package/dist/__tests__/evaluate.test.js +130 -0
- package/dist/__tests__/evaluate.test.js.map +1 -0
- package/dist/__tests__/evaluator.test.d.ts +2 -0
- package/dist/__tests__/evaluator.test.d.ts.map +1 -0
- package/dist/__tests__/evaluator.test.js +92 -0
- package/dist/__tests__/evaluator.test.js.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts +2 -0
- package/dist/__tests__/heuristic/ngrams.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/ngrams.test.js +89 -0
- package/dist/__tests__/heuristic/ngrams.test.js.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts +2 -0
- package/dist/__tests__/heuristic/tfidf.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/tfidf.test.js +57 -0
- package/dist/__tests__/heuristic/tfidf.test.js.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts +2 -0
- package/dist/__tests__/heuristic/token-f1.test.d.ts.map +1 -0
- package/dist/__tests__/heuristic/token-f1.test.js +40 -0
- package/dist/__tests__/heuristic/token-f1.test.js.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts +2 -0
- package/dist/__tests__/metrics/faithfulness.test.d.ts.map +1 -0
- package/dist/__tests__/metrics/faithfulness.test.js +66 -0
- package/dist/__tests__/metrics/faithfulness.test.js.map +1 -0
- package/dist/__tests__/types.test.d.ts +2 -0
- package/dist/__tests__/types.test.d.ts.map +1 -0
- package/dist/__tests__/types.test.js +531 -0
- package/dist/__tests__/types.test.js.map +1 -0
- package/dist/evaluate.d.ts +14 -0
- package/dist/evaluate.d.ts.map +1 -0
- package/dist/evaluate.js +208 -0
- package/dist/evaluate.js.map +1 -0
- package/dist/evaluator.d.ts +10 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +39 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/heuristic/ngrams.d.ts +22 -0
- package/dist/heuristic/ngrams.d.ts.map +1 -0
- package/dist/heuristic/ngrams.js +70 -0
- package/dist/heuristic/ngrams.js.map +1 -0
- package/dist/heuristic/sentences.d.ts +13 -0
- package/dist/heuristic/sentences.d.ts.map +1 -0
- package/dist/heuristic/sentences.js +23 -0
- package/dist/heuristic/sentences.js.map +1 -0
- package/dist/heuristic/tfidf.d.ts +21 -0
- package/dist/heuristic/tfidf.d.ts.map +1 -0
- package/dist/heuristic/tfidf.js +87 -0
- package/dist/heuristic/tfidf.js.map +1 -0
- package/dist/heuristic/token-f1.d.ts +12 -0
- package/dist/heuristic/token-f1.d.ts.map +1 -0
- package/dist/heuristic/token-f1.js +41 -0
- package/dist/heuristic/token-f1.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics/answer-correctness.d.ts +7 -0
- package/dist/metrics/answer-correctness.d.ts.map +1 -0
- package/dist/metrics/answer-correctness.js +51 -0
- package/dist/metrics/answer-correctness.js.map +1 -0
- package/dist/metrics/answer-relevance.d.ts +6 -0
- package/dist/metrics/answer-relevance.d.ts.map +1 -0
- package/dist/metrics/answer-relevance.js +37 -0
- package/dist/metrics/answer-relevance.js.map +1 -0
- package/dist/metrics/context-precision.d.ts +6 -0
- package/dist/metrics/context-precision.d.ts.map +1 -0
- package/dist/metrics/context-precision.js +57 -0
- package/dist/metrics/context-precision.js.map +1 -0
- package/dist/metrics/context-recall.d.ts +7 -0
- package/dist/metrics/context-recall.d.ts.map +1 -0
- package/dist/metrics/context-recall.js +66 -0
- package/dist/metrics/context-recall.js.map +1 -0
- package/dist/metrics/context-relevance.d.ts +6 -0
- package/dist/metrics/context-relevance.d.ts.map +1 -0
- package/dist/metrics/context-relevance.js +48 -0
- package/dist/metrics/context-relevance.js.map +1 -0
- package/dist/metrics/faithfulness.d.ts +6 -0
- package/dist/metrics/faithfulness.d.ts.map +1 -0
- package/dist/metrics/faithfulness.js +64 -0
- package/dist/metrics/faithfulness.js.map +1 -0
- package/dist/metrics/hallucination-rate.d.ts +7 -0
- package/dist/metrics/hallucination-rate.d.ts.map +1 -0
- package/dist/metrics/hallucination-rate.js +65 -0
- package/dist/metrics/hallucination-rate.js.map +1 -0
- package/dist/metrics/index.d.ts +14 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +40 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/types.d.ts +169 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/package.json +53 -0
package/README.md
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# rag-eval-node-ts
|
|
2
|
+
|
|
3
|
+
Lightweight RAG evaluation metrics for CI/CD pipelines. Provides seven metrics for measuring RAG pipeline quality, with heuristic evaluation mode (zero LLM cost) and a pluggable LLM-as-judge interface for hybrid or full LLM modes.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install rag-eval-node-ts
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For LLM-as-judge mode, install optional peer dependencies:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# OpenAI
|
|
15
|
+
npm install openai
|
|
16
|
+
|
|
17
|
+
# Anthropic
|
|
18
|
+
npm install @anthropic-ai/sdk
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
### Heuristic mode (zero LLM cost)
|
|
24
|
+
|
|
25
|
+
```ts
|
|
26
|
+
import { evaluate } from 'rag-eval-node-ts';
|
|
27
|
+
|
|
28
|
+
const result = await evaluate({
|
|
29
|
+
question: 'What is retrieval-augmented generation?',
|
|
30
|
+
answer: 'RAG combines retrieval with generation to produce grounded answers.',
|
|
31
|
+
contexts: [
|
|
32
|
+
'Retrieval-augmented generation (RAG) is a technique that combines information retrieval with text generation.',
|
|
33
|
+
],
|
|
34
|
+
groundTruth: 'RAG is a method that retrieves relevant documents and uses them to generate accurate responses.',
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
console.log(result.compositeScore); // 0.0 - 1.0
|
|
38
|
+
console.log(result.passed); // true if all metrics pass thresholds
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Pre-configured evaluator
|
|
42
|
+
|
|
43
|
+
```ts
|
|
44
|
+
import { createEvaluator } from 'rag-eval-node-ts';
|
|
45
|
+
|
|
46
|
+
const evaluator = createEvaluator({
|
|
47
|
+
mode: 'heuristic',
|
|
48
|
+
metrics: ['faithfulness', 'answerRelevance', 'contextPrecision'],
|
|
49
|
+
thresholds: { faithfulness: 0.8 },
|
|
50
|
+
compositeThreshold: 0.7,
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
const result = await evaluator.evaluate(sample);
|
|
54
|
+
const batchResult = await evaluator.evaluateBatch(samples);
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Batch evaluation
|
|
58
|
+
|
|
59
|
+
```ts
|
|
60
|
+
import { evaluateBatch } from 'rag-eval-node-ts';
|
|
61
|
+
|
|
62
|
+
const batchResult = await evaluateBatch(samples, undefined, {
|
|
63
|
+
concurrency: 8,
|
|
64
|
+
onProgress: (completed, total) => console.log(`${completed}/${total}`),
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
console.log(batchResult.aggregates.faithfulness.mean);
|
|
68
|
+
console.log(batchResult.compositeAggregate.passRate);
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Regression detection
|
|
72
|
+
|
|
73
|
+
```ts
|
|
74
|
+
const baseline = await evaluateBatch(baselineSamples);
|
|
75
|
+
const current = await evaluateBatch(currentSamples, undefined, {
|
|
76
|
+
baselineResult: baseline,
|
|
77
|
+
regressionThreshold: 0.05,
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
for (const r of current.regressions ?? []) {
|
|
81
|
+
if (r.regressed) {
|
|
82
|
+
console.warn(`${r.metricId}: dropped by ${Math.abs(r.delta).toFixed(3)}`);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## API
|
|
88
|
+
|
|
89
|
+
### `evaluate(sample, metrics?, options?): Promise<EvalResult>`
|
|
90
|
+
|
|
91
|
+
Evaluates a single `EvalSample` and returns an `EvalResult`.
|
|
92
|
+
|
|
93
|
+
- `sample` — the input (question, answer, contexts, optional groundTruth)
|
|
94
|
+
- `metrics` — array of `MetricId` values selecting which metrics to compute (default: all 7)
|
|
95
|
+
- `options` — `EvaluateOptions` (thresholds, compositeThreshold, compositeWeights, heuristic tuning)
|
|
96
|
+
|
|
97
|
+
### `evaluateBatch(samples, metrics?, options?): Promise<BatchEvalResult>`
|
|
98
|
+
|
|
99
|
+
Evaluates a batch of samples with concurrency control and optional regression detection.
|
|
100
|
+
|
|
101
|
+
- `options.concurrency` — max parallel evaluations (default: 4)
|
|
102
|
+
- `options.onProgress` — progress callback
|
|
103
|
+
- `options.baselineResult` — prior `BatchEvalResult` for regression comparison
|
|
104
|
+
- `options.regressionThreshold` — minimum mean drop to flag a regression (default: 0.05)
|
|
105
|
+
|
|
106
|
+
### `createEvaluator(config): Evaluator`
|
|
107
|
+
|
|
108
|
+
Returns a pre-configured `Evaluator` instance. Config is merged with per-call options; per-call options win on conflict.
|
|
109
|
+
|
|
110
|
+
```ts
|
|
111
|
+
const evaluator = createEvaluator({
|
|
112
|
+
mode: 'heuristic',
|
|
113
|
+
metrics: ['faithfulness', 'answerRelevance'],
|
|
114
|
+
thresholds: { faithfulness: 0.8 },
|
|
115
|
+
compositeThreshold: 0.65,
|
|
116
|
+
heuristic: { claimSupportThreshold: 0.2 },
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// evaluate() and evaluateBatch() are bound to this config
|
|
120
|
+
const result = await evaluator.evaluate(sample);
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Metrics
|
|
124
|
+
|
|
125
|
+
All 7 metrics operate in heuristic mode using text overlap, TF-IDF similarity, and token-level F1.
|
|
126
|
+
|
|
127
|
+
| Metric | `MetricId` | Requires `groundTruth` | Default Threshold | Description |
|
|
128
|
+
|--------|-----------|----------------------|-------------------|-------------|
|
|
129
|
+
| Faithfulness | `faithfulness` | No | 0.7 | Is the answer supported by the contexts? Scores each answer sentence against the best-matching context chunk using weighted n-gram overlap. |
|
|
130
|
+
| Answer Relevance | `answerRelevance` | No | 0.7 | Is the answer relevant to the question? Averages TF-IDF cosine similarity and unigram Jaccard overlap between question and answer. |
|
|
131
|
+
| Context Precision | `contextPrecision` | No | 0.7 | Are retrieved contexts relevant to the question? Average TF-IDF similarity of each context chunk against the question. |
|
|
132
|
+
| Context Recall | `contextRecall` | Yes | 0.7 | Do contexts cover the ground truth? Fraction of ground-truth sentences with unigram overlap >= 0.3 against any context chunk. |
|
|
133
|
+
| Context Relevance | `contextRelevance` | No | 0.6 | Stricter relevance check: fraction of context chunks with weighted n-gram overlap >= `chunkRelevanceThreshold` (default 0.2). |
|
|
134
|
+
| Answer Correctness | `answerCorrectness` | Yes | 0.6 | Is the answer factually correct vs ground truth? Blends token F1 (70%) and unigram Jaccard (30%). |
|
|
135
|
+
| Hallucination Rate | `hallucinationRate` | No | 0.7 | Does the answer contain unsupported claims? Score = 1 − (fraction of answer sentences with max context overlap < `claimSupportThreshold`). |
|
|
136
|
+
|
|
137
|
+
## Heuristic Options
|
|
138
|
+
|
|
139
|
+
Pass via `options.heuristic` or `EvaluatorConfig.heuristic`:
|
|
140
|
+
|
|
141
|
+
| Option | Default | Description |
|
|
142
|
+
|--------|---------|-------------|
|
|
143
|
+
| `claimSupportThreshold` | 0.15 | Min unigram overlap for a sentence to be considered context-supported (hallucinationRate). |
|
|
144
|
+
| `chunkRelevanceThreshold` | 0.2 | Min weighted n-gram overlap for a chunk to count as relevant (contextRelevance). |
|
|
145
|
+
| `ngramSizes` | `[1, 2]` | N-gram sizes for weighted overlap. |
|
|
146
|
+
| `ngramWeights` | `[0.7, 0.3]` | Weights for each n-gram size. |
|
|
147
|
+
|
|
148
|
+
## Signals
|
|
149
|
+
|
|
150
|
+
Each `MetricResult` includes a `signals` array of `EvalSignal` objects flagging specific findings:
|
|
151
|
+
|
|
152
|
+
| Severity | When emitted |
|
|
153
|
+
|----------|-------------|
|
|
154
|
+
| `warning` | Answer sentence has low context support (faithfulness), answer relevance is low, answer correctness is low |
|
|
155
|
+
| `info` | Context chunks with low precision score listed (contextPrecision) |
|
|
156
|
+
| `critical` | Specific unsupported answer sentences (hallucinationRate) |
|
|
157
|
+
|
|
158
|
+
## Exports
|
|
159
|
+
|
|
160
|
+
### Functions
|
|
161
|
+
- `evaluate` — single sample evaluation
|
|
162
|
+
- `evaluateBatch` — batch evaluation with concurrency
|
|
163
|
+
- `createEvaluator` — pre-configured evaluator factory
|
|
164
|
+
|
|
165
|
+
### Metric functions
|
|
166
|
+
- `scoreFaithfulness`, `scoreAnswerRelevance`, `scoreContextPrecision`
|
|
167
|
+
- `scoreContextRecall`, `scoreContextRelevance`, `scoreAnswerCorrectness`
|
|
168
|
+
- `scoreHallucinationRate`
|
|
169
|
+
- `computeMetric` — dispatch by MetricId
|
|
170
|
+
|
|
171
|
+
### Heuristic primitives
|
|
172
|
+
- `tokenize`, `getNgrams`, `ngramOverlap`, `weightedNgramOverlap`
|
|
173
|
+
- `buildTfIdfVectors`, `cosineSimilarity`, `tfidfSimilarity`
|
|
174
|
+
- `tokenF1`
|
|
175
|
+
- `splitSentences`, `filterFactualSentences`
|
|
176
|
+
|
|
177
|
+
### Types
|
|
178
|
+
All 18 types are exported: `MetricId`, `EvaluationMode`, `EvalSample`, `EvalSignal`, `MetricResult`, `CostTracker`, `EvalResult`, `MetricAggregate`, `MetricRegression`, `BatchEvalResult`, `JudgeFn`, `PromptOverrides`, `MetricThresholds`, `HeuristicOptions`, `EvaluateOptions`, `BatchEvaluateOptions`, `EvaluatorConfig`, `Evaluator`.
|
|
179
|
+
|
|
180
|
+
## Evaluation Modes
|
|
181
|
+
|
|
182
|
+
| Mode | Description | LLM Cost |
|
|
183
|
+
|------|-------------|----------|
|
|
184
|
+
| `heuristic` | Deterministic text-overlap and TF-IDF heuristics | None |
|
|
185
|
+
| `llm` | LLM-as-judge via a pluggable `JudgeFn` | Per-call |
|
|
186
|
+
| `hybrid` | Routes each metric to heuristic or LLM based on config | Partial |
|
|
187
|
+
|
|
188
|
+
LLM and hybrid modes are accepted by the API but currently route to heuristic; full LLM judge integration is planned.
|
|
189
|
+
|
|
190
|
+
## Adapters
|
|
191
|
+
|
|
192
|
+
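LLM adapters are intended to be exposed as subpath imports. Note that the 0.3.0 file listing contains no `dist/adapters` files and LLM/hybrid modes currently route to heuristic, so these imports may not resolve until judge integration ships: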
|
|
193
|
+
|
|
194
|
+
```ts
|
|
195
|
+
// OpenAI
|
|
196
|
+
import { createOpenAIJudge } from 'rag-eval-node-ts/adapters/openai';
|
|
197
|
+
|
|
198
|
+
// Anthropic
|
|
199
|
+
import { createAnthropicJudge } from 'rag-eval-node-ts/adapters/anthropic';
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluate.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/evaluate.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const evaluate_1 = require("../evaluate");
|
|
5
|
+
/**
 * Identifiers of the seven metrics. The suites in this file loop over this
 * list to check that every metric shows up in `result.metrics` (single
 * sample) and in `result.aggregates` (batch).
 */
const ALL_METRIC_IDS = [
|
|
6
|
+
'faithfulness',
|
|
7
|
+
'answerRelevance',
|
|
8
|
+
'contextPrecision',
|
|
9
|
+
'contextRecall',
|
|
10
|
+
'contextRelevance',
|
|
11
|
+
'answerCorrectness',
|
|
12
|
+
'hallucinationRate',
|
|
13
|
+
];
|
|
14
|
+
/**
 * Shared input fixture for the `evaluate()` suite: one question, one answer,
 * two context chunks, and a `groundTruth` string.
 */
const sample = {
|
|
15
|
+
question: 'What is retrieval-augmented generation?',
|
|
16
|
+
answer: 'RAG combines retrieval with generation to produce grounded answers.',
|
|
17
|
+
contexts: [
|
|
18
|
+
'Retrieval-augmented generation (RAG) is a technique that combines information retrieval with text generation to produce grounded answers.',
|
|
19
|
+
'RAG pipelines retrieve relevant documents and feed them to an LLM as context.',
|
|
20
|
+
],
|
|
21
|
+
groundTruth: 'RAG is a method that retrieves relevant documents and uses them to generate accurate responses.',
|
|
22
|
+
};
|
|
23
|
+
/**
 * Suite for the single-sample `evaluate()` entry point.
 *
 * Runs against the module-level `sample` fixture and `ALL_METRIC_IDS` list.
 * `vitest_1` and `evaluate_1` are the CommonJS bindings required at the top
 * of this file.
 */
(0, vitest_1.describe)('evaluate()', () => {
    const { it, expect } = vitest_1;
    const { evaluate } = evaluate_1;

    it('returns an EvalResult with the correct shape', async () => {
        const result = await evaluate(sample);
        // The result carries the very same sample object (identity, not a copy).
        expect(result.sample).toBe(sample);
        expect(typeof result.compositeScore).toBe('number');
        expect(typeof result.passed).toBe('boolean');
        expect(typeof result.durationMs).toBe('number');
        expect(typeof result.evaluatedAt).toBe('string');
        // Only the leading `YYYY-MM-DDT` part of the timestamp is checked.
        expect(result.evaluatedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
    });

    it('includes all 7 metrics by default', async () => {
        const result = await evaluate(sample);
        for (const id of ALL_METRIC_IDS) {
            expect(result.metrics[id]).toBeDefined();
            expect(result.metrics[id].metricId).toBe(id);
        }
    });

    it('compositeScore is between 0 and 1', async () => {
        const result = await evaluate(sample);
        expect(result.compositeScore).not.toBeNull();
        expect(result.compositeScore).toBeGreaterThanOrEqual(0);
        expect(result.compositeScore).toBeLessThanOrEqual(1);
    });

    it('allows selecting a subset of metrics', async () => {
        const metrics = ['faithfulness', 'answerRelevance'];
        const result = await evaluate(sample, metrics);
        expect(result.metrics['faithfulness']).toBeDefined();
        expect(result.metrics['answerRelevance']).toBeDefined();
        // Without this check the test would still pass if `metrics` were
        // ignored and all seven metrics were computed.
        const selected = new Set(metrics);
        for (const id of ALL_METRIC_IDS) {
            if (!selected.has(id)) {
                expect(result.metrics[id]).toBeUndefined();
            }
        }
    });

    it('metrics requiring groundTruth return null score when missing', async () => {
        // Deliberately has no `groundTruth` property.
        const sampleWithoutGT = {
            question: 'What is RAG?',
            answer: 'RAG combines retrieval with generation.',
            contexts: ['RAG is a retrieval-augmented generation technique.'],
        };
        const result = await evaluate(sampleWithoutGT, ['contextRecall', 'answerCorrectness']);
        expect(result.metrics['contextRecall'].score).toBeNull();
        expect(result.metrics['answerCorrectness'].score).toBeNull();
        expect(result.metrics['contextRecall'].passed).toBeNull();
    });

    it('respects threshold overrides', async () => {
        const result = await evaluate(sample, ['faithfulness'], {
            thresholds: { faithfulness: 0.99 },
        });
        expect(result.metrics['faithfulness'].threshold).toBe(0.99);
    });

    it('cost tracker has llmCalls: 0 for heuristic mode', async () => {
        const result = await evaluate(sample);
        expect(result.cost.llmCalls).toBe(0);
    });
});
|
|
74
|
+
/**
 * Suite for the batch `evaluateBatch()` entry point.
 *
 * Uses a local two-sample fixture plus the module-level `ALL_METRIC_IDS`
 * list. `vitest_1` and `evaluate_1` are the CommonJS bindings required at
 * the top of this file.
 */
(0, vitest_1.describe)('evaluateBatch()', () => {
    const { it, expect } = vitest_1;
    const { evaluateBatch } = evaluate_1;

    const samples = [
        {
            question: 'What is RAG?',
            answer: 'RAG combines retrieval with generation.',
            contexts: ['RAG is a retrieval-augmented generation technique.'],
            groundTruth: 'RAG retrieves documents and generates answers.',
        },
        {
            question: 'What is machine learning?',
            answer: 'Machine learning trains models on data.',
            contexts: ['Machine learning is a field of AI that trains models on data.'],
            groundTruth: 'Machine learning trains models on data to make predictions.',
        },
    ];

    it('returns BatchEvalResult with correct shape', async () => {
        const result = await evaluateBatch(samples);
        expect(result.results).toHaveLength(2);
        expect(typeof result.compositeAggregate.mean).toBe('number');
        expect(typeof result.passed).toBe('boolean');
        expect(typeof result.totalDurationMs).toBe('number');
        // Only the leading `YYYY-MM-DDT` part of the timestamp is checked.
        expect(result.evaluatedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
    });

    it('computes aggregates for all metrics', async () => {
        const result = await evaluateBatch(samples);
        for (const id of ALL_METRIC_IDS) {
            expect(result.aggregates[id]).toBeDefined();
            expect(typeof result.aggregates[id].mean).toBe('number');
            expect(typeof result.aggregates[id].passRate).toBe('number');
        }
    });

    it('detects regressions when baseline is provided', async () => {
        const baseline = await evaluateBatch(samples);
        // Keep each question and ground truth but swap in an off-topic answer
        // AND off-topic contexts, so the scores should fall below the baseline.
        const degradedSamples = samples.map((s) => ({
            ...s,
            answer: 'The weather is unpredictable and changes frequently.',
            contexts: ['Unrelated context about sports and entertainment.'],
        }));
        const current = await evaluateBatch(degradedSamples, undefined, {
            baselineResult: baseline,
            regressionThreshold: 0.01,
        });
        expect(current.regressions).toBeDefined();
        expect(Array.isArray(current.regressions)).toBe(true);
        // The title promises detection, so at least one entry must actually
        // be flagged; an empty or all-clear array is a failure.
        expect(current.regressions.some((r) => r.regressed)).toBe(true);
    });

    it('respects concurrency option', async () => {
        // Eight samples with a limit of 2: this only checks that every sample
        // still yields a result, not the degree of parallelism.
        const manySamples = Array.from({ length: 8 }, (_, i) => ({
            question: `Question ${i}?`,
            answer: `Answer ${i}.`,
            contexts: [`Context for question ${i}.`],
        }));
        const result = await evaluateBatch(manySamples, undefined, { concurrency: 2 });
        expect(result.results).toHaveLength(8);
    });
});
|
|
130
|
+
//# sourceMappingURL=evaluate.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluate.test.js","sourceRoot":"","sources":["../../src/__tests__/evaluate.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,0CAAsD;AAGtD,MAAM,cAAc,GAAe;IACjC,cAAc;IACd,iBAAiB;IACjB,kBAAkB;IAClB,eAAe;IACf,kBAAkB;IAClB,mBAAmB;IACnB,mBAAmB;CACpB,CAAC;AAEF,MAAM,MAAM,GAAe;IACzB,QAAQ,EAAE,yCAAyC;IACnD,MAAM,EAAE,qEAAqE;IAC7E,QAAQ,EAAE;QACR,2IAA2I;QAC3I,+EAA+E;KAChF;IACD,WAAW,EAAE,iGAAiG;CAC/G,CAAC;AAEF,IAAA,iBAAQ,EAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,IAAA,WAAE,EAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,CAAC,CAAC;QACtC,IAAA,eAAM,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpD,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7C,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,IAAA,eAAM,EAAC,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC5D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,CAAC,CAAC;QACtC,KAAK,MAAM,EAAE,IAAI,cAAc,EAAE,CAAC;YAChC,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YACzC,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,CAAC,CAAC;QACtC,IAAA,eAAM,EAAC,MAAM,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC7C,IAAA,eAAM,EAAC,MAAM,CAAC,cAAwB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAClE,IAAA,eAAM,EAAC,MAAM,CAAC,cAAwB,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,OAAO,GAAe,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QACrD,IAAA,eAAM,EAAC,MAAM,CAAC,
OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,eAAe,GAAe;YAClC,QAAQ,EAAE,cAAc;YACxB,MAAM,EAAE,yCAAyC;YACjD,QAAQ,EAAE,CAAC,oDAAoD,CAAC;SACjE,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,eAAe,EAAE,CAAC,eAAe,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACvF,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,KAAK,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC,KAAK,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC7D,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;QAC5C,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,CAAC,cAAc,CAAC,EAAE;YACtD,UAAU,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE;SACnC,CAAC,CAAC;QACH,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,CAAC,CAAC;QACtC,IAAA,eAAM,EAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,MAAM,OAAO,GAAiB;QAC5B;YACE,QAAQ,EAAE,cAAc;YACxB,MAAM,EAAE,yCAAyC;YACjD,QAAQ,EAAE,CAAC,oDAAoD,CAAC;YAChE,WAAW,EAAE,gDAAgD;SAC9D;QACD;YACE,QAAQ,EAAE,2BAA2B;YACrC,MAAM,EAAE,yCAAyC;YACjD,QAAQ,EAAE,CAAC,+DAA+D,CAAC;YAC3E,WAAW,EAAE,6DAA6D;SAC3E;KACF,CAAC;IAEF,IAAA,WAAE,EAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,MAAM,IAAA,wBAAa,EAAC,OAAO,CAAC,CAAC;QAC5C,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7D,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7C,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrD,IAAA,eAAM,EAAC,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC5D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACnD,MAAM,MAAM,GAAG,MAAM,IAAA,wBAA
a,EAAC,OAAO,CAAC,CAAC;QAC5C,KAAK,MAAM,EAAE,IAAI,cAAc,EAAE,CAAC;YAChC,IAAA,eAAM,EAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAC5C,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACzD,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,QAAQ,GAAG,MAAM,IAAA,wBAAa,EAAC,OAAO,CAAC,CAAC;QAC9C,qEAAqE;QACrE,MAAM,eAAe,GAAiB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACtD,GAAG,CAAC;YACJ,MAAM,EAAE,sDAAsD;YAC9D,QAAQ,EAAE,CAAC,mDAAmD,CAAC;SAChE,CAAC,CAAC,CAAC;QACJ,MAAM,OAAO,GAAG,MAAM,IAAA,wBAAa,EAAC,eAAe,EAAE,SAAS,EAAE;YAC9D,cAAc,EAAE,QAAQ;YACxB,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,IAAA,eAAM,EAAC,OAAO,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;QAC1C,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACvD,QAAQ,EAAE,YAAY,CAAC,GAAG;YAC1B,MAAM,EAAE,UAAU,CAAC,GAAG;YACtB,QAAQ,EAAE,CAAC,wBAAwB,CAAC,GAAG,CAAC;SACzC,CAAC,CAAC,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,IAAA,wBAAa,EAAC,WAAW,EAAE,SAAS,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC;QAC/E,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/evaluator.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const evaluator_1 = require("../evaluator");
|
|
5
|
+
/**
 * Shared input fixture for the `createEvaluator()` suite: one question, one
 * answer, two context chunks, and a `groundTruth` string.
 */
const sample = {
|
|
6
|
+
question: 'What is RAG?',
|
|
7
|
+
answer: 'RAG combines retrieval with generation to produce grounded answers.',
|
|
8
|
+
contexts: [
|
|
9
|
+
'Retrieval-augmented generation combines retrieval with generation.',
|
|
10
|
+
'RAG pipelines improve answer accuracy by grounding in retrieved documents.',
|
|
11
|
+
],
|
|
12
|
+
groundTruth: 'RAG retrieves documents and uses them to generate accurate answers.',
|
|
13
|
+
};
|
|
14
|
+
(0, vitest_1.describe)('createEvaluator()', () => {
|
|
15
|
+
(0, vitest_1.it)('returns an Evaluator with evaluate, evaluateBatch, and config', () => {
|
|
16
|
+
const evaluator = (0, evaluator_1.createEvaluator)({ mode: 'heuristic' });
|
|
17
|
+
(0, vitest_1.expect)(typeof evaluator.evaluate).toBe('function');
|
|
18
|
+
(0, vitest_1.expect)(typeof evaluator.evaluateBatch).toBe('function');
|
|
19
|
+
(0, vitest_1.expect)(evaluator.config).toBeDefined();
|
|
20
|
+
(0, vitest_1.expect)(evaluator.config.mode).toBe('heuristic');
|
|
21
|
+
});
|
|
22
|
+
(0, vitest_1.it)('evaluate() returns a valid EvalResult', async () => {
|
|
23
|
+
const evaluator = (0, evaluator_1.createEvaluator)({ mode: 'heuristic' });
|
|
24
|
+
const result = await evaluator.evaluate(sample);
|
|
25
|
+
(0, vitest_1.expect)(result.sample).toBe(sample);
|
|
26
|
+
(0, vitest_1.expect)(typeof result.compositeScore).toBe('number');
|
|
27
|
+
(0, vitest_1.expect)(typeof result.passed).toBe('boolean');
|
|
28
|
+
});
|
|
29
|
+
(0, vitest_1.it)('uses default metrics from config when none specified per-call', async () => {
|
|
30
|
+
const config = {
|
|
31
|
+
mode: 'heuristic',
|
|
32
|
+
metrics: ['faithfulness', 'answerRelevance'],
|
|
33
|
+
};
|
|
34
|
+
const evaluator = (0, evaluator_1.createEvaluator)(config);
|
|
35
|
+
const result = await evaluator.evaluate(sample);
|
|
36
|
+
(0, vitest_1.expect)(result.metrics['faithfulness']).toBeDefined();
|
|
37
|
+
(0, vitest_1.expect)(result.metrics['answerRelevance']).toBeDefined();
|
|
38
|
+
// contextPrecision not in default metrics
|
|
39
|
+
(0, vitest_1.expect)(result.metrics['contextPrecision']).toBeUndefined();
|
|
40
|
+
});
|
|
41
|
+
(0, vitest_1.it)('per-call metrics override config defaults', async () => {
|
|
42
|
+
const config = {
|
|
43
|
+
mode: 'heuristic',
|
|
44
|
+
metrics: ['faithfulness'],
|
|
45
|
+
};
|
|
46
|
+
const evaluator = (0, evaluator_1.createEvaluator)(config);
|
|
47
|
+
const result = await evaluator.evaluate(sample, ['answerRelevance', 'contextPrecision']);
|
|
48
|
+
(0, vitest_1.expect)(result.metrics['answerRelevance']).toBeDefined();
|
|
49
|
+
(0, vitest_1.expect)(result.metrics['contextPrecision']).toBeDefined();
|
|
50
|
+
(0, vitest_1.expect)(result.metrics['faithfulness']).toBeUndefined();
|
|
51
|
+
});
|
|
52
|
+
(0, vitest_1.it)('per-call threshold options override config thresholds', async () => {
|
|
53
|
+
const evaluator = (0, evaluator_1.createEvaluator)({
|
|
54
|
+
mode: 'heuristic',
|
|
55
|
+
thresholds: { faithfulness: 0.5 },
|
|
56
|
+
});
|
|
57
|
+
const result = await evaluator.evaluate(sample, ['faithfulness'], {
|
|
58
|
+
thresholds: { faithfulness: 0.95 },
|
|
59
|
+
});
|
|
60
|
+
(0, vitest_1.expect)(result.metrics['faithfulness'].threshold).toBe(0.95);
|
|
61
|
+
});
|
|
62
|
+
// Shape check for evaluateBatch(): one result per input sample plus the
// aggregate, pass flag and timestamp fields.
(0, vitest_1.it)('evaluateBatch() returns a valid BatchEvalResult', async () => {
    const { expect } = vitest_1;
    const secondSample = {
        question: 'What is ML?',
        answer: 'ML trains models on data.',
        contexts: ['Machine learning trains models on data.'],
    };
    const evaluator = (0, evaluator_1.createEvaluator)({ mode: 'heuristic' });
    const batch = await evaluator.evaluateBatch([sample, secondSample]);
    expect(batch.results).toHaveLength(2);
    expect(typeof batch.compositeAggregate.mean).toBe('number');
    expect(typeof batch.passed).toBe('boolean');
    // evaluatedAt must begin like a date-time string: YYYY-MM-DDT...
    expect(batch.evaluatedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
});
|
|
78
|
+
// Evaluates the same sample and metrics with two evaluators that differ
// only in compositeThreshold (0.99 vs 0.01) and checks how `passed` relates
// between them.
//
// The previous version contained an empty `if (!resultLow.passed) { }`
// branch, so nothing about the threshold's effect was ever asserted. The
// exact scores are not known here, so instead of pinning `passed` to a fixed
// value the test asserts a monotonicity property: a run that passes under
// the stricter threshold must also pass under the looser one.
// NOTE(review): this assumes heuristic-mode scoring returns the same scores
// for the same sample on both evaluators - confirm against the evaluator.
(0, vitest_1.it)('compositeThreshold from config affects pass/fail', async () => {
    const metricIds = ['faithfulness', 'answerRelevance'];
    const evaluatorHigh = (0, evaluator_1.createEvaluator)({ mode: 'heuristic', compositeThreshold: 0.99 });
    const evaluatorLow = (0, evaluator_1.createEvaluator)({ mode: 'heuristic', compositeThreshold: 0.01 });
    // Evaluations are awaited one after the other, as in the original test.
    const resultHigh = await evaluatorHigh.evaluate(sample, metricIds);
    const resultLow = await evaluatorLow.evaluate(sample, metricIds);
    // Shape checks: both results expose a boolean pass flag.
    (0, vitest_1.expect)(typeof resultHigh.passed).toBe('boolean');
    (0, vitest_1.expect)(typeof resultLow.passed).toBe('boolean');
    // Lowering the threshold must never turn a pass into a fail:
    // resultHigh.passed implies resultLow.passed.
    (0, vitest_1.expect)(!resultHigh.passed || resultLow.passed).toBe(true);
});
|
|
91
|
+
});
|
|
92
|
+
//# sourceMappingURL=evaluator.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.test.js","sourceRoot":"","sources":["../../src/__tests__/evaluator.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,4CAA+C;AAG/C,MAAM,MAAM,GAAe;IACzB,QAAQ,EAAE,cAAc;IACxB,MAAM,EAAE,qEAAqE;IAC7E,QAAQ,EAAE;QACR,oEAAoE;QACpE,4EAA4E;KAC7E;IACD,WAAW,EAAE,qEAAqE;CACnF,CAAC;AAEF,IAAA,iBAAQ,EAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,IAAA,WAAE,EAAC,+DAA+D,EAAE,GAAG,EAAE;QACvE,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QACzD,IAAA,eAAM,EAAC,OAAO,SAAS,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnD,IAAA,eAAM,EAAC,OAAO,SAAS,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxD,IAAA,eAAM,EAAC,SAAS,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;QACvC,IAAA,eAAM,EAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChD,IAAA,eAAM,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpD,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,+DAA+D,EAAE,KAAK,IAAI,EAAE;QAC7E,MAAM,MAAM,GAAoB;YAC9B,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,CAAC,cAAc,EAAE,iBAAiB,CAAC;SAC7C,CAAC;QACF,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QACrD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QACxD,0CAA0C;QAC1C,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,MAAM,GAAoB;YAC9B,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,CAAC,cAAc,CAAC;SAC1B,CAAC;QACF,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,iBAAiB,EAAE,kBAAkB,CAAC,CAAC,CAAC;
QACzF,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QACxD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QACzD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC;YAChC,IAAI,EAAE,WAAW;YACjB,UAAU,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE;SAClC,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,cAAc,CAAC,EAAE;YAChE,UAAU,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE;SACnC,CAAC,CAAC;QACH,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,SAAS,GAAG,IAAA,2BAAe,EAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QACzD,MAAM,OAAO,GAAiB;YAC5B,MAAM;YACN;gBACE,QAAQ,EAAE,aAAa;gBACvB,MAAM,EAAE,2BAA2B;gBACnC,QAAQ,EAAE,CAAC,yCAAyC,CAAC;aACtD;SACF,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACtD,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7D,IAAA,eAAM,EAAC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7C,IAAA,eAAM,EAAC,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC5D,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,MAAM,aAAa,GAAG,IAAA,2BAAe,EAAC,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,YAAY,GAAG,IAAA,2BAAe,EAAC,EAAE,IAAI,EAAE,WAAW,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC;QAEtF,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC7F,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAE3F,yCAAyC;QACzC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;YACtB,oFAAoF;QACtF,CAAC;QACD,qCAAqC;QACrC,IAAA,eAAM,EAAC,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjD,IAAA,eAAM,EAAC,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;AACL,CAAC,CA
AC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ngrams.test.d.ts","sourceRoot":"","sources":["../../../src/__tests__/heuristic/ngrams.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const ngrams_1 = require("../../heuristic/ngrams");
|
|
5
|
+
// Tests for tokenize(): case folding, punctuation removal and the absence
// of empty tokens.
(0, vitest_1.describe)('tokenize', () => {
    const { it, expect } = vitest_1;
    it('lowercases and splits on whitespace and punctuation', () => {
        const tokens = (0, ngrams_1.tokenize)('Hello, World! This is a test.');
        // Every word must be present in lower case.
        for (const word of ['hello', 'world', 'this', 'is', 'a', 'test']) {
            expect(tokens).toContain(word);
        }
        // Punctuation marks must not survive as tokens of their own.
        for (const mark of [',', '!', '.']) {
            expect(tokens).not.toContain(mark);
        }
    });
    it('filters empty tokens', () => {
        const produced = (0, ngrams_1.tokenize)(' hello world ');
        const allNonEmpty = produced.every((token) => token.length > 0);
        expect(allNonEmpty).toBe(true);
    });
    it('returns empty array for empty string', () => {
        const produced = (0, ngrams_1.tokenize)('');
        expect(produced).toEqual([]);
    });
});
|
|
27
|
+
// Table-driven tests for getNgrams(tokens, n). Each row is
// [test title, input tokens, n, expected n-grams]; rows are registered in
// the order listed.
(0, vitest_1.describe)('getNgrams', () => {
    const cases = [
        ['returns unigrams for n=1', ['a', 'b', 'c'], 1, ['a', 'b', 'c']],
        ['returns bigrams for n=2', ['a', 'b', 'c'], 2, ['a b', 'b c']],
        ['returns empty for tokens shorter than n', ['a'], 2, []],
        ['returns empty for empty tokens', [], 1, []],
    ];
    for (const [title, tokens, size, expected] of cases) {
        (0, vitest_1.it)(title, () => {
            const grams = (0, ngrams_1.getNgrams)(tokens, size);
            (0, vitest_1.expect)(grams).toEqual(expected);
        });
    }
});
|
|
43
|
+
// Tests for ngramOverlap(a, b[, n]). The expected values in the partial
// overlap cases are computed as |shared n-grams| / |union of n-grams|.
(0, vitest_1.describe)('ngramOverlap', () => {
    const { it, expect } = vitest_1;
    // Thin forwarder so each test passes exactly the arguments it lists.
    const overlapOf = (...args) => (0, ngrams_1.ngramOverlap)(...args);
    it('returns 1.0 for identical strings', () => {
        const sentence = 'the cat sat on the mat';
        expect(overlapOf(sentence, 'the cat sat on the mat')).toBeCloseTo(1.0);
    });
    it('returns 0.0 for completely disjoint strings', () => {
        const score = overlapOf('alpha beta gamma', 'delta epsilon zeta');
        expect(score).toBeCloseTo(0.0);
    });
    it('returns approximately 0.5 for 50% token overlap', () => {
        // "a b c" vs "a b d": shared = {a, b}, union = {a, b, c, d} -> 2/4 = 0.5
        const score = overlapOf('a b c', 'a b d');
        expect(score).toBeCloseTo(0.5, 1);
    });
    it('returns 1.0 for both empty strings', () => {
        expect(overlapOf('', '')).toBe(1.0);
    });
    it('returns 0.0 when one string is empty', () => {
        // Checked in both argument positions.
        expect(overlapOf('hello world', '')).toBe(0.0);
        expect(overlapOf('', 'hello world')).toBe(0.0);
    });
    it('supports bigram overlap', () => {
        // Bigrams of a: {the quick, quick brown, brown fox}
        // Bigrams of b: {the quick, quick red, red fox}
        // Shared: {the quick} -> 1; union has 5 members -> 1/5
        const score = overlapOf('the quick brown fox', 'the quick red fox', 2);
        expect(score).toBeCloseTo(1 / 5, 3);
    });
});
|
|
71
|
+
// Tests for weightedNgramOverlap(a, b[, ngramSizes, weights]): the two
// extremes, a partial-overlap range check and an explicit sizes/weights call.
(0, vitest_1.describe)('weightedNgramOverlap', () => {
    const { it, expect } = vitest_1;
    // Thin forwarder so each test passes exactly the arguments it lists.
    const weighted = (...args) => (0, ngrams_1.weightedNgramOverlap)(...args);
    it('returns 1.0 for identical strings', () => {
        const result = weighted('hello world test', 'hello world test');
        expect(result).toBeCloseTo(1.0);
    });
    it('returns 0.0 for completely disjoint strings', () => {
        const result = weighted('alpha beta', 'gamma delta');
        expect(result).toBeCloseTo(0.0);
    });
    it('returns value between 0 and 1 for partial overlap', () => {
        const result = weighted('the cat sat', 'the cat ran');
        // Strictly inside the open interval (0, 1).
        expect(result).toBeGreaterThan(0);
        expect(result).toBeLessThan(1);
    });
    it('uses custom ngramSizes and weights', () => {
        // Unigrams only, weight 1.0: shared = {a, b}, union = {a, b, c, d} -> 2/4 = 0.5
        const result = weighted('a b c', 'a b d', [1], [1.0]);
        expect(result).toBeCloseTo(0.5, 1);
    });
});
|
|
89
|
+
//# sourceMappingURL=ngrams.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ngrams.test.js","sourceRoot":"","sources":["../../../src/__tests__/heuristic/ngrams.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,mDAAiG;AAEjG,IAAA,iBAAQ,EAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAA,WAAE,EAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,IAAA,iBAAQ,EAAC,+BAA+B,CAAC,CAAC;QACzD,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACjC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACjC,uBAAuB;QACvB,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAClC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAClC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,MAAM,GAAG,IAAA,iBAAQ,EAAC,mBAAmB,CAAC,CAAC;QAC7C,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,IAAA,eAAM,EAAC,IAAA,iBAAQ,EAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,WAAW,EAAE,GAAG,EAAE;IACzB,IAAA,WAAE,EAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC/B,IAAA,eAAM,EAAC,IAAA,kBAAS,EAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC/B,IAAA,eAAM,EAAC,IAAA,kBAAS,EAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,IAAA,eAAM,EAAC,IAAA,kBAAS,EAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC1C,C
AAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,IAAA,eAAM,EAAC,IAAA,kBAAS,EAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,IAAA,WAAE,EAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,IAAA,eAAM,EAAC,IAAA,qBAAY,EAAC,wBAAwB,EAAE,wBAAwB,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5F,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,IAAA,eAAM,EAAC,IAAA,qBAAY,EAAC,kBAAkB,EAAE,oBAAoB,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAClF,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,yDAAyD;QACzD,uEAAuE;QACvE,MAAM,OAAO,GAAG,IAAA,qBAAY,EAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC/C,IAAA,eAAM,EAAC,OAAO,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,IAAA,eAAM,EAAC,IAAA,qBAAY,EAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,IAAA,eAAM,EAAC,IAAA,qBAAY,EAAC,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClD,IAAA,eAAM,EAAC,IAAA,qBAAY,EAAC,EAAE,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,IAAA,qBAAY,EAAC,qBAAqB,EAAE,mBAAmB,EAAE,CAAC,CAAC,CAAC;QAC5E,iDAAiD;QACjD,6CAA6C;QAC7C,2CAA2C;QAC3C,IAAA,eAAM,EAAC,OAAO,CAAC,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAA,WAAE,EAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,IAAA,eAAM,EAAC,IAAA,6BAAoB,EAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACxF,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,IAAA,eAAM,EAAC,IAAA,6BAAoB,EAAC,YAAY,EAAE,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,KAAK,GAAG,IAAA,6BAAoB,EAAC,aAAa,EAAE,aAAa,CAAC,CAAC;QACjE,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACjC,IAAA,eAAM,EAAC,K
AAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,KAAK,GAAG,IAAA,6BAAoB,EAAC,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;QACjE,2DAA2D;QAC3D,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.test.d.ts","sourceRoot":"","sources":["../../../src/__tests__/heuristic/tfidf.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const tfidf_1 = require("../../heuristic/tfidf");
|
|
5
|
+
// Tests for tfidfSimilarity(query, document): identical and unrelated
// texts, the [0, 1] range, the empty-query case, and relative ordering of a
// related versus an unrelated document.
(0, vitest_1.describe)('tfidfSimilarity', () => {
    const { it, expect } = vitest_1;
    it('returns a high score for identical texts', () => {
        const passage = 'the quick brown fox jumps over the lazy dog';
        const similarity = (0, tfidf_1.tfidfSimilarity)(passage, passage);
        expect(similarity).toBeGreaterThan(0.9);
    });
    it('returns a low score for unrelated texts', () => {
        const physics = 'quantum physics relativity wave particle';
        const cooking = 'cooking recipes pasta sauce garlic';
        const similarity = (0, tfidf_1.tfidfSimilarity)(physics, cooking);
        expect(similarity).toBeLessThan(0.2);
    });
    it('returns a value in [0,1]', () => {
        const similarity = (0, tfidf_1.tfidfSimilarity)('hello world', 'world peace');
        expect(similarity).toBeGreaterThanOrEqual(0);
        expect(similarity).toBeLessThanOrEqual(1);
    });
    it('returns 0 for empty query', () => {
        const similarity = (0, tfidf_1.tfidfSimilarity)('', 'some document text');
        expect(similarity).toBe(0);
    });
    it('returns higher similarity for related texts than unrelated', () => {
        const query = 'retrieval augmented generation RAG pipeline';
        // Scored in the same order as before: related document first.
        const relatedScore = (0, tfidf_1.tfidfSimilarity)(query, 'RAG combines retrieval with generation for grounded answers');
        const unrelatedScore = (0, tfidf_1.tfidfSimilarity)(query, 'the weather is sunny today');
        expect(relatedScore).toBeGreaterThan(unrelatedScore);
    });
});
|
|
31
|
+
// Tests for cosineSimilarity() on Map-based term vectors: a vector against
// itself, two vectors with no shared key, and two empty vectors.
(0, vitest_1.describe)('cosineSimilarity', () => {
    const { it, expect } = vitest_1;
    it('returns 1.0 for identical vectors', () => {
        const weights = new Map([['a', 0.5], ['b', 0.3]]);
        // The same Map instance is passed on both sides.
        const similarity = (0, tfidf_1.cosineSimilarity)(weights, weights);
        expect(similarity).toBeCloseTo(1.0);
    });
    it('returns 0.0 for orthogonal vectors', () => {
        const onlyX = new Map([['x', 1.0]]);
        const onlyY = new Map([['y', 1.0]]);
        const similarity = (0, tfidf_1.cosineSimilarity)(onlyX, onlyY);
        expect(similarity).toBe(0);
    });
    it('returns 0.0 for empty vectors', () => {
        const similarity = (0, tfidf_1.cosineSimilarity)(new Map(), new Map());
        expect(similarity).toBe(0);
    });
});
|
|
45
|
+
// Tests for buildTfIdfVectors(query, documents): the returned object
// carries a Map for the query and one Map per document.
(0, vitest_1.describe)('buildTfIdfVectors', () => {
    const { it, expect } = vitest_1;
    it('builds query and document vectors with correct structure', () => {
        const built = (0, tfidf_1.buildTfIdfVectors)('hello world', ['hello there', 'another doc']);
        expect(built.queryVec instanceof Map).toBe(true);
        // One vector per input document.
        expect(built.docVecs).toHaveLength(2);
        expect(built.docVecs[0] instanceof Map).toBe(true);
    });
    it('assigns non-zero weight to terms in query', () => {
        const built = (0, tfidf_1.buildTfIdfVectors)('hello world', ['unrelated content']);
        // The query vector must contain at least one entry.
        expect(built.queryVec.size).toBeGreaterThan(0);
    });
});
|
|
57
|
+
//# sourceMappingURL=tfidf.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.test.js","sourceRoot":"","sources":["../../../src/__tests__/heuristic/tfidf.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAC9C,iDAA6F;AAE7F,IAAA,iBAAQ,EAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,IAAA,WAAE,EAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,IAAI,GAAG,6CAA6C,CAAC;QAC3D,MAAM,KAAK,GAAG,IAAA,uBAAe,EAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC1C,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,KAAK,GAAG,IAAA,uBAAe,EAC3B,0CAA0C,EAC1C,oCAAoC,CACrC,CAAC;QACF,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,KAAK,GAAG,IAAA,uBAAe,EAAC,aAAa,EAAE,aAAa,CAAC,CAAC;QAC5D,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACxC,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,KAAK,GAAG,IAAA,uBAAe,EAAC,EAAE,EAAE,oBAAoB,CAAC,CAAC;QACxD,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,4DAA4D,EAAE,GAAG,EAAE;QACpE,MAAM,KAAK,GAAG,6CAA6C,CAAC;QAC5D,MAAM,OAAO,GAAG,6DAA6D,CAAC;QAC9E,MAAM,SAAS,GAAG,4BAA4B,CAAC;QAC/C,IAAA,eAAM,EAAC,IAAA,uBAAe,EAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,IAAA,uBAAe,EAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;IAC7F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,IAAA,WAAE,EAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAA,eAAM,EAAC,IAAA,wBAAgB,EAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;QAChC,IAAA,eAAM,EAAC,IAAA,wBAAgB,EAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAA
A,WAAE,EAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,IAAA,eAAM,EAAC,IAAA,wBAAgB,EAAC,IAAI,GAAG,EAAE,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,IAAA,iBAAQ,EAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,IAAA,WAAE,EAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,IAAA,yBAAiB,EAAC,aAAa,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC,CAAC,CAAC;QAC/F,IAAA,eAAM,EAAC,QAAQ,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAA,eAAM,EAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,IAAA,eAAM,EAAC,OAAO,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAA,yBAAiB,EAAC,aAAa,EAAE,CAAC,mBAAmB,CAAC,CAAC,CAAC;QAC7E,IAAA,eAAM,EAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|