voyageai-cli 1.18.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,300 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const { getDefaultModel, DEFAULT_RERANK_MODEL } = require('../lib/catalog');
5
+ const { generateEmbeddings, apiRequest } = require('../lib/api');
6
+ const { getMongoCollection } = require('../lib/mongo');
7
+ const { loadProject } = require('../lib/project');
8
+ const { computeMetrics, aggregateMetrics } = require('../lib/metrics');
9
+ const ui = require('../lib/ui');
10
+
11
/**
 * Load a retrieval test set from a JSONL file.
 *
 * Every non-blank line must be a JSON object holding a "query" plus at least
 * one of:
 *   { "query": "...", "relevant": ["id1", "id2"] }          (match by document ID)
 *   { "query": "...", "relevant_texts": ["text1", "text2"] } (match by text)
 *
 * Blank lines are skipped, but error messages report the line's real position
 * in the file so the offending entry can be located in an editor. Relevant IDs
 * and texts are normalized to strings because the eval command compares them
 * against stringified document fields.
 *
 * @param {string} filePath - Path to the JSONL file
 * @returns {Array<{query: string, relevant: string[], relevantTexts: string[]}>}
 * @throws {Error} If the file cannot be read, or a line is not valid JSON, is
 *   not a JSON object, lacks "query", lacks both relevance fields, or has a
 *   relevance field that is not an array
 */
function loadTestSet(filePath) {
  // Strip a UTF-8 byte-order mark; JSON.parse would otherwise reject line 1.
  const raw = fs.readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '');
  const testCases = [];

  raw.split('\n').forEach((line, idx) => {
    if (line.trim().length === 0) return;
    // Numbered before blank lines are dropped, so it matches the file.
    const lineNo = idx + 1;

    let item;
    try {
      item = JSON.parse(line);
    } catch (err) {
      throw new Error(`Line ${lineNo}: invalid JSON (${err.message})`);
    }
    if (item === null || typeof item !== 'object' || Array.isArray(item)) {
      throw new Error(`Line ${lineNo}: expected a JSON object`);
    }
    if (!item.query) throw new Error(`Line ${lineNo}: missing "query" field`);
    if (!item.relevant && !item.relevant_texts) {
      throw new Error(`Line ${lineNo}: need "relevant" (doc IDs) or "relevant_texts" (text matches)`);
    }
    for (const key of ['relevant', 'relevant_texts']) {
      // A bare string would later be split into characters by `new Set(...)`.
      if (item[key] && !Array.isArray(item[key])) {
        throw new Error(`Line ${lineNo}: "${key}" must be an array`);
      }
    }

    testCases.push({
      query: item.query,
      relevant: (item.relevant || []).map(String),
      relevantTexts: (item.relevant_texts || []).map(String),
    });
  });

  return testCases;
}
34
+
35
/**
 * Parse the comma-separated --k-values option into usable cutoffs.
 * Entries that are not positive integers are dropped.
 * @param {string} raw - e.g. "1,3,5,10"
 * @returns {number[]} Positive integer K values, in the order given
 */
function parseKValues(raw) {
  return String(raw)
    .split(',')
    .map((part) => Number.parseInt(part.trim(), 10))
    .filter((k) => Number.isInteger(k) && k > 0);
}

/**
 * Convert text-matched search results into synthetic IDs so the ID-based
 * metrics can score them.
 *
 * Expected text number j is represented by the ID `match_j`. A retrieved
 * document receives `match_j` when its text contains expected text j, or when
 * expected text j contains the first 50 characters of the document. Anything
 * else receives `miss_<rank>`. Each expected text is credited to at most one
 * document (the highest-ranked one) so no ID appears twice in the ranking.
 *
 * @param {object[]} searchResults - Retrieved documents in rank order
 * @param {string[]} expectedTexts - Texts the test case expects to find
 * @param {string} textField - Name of the document field holding its text
 * @returns {{retrievedIds: string[], relevantIds: string[]}}
 */
function labelByExpectedText(searchResults, expectedTexts, textField) {
  const needles = expectedTexts.map((text) => String(text).toLowerCase());
  const claimed = new Set();

  const retrievedIds = searchResults.map((doc, rank) => {
    const rawText = doc[textField];
    const docText = typeof rawText === 'string' ? rawText.toLowerCase() : '';
    // An empty string is "included" in every string, so a document without
    // text must never be allowed to match.
    if (docText.length === 0) return `miss_${rank}`;

    const docPrefix = docText.substring(0, 50);
    const matchIdx = needles.findIndex((needle, j) => (
      !claimed.has(j) &&
      needle.length > 0 &&
      (docText.includes(needle) || needle.includes(docPrefix))
    ));
    if (matchIdx === -1) return `miss_${rank}`;

    claimed.add(matchIdx);
    return `match_${matchIdx}`;
  });

  return { retrievedIds, relevantIds: needles.map((_, j) => `match_${j}`) };
}

/**
 * Print the human-readable evaluation report to stdout.
 * @param {object} report
 * @param {object} report.aggregated - Mean metrics keyed by metric name
 * @param {object[]} report.worstQueries - Per-query results with the lowest MRR
 * @param {number} report.queryCount - Number of evaluated queries
 * @param {number} report.totalEmbedTokens - Embedding tokens consumed
 * @param {number} report.totalRerankTokens - Rerank tokens consumed
 */
function printEvalReport({ aggregated, worstQueries, queryCount, totalEmbedTokens, totalRerankTokens }) {
  console.log(ui.bold('Results'));
  console.log('');

  // Main metrics table
  const metricKeys = Object.keys(aggregated);
  const maxKeyLen = Math.max(...metricKeys.map((k) => k.length));

  for (const key of metricKeys) {
    const val = aggregated[key];
    const bar = renderBar(val, 20);
    const label = key.toUpperCase().padEnd(maxKeyLen + 1);
    const valStr = val.toFixed(4);
    const color = val >= 0.8 ? ui.green(valStr) : val >= 0.5 ? ui.cyan(valStr) : ui.yellow(valStr);
    console.log(` ${label} ${bar} ${color}`);
  }

  console.log('');

  // Highlight key metrics. r@5 / ndcg@10 only exist when 5 / 10 are among
  // the requested K values, hence the undefined checks.
  const mrr = aggregated.mrr;
  const recall5 = aggregated['r@5'];
  const ndcg10 = aggregated['ndcg@10'];

  if (mrr !== undefined) {
    const grade = mrr >= 0.8 ? ui.green('Excellent') : mrr >= 0.6 ? ui.cyan('Good') : mrr >= 0.4 ? ui.yellow('Fair') : ui.red('Needs work');
    console.log(ui.label('MRR', `${mrr.toFixed(4)} — ${grade}`));
  }
  if (recall5 !== undefined) {
    console.log(ui.label('Recall@5', `${(recall5 * 100).toFixed(1)}% of relevant docs found in top 5`));
  }
  if (ndcg10 !== undefined) {
    console.log(ui.label('NDCG@10', `${ndcg10.toFixed(4)} — ranking quality`));
  }

  // Worst queries (skipped when even the worst one has a perfect MRR)
  if (worstQueries.length > 0 && worstQueries[0].metrics.mrr < 1) {
    console.log('');
    console.log(ui.bold('Hardest queries:'));
    for (const wq of worstQueries) {
      const preview = wq.query.substring(0, 60) + (wq.query.length > 60 ? '...' : '');
      const mrrStr = wq.metrics.mrr === 0 ? ui.red('miss') : ui.yellow(wq.metrics.mrr.toFixed(2));
      console.log(` ${mrrStr} "${preview}" (${wq.hits}/${wq.relevant.length} relevant found)`);
    }
  }

  console.log('');
  console.log(ui.dim(` ${queryCount} queries evaluated | Tokens: embed ${totalEmbedTokens}${totalRerankTokens ? `, rerank ${totalRerankTokens}` : ''}`));

  // Suggestions
  console.log('');
  if (mrr !== undefined && mrr < 0.6) {
    console.log(ui.dim(' 💡 Low MRR? Try: larger model, more candidates (--limit), or enable reranking (--rerank)'));
  }
  if (recall5 !== undefined && recall5 < 0.5) {
    console.log(ui.dim(' 💡 Low recall? Try: increasing --limit, different chunking strategy, or review your test set'));
  }
}

/**
 * Register the eval command on a Commander program.
 *
 * The command embeds each test-set query, runs a $vectorSearch aggregation,
 * optionally reranks the hits, scores the ranking with computeMetrics, and
 * prints either a JSON document (--json) or a formatted report.
 *
 * NOTE(review): the --compare option is registered but the action never reads
 * it — confirm whether comparison mode is still to be implemented.
 *
 * @param {import('commander').Command} program
 */
function registerEval(program) {
  program
    .command('eval')
    .description('Evaluate retrieval quality — measure MRR, NDCG, recall on your data')
    .requiredOption('--test-set <path>', 'JSONL file with queries and expected results')
    .option('--db <database>', 'Database name')
    .option('--collection <name>', 'Collection name')
    .option('--index <name>', 'Vector search index name')
    .option('--field <name>', 'Embedding field name')
    .option('-m, --model <model>', 'Embedding model for queries')
    .option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
    .option('-l, --limit <n>', 'Vector search candidates per query', (v) => parseInt(v, 10), 20)
    .option('-k, --k-values <values>', 'Comma-separated K values for @K metrics', '1,3,5,10')
    .option('--rerank', 'Enable reranking')
    .option('--no-rerank', 'Skip reranking')
    .option('--rerank-model <model>', 'Reranking model')
    .option('--text-field <name>', 'Document text field', 'text')
    .option('--id-field <name>', 'Document ID field for matching (default: _id)', '_id')
    .option('--compare <configs>', 'Compare configs: "model1,model2" or "rerank,no-rerank"')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      let client;
      let failed = false;
      try {
        // Merge project config: CLI flags win over project settings.
        const { config: proj } = loadProject();
        const db = opts.db || proj.db;
        const collection = opts.collection || proj.collection;
        const index = opts.index || proj.index || 'vector_index';
        const field = opts.field || proj.field || 'embedding';
        const model = opts.model || proj.model || getDefaultModel();
        const rerankModel = opts.rerankModel || DEFAULT_RERANK_MODEL;
        const textField = opts.textField || 'text';
        const idField = opts.idField || '_id';
        // Reranking stays on unless --no-rerank set the flag to false.
        const doRerank = opts.rerank !== false;
        const dimensions = opts.dimensions || proj.dimensions;
        const kValues = parseKValues(opts.kValues);
        const limit = opts.limit;

        if (!db || !collection) {
          console.error(ui.error('Database and collection required. Use --db/--collection or "vai init".'));
          process.exit(1);
        }
        if (kValues.length === 0) {
          console.error(ui.error('No valid K values. Use --k-values with positive integers, e.g. "1,3,5,10".'));
          process.exit(1);
        }
        if (!Number.isInteger(limit) || limit <= 0) {
          console.error(ui.error('--limit must be a positive integer.'));
          process.exit(1);
        }

        // Load test set
        let testSet;
        try {
          testSet = loadTestSet(opts.testSet);
        } catch (err) {
          console.error(ui.error(`Failed to load test set: ${err.message}`));
          process.exit(1);
        }

        if (testSet.length === 0) {
          console.error(ui.error('Test set is empty.'));
          process.exit(1);
        }

        const verbose = !opts.json && !opts.quiet;

        if (verbose) {
          console.log('');
          console.log(ui.bold('📊 Retrieval Evaluation'));
          console.log(ui.dim(` Test set: ${testSet.length} queries`));
          console.log(ui.dim(` Collection: ${db}.${collection}`));
          console.log(ui.dim(` Model: ${model}${doRerank ? ` + ${rerankModel}` : ''}`));
          console.log(ui.dim(` K values: ${kValues.join(', ')}`));
          console.log('');
        }

        // Connect to MongoDB
        const { client: c, collection: coll } = await getMongoCollection(db, collection);
        client = c;

        // Loop-invariant values
        const maxK = Math.max(...kValues);
        const numCandidates = Math.min(limit * 15, 10000);

        // Run evaluation
        const perQueryResults = [];
        let totalEmbedTokens = 0;
        let totalRerankTokens = 0;

        for (let qi = 0; qi < testSet.length; qi++) {
          const testCase = testSet[qi];

          if (verbose) {
            process.stderr.write(`\r Evaluating query ${qi + 1}/${testSet.length}...`);
          }

          // Embed query
          const embedOpts = { model, inputType: 'query' };
          if (dimensions) embedOpts.dimensions = dimensions;
          const embedResult = await generateEmbeddings([testCase.query], embedOpts);
          const queryVector = embedResult.data[0].embedding;
          totalEmbedTokens += embedResult.usage?.total_tokens || 0;

          // Vector search
          const pipeline = [
            {
              $vectorSearch: {
                index,
                path: field,
                queryVector,
                numCandidates,
                limit,
              },
            },
            { $addFields: { _vsScore: { $meta: 'vectorSearchScore' } } },
          ];

          let searchResults = await coll.aggregate(pipeline).toArray();

          // Rerank if enabled (pointless with fewer than two candidates)
          if (doRerank && searchResults.length > 1) {
            const documents = searchResults.map((doc) => {
              const txt = doc[textField];
              return typeof txt === 'string' ? txt : JSON.stringify(txt || doc);
            });

            const rerankResult = await apiRequest('/rerank', {
              query: testCase.query,
              documents,
              model: rerankModel,
            });
            totalRerankTokens += rerankResult.usage?.total_tokens || 0;

            // Reorder by rerank score: each item points back at its input index
            searchResults = (rerankResult.data || []).map((item) => searchResults[item.index]);
          }

          // Build the retrieved/relevant ID lists. They are kept in locals so
          // the loaded test case is never modified.
          let retrievedIds;
          let relevantIds;
          if (testCase.relevant.length > 0) {
            // Match by ID field
            retrievedIds = searchResults.map((doc) => String(doc[idField]));
            relevantIds = testCase.relevant;
          } else {
            // Match by text (fuzzy containment)
            ({ retrievedIds, relevantIds } = labelByExpectedText(searchResults, testCase.relevantTexts, textField));
          }

          // Compute metrics
          const metrics = computeMetrics(retrievedIds, relevantIds, kValues);
          const relevantSet = new Set(relevantIds);

          perQueryResults.push({
            query: testCase.query,
            relevant: relevantIds,
            retrieved: retrievedIds.slice(0, maxK),
            metrics,
            hits: retrievedIds.filter((id) => relevantSet.has(id)).length,
          });
        }

        if (verbose) {
          // Wipe the progress line
          process.stderr.write('\r' + ' '.repeat(50) + '\r');
        }

        // Aggregate metrics
        const allMetrics = perQueryResults.map((r) => r.metrics);
        const aggregated = aggregateMetrics(allMetrics);

        // Find worst-performing queries (lowest MRR first)
        const sorted = [...perQueryResults].sort((a, b) => a.metrics.mrr - b.metrics.mrr);
        const worstQueries = sorted.slice(0, Math.min(3, sorted.length));

        if (opts.json) {
          console.log(JSON.stringify({
            config: { model, rerank: doRerank, rerankModel: doRerank ? rerankModel : null, db, collection, kValues },
            summary: aggregated,
            tokens: { embed: totalEmbedTokens, rerank: totalRerankTokens },
            queries: perQueryResults.length,
            perQuery: perQueryResults,
          }, null, 2));
          return;
        }

        // Pretty output
        printEvalReport({
          aggregated,
          worstQueries,
          queryCount: testSet.length,
          totalEmbedTokens,
          totalRerankTokens,
        });
      } catch (err) {
        console.error(ui.error(err.message));
        // Exit only after `finally`: process.exit() here would terminate
        // before the MongoDB client gets closed.
        failed = true;
      } finally {
        if (client) await client.close();
      }
      if (failed) process.exit(1);
    });
}
287
+
288
/**
 * Render a fixed-width bar for a ratio.
 *
 * The value is clamped to the 0..1 range and non-finite values (NaN,
 * ±Infinity) are drawn as an empty bar, so the result always contains exactly
 * `width` characters and String.prototype.repeat never receives a negative
 * count.
 *
 * @param {number} value - Ratio to draw, nominally 0.0 to 1.0
 * @param {number} width - Bar width in characters (non-negative integer)
 * @returns {string} `width` characters: filled cells followed by empty cells
 */
function renderBar(value, width) {
  const ratio = Number.isFinite(value) ? Math.min(Math.max(value, 0), 1) : 0;
  const filled = Math.round(ratio * width);
  return '█'.repeat(filled) + '░'.repeat(width - filled);
}
299
+
300
+ module.exports = { registerEval };
@@ -84,20 +84,20 @@ function registerModels(program) {
84
84
  const displayLegacy = models.filter(m => m.legacy);
85
85
 
86
86
  const formatWideRow = (m) => {
87
- const name = ui.cyan(m.name);
87
+ const label = m.unreleased ? ui.cyan(m.name) + ' ' + ui.dim('(unreleased)') : ui.cyan(m.name);
88
88
  const type = m.type.startsWith('embedding') ? ui.green(m.type) : ui.yellow(m.type);
89
89
  const price = ui.dim(m.price);
90
90
  const arch = m.architecture ? (m.architecture === 'moe' ? ui.cyan('MoE') : m.architecture) : '—';
91
91
  const space = m.sharedSpace ? ui.green('✓ ' + m.sharedSpace) : '—';
92
- return [name, type, m.context, m.dimensions, arch, space, price, m.bestFor];
92
+ return [label, type, m.context, m.dimensions, arch, space, price, m.bestFor];
93
93
  };
94
94
 
95
95
  const formatCompactRow = (m) => {
96
- const name = ui.cyan(m.name);
96
+ const label = m.unreleased ? ui.cyan(m.name) + ' ' + ui.dim('(soon)') : ui.cyan(m.name);
97
97
  const type = m.type.startsWith('embedding') ? ui.green(m.multimodal ? 'multi' : 'embed') : ui.yellow('rerank');
98
98
  const dims = compactDimensions(m.dimensions);
99
99
  const price = ui.dim(compactPrice(m.price));
100
- return [name, type, dims, price, m.shortFor || m.bestFor];
100
+ return [label, type, dims, price, m.shortFor || m.bestFor];
101
101
  };
102
102
 
103
103
  if (opts.wide) {
@@ -36,7 +36,7 @@ const MODEL_CATALOG = [
36
36
  { name: 'voyage-multimodal-3.5', type: 'embedding-multimodal', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal', multimodal: true },
37
37
  { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', pricePerMToken: 0.05, bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
38
38
  { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', pricePerMToken: 0.02, bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
39
- { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight (free)', pricePerMToken: 0, bestFor: 'Open-weight / edge / local', shortFor: 'Open / edge', local: true, family: 'voyage-4', architecture: 'dense', sharedSpace: 'voyage-4', huggingface: 'https://huggingface.co/voyageai/voyage-4-nano', rtebScore: null },
39
+ { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight (free)', pricePerMToken: 0, bestFor: 'Open-weight / edge / local', shortFor: 'Open / edge', local: true, unreleased: true, family: 'voyage-4', architecture: 'dense', sharedSpace: 'voyage-4', huggingface: 'https://huggingface.co/voyageai/voyage-4-nano', rtebScore: null },
40
40
  // Legacy models
41
41
  { name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', pricePerMToken: 0.18, bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true, rtebScore: null },
42
42
  { name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', pricePerMToken: 0.06, bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true, rtebScore: null },
@@ -566,7 +566,7 @@ const concepts = {
566
566
  ``,
567
567
  `${pc.bold('What this means:')}`,
568
568
  ` ${pc.dim('•')} Embed documents with ${pc.cyan('voyage-4-large')} (best quality, one-time cost)`,
569
- ` ${pc.dim('•')} Query with ${pc.cyan('voyage-4-lite')} or ${pc.cyan('voyage-4-nano')} (low cost, high volume)`,
569
+ ` ${pc.dim('•')} Query with ${pc.cyan('voyage-4-lite')} (low cost) or ${pc.cyan('voyage-4-nano')} (local, HuggingFace only)`,
570
570
  ` ${pc.dim('•')} Cosine similarity works across model boundaries`,
571
571
  ` ${pc.dim('•')} Upgrade query model later ${pc.cyan('without re-vectorizing documents')}`,
572
572
  ``,
@@ -658,9 +658,9 @@ const concepts = {
658
658
  ` ${pc.dim('model = SentenceTransformer("voyageai/voyage-4-nano")')}`,
659
659
  ` ${pc.dim('embeddings = model.encode(["your text here"])')}`,
660
660
  ``,
661
- `${pc.bold('With the Voyage API:')} voyage-4-nano is also available via the standard API`,
662
- `endpoint, so you can use ${pc.cyan('vai embed --model voyage-4-nano')} for testing before`,
663
- `switching to local inference.`,
661
+ `${pc.bold('API status:')} voyage-4-nano is ${pc.yellow('not yet available')} via the Voyage API.`,
662
+ `Currently it's ${pc.cyan('local-only')} via Hugging Face. API support may come later —`,
663
+ `for now, use the Python sentence-transformers approach shown above.`,
664
664
  ``,
665
665
  `${pc.bold('Shared space advantage:')} Since nano shares the same embedding space as the`,
666
666
  `larger Voyage 4 models, you can prototype locally with nano, then seamlessly`,
@@ -671,9 +671,9 @@ const concepts = {
671
671
  'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
672
672
  ],
673
673
  tryIt: [
674
- 'vai embed "test nano" --model voyage-4-nano',
674
+ 'vai models --wide # see nano in the catalog',
675
+ 'vai explain shared-space',
675
676
  'vai benchmark space',
676
- 'vai benchmark asymmetric --doc-model voyage-4-large --query-models voyage-4-nano',
677
677
  ],
678
678
  },
679
679
  };
@@ -0,0 +1,174 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Information retrieval metrics for evaluating search quality.
5
+ * All functions take arrays of retrieved IDs and relevant (expected) IDs.
6
+ */
7
+
8
/**
 * Precision@K — share of the top-K retrieved results that are relevant.
 *
 * The denominator is the number of results actually present in the top-K
 * window, so a ranking shorter than K is not penalized for the missing slots.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cutoff rank
 * @returns {number} 0.0 to 1.0 (0 when the window is empty)
 */
function precisionAtK(retrieved, relevant, k) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const window = retrieved.slice(0, k);
  if (window.length === 0) return 0;

  let hitCount = 0;
  for (const id of window) {
    if (relevantIds.has(id)) hitCount += 1;
  }
  return hitCount / window.length;
}
22
+
23
/**
 * Recall@K — fraction of the relevant documents that appear in the top-K
 * results.
 *
 * Each relevant ID is counted once even if it occurs several times in
 * `retrieved` (for example when several chunks share a parent document ID),
 * which keeps the result within 0.0 to 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cutoff rank
 * @returns {number} 0.0 to 1.0 (0 when there are no relevant documents)
 */
function recallAtK(retrieved, relevant, k) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;

  const found = new Set();
  for (const id of retrieved.slice(0, k)) {
    if (rel.has(id)) found.add(id);
  }
  return found.size / rel.size;
}
37
+
38
/**
 * Reciprocal rank for a single query — 1 / (1-based rank of the first
 * relevant result). Averaging this value over queries yields MRR.
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} 0.0 to 1.0 (0 when no relevant result was retrieved)
 */
function reciprocalRank(retrieved, relevant) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const firstHit = retrieved.findIndex((id) => relevantIds.has(id));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
51
+
52
/**
 * Discounted Cumulative Gain at K with binary relevance (gain 1 for a
 * relevant document, 0 otherwise).
 *
 * A relevant ID earns gain only at its first (highest-ranked) occurrence.
 * Repeats of the same ID add nothing, so the value can never exceed the ideal
 * DCG for the same number of relevant documents.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cutoff rank
 * @returns {number} Sum of 1 / log2(rank + 1) over credited ranks (1-based)
 */
function dcgAtK(retrieved, relevant, k) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  const credited = new Set();
  const topK = retrieved.slice(0, k);

  let dcg = 0;
  for (let i = 0; i < topK.length; i++) {
    const id = topK[i];
    if (rel.has(id) && !credited.has(id)) {
      credited.add(id);
      // i is 0-based, so the discount is log2(i + 2); log2(1) would be 0.
      dcg += 1 / Math.log2(i + 2);
    }
  }
  return dcg;
}
71
+
72
/**
 * Ideal DCG at K — the DCG of a perfect ranking in which every relevant
 * document (up to K of them) occupies the top positions.
 * @param {number} numRelevant - Number of relevant documents for the query
 * @param {number} k - Cutoff rank
 * @returns {number} Best achievable binary-relevance DCG at K
 */
function idealDcgAtK(numRelevant, k) {
  const positions = Math.min(numRelevant, k);
  let total = 0;
  let rank = 0;
  while (rank < positions) {
    // Same discount as DCG: 0-based rank r contributes 1 / log2(r + 2).
    total += 1 / Math.log2(rank + 2);
    rank += 1;
  }
  return total;
}
86
+
87
/**
 * Normalized DCG at K — the ranking's DCG divided by the ideal DCG for the
 * same number of relevant documents.
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cutoff rank
 * @returns {number} DCG / ideal DCG, or 0 when the ideal DCG is 0
 */
function ndcgAtK(retrieved, relevant, k) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const actual = dcgAtK(retrieved, relevantIds, k);
  const ideal = idealDcgAtK(relevantIds.size, k);
  // No relevant documents (or k of 0) leaves nothing to normalize against.
  return ideal === 0 ? 0 : actual / ideal;
}
101
+
102
/**
 * Average Precision for a single query — the mean of precision@rank taken at
 * each rank where a relevant document first appears, divided by the total
 * number of relevant documents.
 *
 * A relevant ID is counted only at its first occurrence. Repeats of an
 * already-found ID are ignored, which keeps the result within 0.0 to 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} 0.0 to 1.0 (0 when there are no relevant documents)
 */
function averagePrecision(retrieved, relevant) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;

  const seen = new Set();
  let hits = 0;
  let sumPrecision = 0;
  for (let i = 0; i < retrieved.length; i++) {
    const id = retrieved[i];
    if (rel.has(id) && !seen.has(id)) {
      seen.add(id);
      hits++;
      // Precision at this rank: relevant documents found so far / rank.
      sumPrecision += hits / (i + 1);
    }
  }
  return sumPrecision / rel.size;
}
121
+
122
/**
 * Compute every metric for one query.
 *
 * The result always contains `mrr` and `ap`, followed by `p@K`, `r@K` and
 * `ndcg@K` for each requested K.
 *
 * @param {string[]} retrieved - Retrieved doc IDs in rank order
 * @param {string[]} relevant - Relevant doc IDs
 * @param {number[]} kValues - Cutoffs for the @K metrics
 * @returns {object} Metric values keyed by metric name
 */
function computeMetrics(retrieved, relevant, kValues = [1, 3, 5, 10]) {
  // Build the lookup set once and share it with every metric.
  const relevantIds = new Set(relevant);
  const metrics = {};

  metrics.mrr = reciprocalRank(retrieved, relevantIds);
  metrics.ap = averagePrecision(retrieved, relevantIds);

  for (const cutoff of kValues) {
    Object.assign(metrics, {
      [`p@${cutoff}`]: precisionAtK(retrieved, relevantIds, cutoff),
      [`r@${cutoff}`]: recallAtK(retrieved, relevantIds, cutoff),
      [`ndcg@${cutoff}`]: ndcgAtK(retrieved, relevantIds, cutoff),
    });
  }

  return metrics;
}
144
+
145
/**
 * Average each metric across queries.
 *
 * The metric names are taken from the first entry. For each name, entries
 * where that metric is `undefined` are left out of both the sum and the count.
 *
 * @param {object[]} perQueryMetrics - Metric objects as produced by computeMetrics
 * @returns {object} Mean value per metric name ({} for an empty input)
 */
function aggregateMetrics(perQueryMetrics) {
  if (perQueryMetrics.length === 0) return {};

  const means = {};
  for (const name of Object.keys(perQueryMetrics[0])) {
    const tally = perQueryMetrics.reduce((acc, queryMetrics) => {
      const value = queryMetrics[name];
      if (value !== undefined) {
        acc.sum += value;
        acc.count += 1;
      }
      return acc;
    }, { sum: 0, count: 0 });
    means[name] = tally.sum / tally.count;
  }
  return means;
}
163
+
164
+ module.exports = {
165
+ precisionAtK,
166
+ recallAtK,
167
+ reciprocalRank,
168
+ ndcgAtK,
169
+ dcgAtK,
170
+ idealDcgAtK,
171
+ averagePrecision,
172
+ computeMetrics,
173
+ aggregateMetrics,
174
+ };