voyageai-cli 1.16.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Information retrieval metrics for evaluating search quality.
5
+ * All functions take arrays of retrieved IDs and relevant (expected) IDs.
6
+ */
7
+
8
/**
 * Precision@K — fraction of the examined top-K results that are relevant.
 *
 * The denominator is the number of results actually examined, i.e.
 * min(k, retrieved.length), not k itself. A short result list is therefore
 * not penalised for the slots it could not fill.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Rank cutoff; values <= 0 yield 0
 * @returns {number} 0.0 to 1.0
 */
function precisionAtK(retrieved, relevant, k) {
  // A negative cutoff would make slice() drop items from the tail instead of
  // producing an empty prefix, so reject non-positive cutoffs up front.
  if (k <= 0) return 0;

  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  const topK = retrieved.slice(0, k);
  if (topK.length === 0) return 0;

  let hits = 0;
  for (const id of topK) {
    if (rel.has(id)) hits += 1;
  }
  return hits / topK.length;
}
22
+
23
/**
 * Recall@K — fraction of the relevant documents found in the top-K results.
 *
 * Each relevant ID is counted at most once, so a result list that repeats
 * the same ID can never push recall above 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Rank cutoff; values <= 0 yield 0
 * @returns {number} 0.0 to 1.0 (0 when there are no relevant documents)
 */
function recallAtK(retrieved, relevant, k) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;
  // A negative cutoff would make slice() drop items from the tail instead of
  // producing an empty prefix, so reject non-positive cutoffs up front.
  if (k <= 0) return 0;

  // Track distinct relevant IDs rather than counting raw matches.
  const found = new Set();
  for (const id of retrieved.slice(0, k)) {
    if (rel.has(id)) found.add(id);
  }
  return found.size / rel.size;
}
37
+
38
/**
 * Reciprocal rank for a single query: 1 / (1-based position of the first
 * relevant result), or 0 when no retrieved ID is relevant.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} 0.0 to 1.0
 */
function reciprocalRank(retrieved, relevant) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const firstHit = retrieved.findIndex((id) => relevantIds.has(id));
  // findIndex reports -1 when nothing matched; ranks are 1-based.
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
51
+
52
/**
 * Discounted Cumulative Gain at K with binary relevance.
 *
 * A relevant ID contributes 1 / log2(rank + 1) at its first occurrence
 * (rank is 1-based). Later repeats of the same ID contribute nothing, so the
 * result never exceeds the ideal DCG for the same relevant set and cutoff.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Rank cutoff; values <= 0 yield 0
 * @returns {number} Non-negative DCG score
 */
function dcgAtK(retrieved, relevant, k) {
  // A negative cutoff would make slice() drop items from the tail instead of
  // producing an empty prefix, so reject non-positive cutoffs up front.
  if (k <= 0) return 0;

  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  const topK = retrieved.slice(0, k);
  const credited = new Set();
  let dcg = 0;

  for (let i = 0; i < topK.length; i++) {
    const id = topK[i];
    if (!rel.has(id) || credited.has(id)) continue;
    credited.add(id);
    dcg += 1 / Math.log2(i + 2); // i + 2 so the first rank is not divided by log2(1) = 0
  }
  return dcg;
}
71
+
72
/**
 * Ideal DCG at K — the DCG of a ranking that places every relevant document
 * at the top, limited to the first K positions.
 *
 * @param {number} numRelevant - How many relevant documents exist
 * @param {number} k - Rank cutoff
 * @returns {number} Sum of 1 / log2(position + 2) over the first
 *   min(numRelevant, k) zero-based positions
 */
function idealDcgAtK(numRelevant, k) {
  const idealHits = Math.min(numRelevant, k);
  let total = 0;
  let position = 0;
  while (position < idealHits) {
    // position + 2 keeps the first rank from dividing by log2(1) = 0.
    total += 1 / Math.log2(position + 2);
    position += 1;
  }
  return total;
}
86
+
87
/**
 * Normalized DCG at K — the ranking's DCG divided by the ideal DCG for the
 * same number of relevant documents and the same cutoff.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Rank cutoff
 * @returns {number} dcg / idcg, or 0 when the ideal DCG is 0
 */
function ndcgAtK(retrieved, relevant, k) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const actual = dcgAtK(retrieved, relevantIds, k);
  const ideal = idealDcgAtK(relevantIds.size, k);
  // An ideal score of 0 means nothing could have been gained; avoid 0 / 0.
  return ideal === 0 ? 0 : actual / ideal;
}
101
+
102
/**
 * Average Precision for a single query — the mean, over all relevant
 * documents, of the precision measured at the rank where each one is first
 * retrieved. Relevant documents that are never retrieved contribute 0.
 *
 * Each relevant ID is credited only at its first occurrence, so a result
 * list that repeats an ID cannot push the score above 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} 0.0 to 1.0 (0 when there are no relevant documents)
 */
function averagePrecision(retrieved, relevant) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;

  const credited = new Set();
  let sumPrecision = 0;
  for (let i = 0; i < retrieved.length; i++) {
    const id = retrieved[i];
    if (!rel.has(id) || credited.has(id)) continue;
    credited.add(id);
    // credited.size is the number of distinct relevant IDs seen up to rank i + 1.
    sumPrecision += credited.size / (i + 1);
  }
  return sumPrecision / rel.size;
}
121
+
122
/**
 * Compute every metric for a single query.
 *
 * The result always has `mrr` and `ap`, followed by `p@K`, `r@K` and
 * `ndcg@K` entries for each K in `kValues`, in that order.
 *
 * @param {string[]} retrieved - Retrieved doc IDs in rank order
 * @param {string[]} relevant - Relevant doc IDs (passed to `new Set`)
 * @param {number[]} kValues - Cutoffs for the @K metrics
 * @returns {object} Metric name → score
 */
function computeMetrics(retrieved, relevant, kValues = [1, 3, 5, 10]) {
  // Build the lookup set once and share it with every metric.
  const relevantIds = new Set(relevant);

  const metrics = {};
  metrics.mrr = reciprocalRank(retrieved, relevantIds);
  metrics.ap = averagePrecision(retrieved, relevantIds);

  for (const cutoff of kValues) {
    Object.assign(metrics, {
      [`p@${cutoff}`]: precisionAtK(retrieved, relevantIds, cutoff),
      [`r@${cutoff}`]: recallAtK(retrieved, relevantIds, cutoff),
      [`ndcg@${cutoff}`]: ndcgAtK(retrieved, relevantIds, cutoff),
    });
  }

  return metrics;
}
144
+
145
/**
 * Aggregate metrics across multiple queries by taking the mean of each one.
 *
 * Keys are collected from every per-query object, not just the first, so a
 * metric that only some queries report (for example, different K values) is
 * still averaged — over the queries that define it. `undefined` values are
 * ignored, and a key with no defined value at all is omitted rather than
 * reported as NaN.
 *
 * @param {object[]} perQueryMetrics - Metric objects from computeMetrics
 * @returns {object} Metric name → mean score; `{}` for an empty input
 */
function aggregateMetrics(perQueryMetrics) {
  if (perQueryMetrics.length === 0) return {};

  // Map keeps first-seen key order, matching the order of the input objects.
  const totals = new Map();
  for (const metrics of perQueryMetrics) {
    for (const [key, value] of Object.entries(metrics)) {
      if (value === undefined) continue;
      const entry = totals.get(key) ?? { sum: 0, count: 0 };
      entry.sum += value;
      entry.count += 1;
      totals.set(key, entry);
    }
  }

  const agg = {};
  for (const [key, { sum, count }] of totals) {
    agg[key] = sum / count;
  }
  return agg;
}
163
+
164
// Public API: per-query metrics, their building blocks, and the aggregator.
const retrievalMetrics = {
  precisionAtK,
  recallAtK,
  reciprocalRank,
  ndcgAtK,
  dcgAtK,
  idealDcgAtK,
  averagePrecision,
  computeMetrics,
  aggregateMetrics,
};

module.exports = retrievalMetrics;