voyageai-cli 1.16.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -235
- package/package.json +1 -1
- package/src/cli.js +6 -0
- package/src/commands/completions.js +76 -1
- package/src/commands/eval.js +300 -0
- package/src/commands/models.js +4 -4
- package/src/commands/pipeline.js +311 -0
- package/src/commands/query.js +266 -0
- package/src/lib/catalog.js +1 -1
- package/src/lib/explanations.js +6 -6
- package/src/lib/metrics.js +174 -0
- package/src/playground/index.html +557 -34
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Information retrieval metrics for evaluating search quality.
|
|
5
|
+
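* The per-query metric functions take a ranked array of retrieved IDs and the
* relevant (expected) IDs as a Set or array; idealDcgAtK and aggregateMetrics
* take counts and per-query metric objects instead.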
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Precision@K: share of the top-K retrieved documents that are relevant.
 *
 * The denominator is the number of results actually present in the top-K
 * window, so a ranking shorter than K is not penalized for the missing slots.
 *
 * @param {string[]} retrieved - Retrieved document IDs, best match first
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cut-off rank
 * @returns {number} Value in [0, 1]; 0 when the top-K window is empty
 */
function precisionAtK(retrieved, relevant, k) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const head = retrieved.slice(0, k);
  if (head.length === 0) {
    return 0;
  }
  const matched = head.reduce(
    (count, docId) => (relevantIds.has(docId) ? count + 1 : count),
    0,
  );
  return matched / head.length;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Recall@K — fraction of relevant documents found in top-K results.
 *
 * Each relevant document is counted at most once, even if its ID appears
 * several times in the top-K results, so the result never exceeds 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cut-off rank
 * @returns {number} 0.0 to 1.0; 0 when there are no relevant documents
 */
function recallAtK(retrieved, relevant, k) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;

  // Collect distinct hits: counting every occurrence of a repeated ID would
  // let the numerator exceed the number of relevant documents.
  const found = new Set();
  for (const id of retrieved.slice(0, k)) {
    if (rel.has(id)) found.add(id);
  }
  return found.size / rel.size;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reciprocal rank for a single query: 1 divided by the 1-based position of
 * the first relevant result. Averaging this value over queries gives MRR.
 *
 * @param {string[]} retrieved - Retrieved document IDs, best match first
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} Value in [0, 1]; 0 when no retrieved document is relevant
 */
function reciprocalRank(retrieved, relevant) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const firstHit = retrieved.findIndex((docId) => relevantIds.has(docId));
  return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Discounted Cumulative Gain at K.
 * Binary relevance: 1 if relevant, 0 otherwise.
 *
 * A relevant document contributes gain only at its first (best) position.
 * Repeated occurrences of the same ID still occupy a rank but add nothing,
 * which keeps the result at or below idealDcgAtK(relevant count, k).
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cut-off rank
 * @returns {number} Non-negative DCG value
 */
function dcgAtK(retrieved, relevant, k) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  const credited = new Set();
  let dcg = 0;
  const topK = retrieved.slice(0, k);
  for (let i = 0; i < topK.length; i++) {
    const id = topK[i];
    // Skip non-relevant results and relevant IDs that already earned gain.
    if (!rel.has(id) || credited.has(id)) continue;
    credited.add(id);
    dcg += 1 / Math.log2(i + 2); // i+2 because log2(1) = 0
  }
  return dcg;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Ideal DCG at K: the DCG of a perfect ranking, i.e. one that places every
 * relevant document (up to K of them) at the top of the list.
 *
 * @param {number} numRelevant - Number of relevant documents for the query
 * @param {number} k - Cut-off rank
 * @returns {number} Sum of 1 / log2(position + 2) over the first min(numRelevant, k) positions
 */
function idealDcgAtK(numRelevant, k) {
  const limit = Math.min(numRelevant, k);
  let total = 0;
  let position = 0;
  while (position < limit) {
    // Same discount as dcgAtK: the 0-based position p is discounted by log2(p + 2).
    total += 1 / Math.log2(position + 2);
    position += 1;
  }
  return total;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Normalized DCG at K: the DCG of the given ranking divided by the DCG of an
 * ideal ranking for the same number of relevant documents.
 *
 * @param {string[]} retrieved - Retrieved document IDs, best match first
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @param {number} k - Cut-off rank
 * @returns {number} dcgAtK / idealDcgAtK, or 0 when the ideal DCG is 0
 */
function ndcgAtK(retrieved, relevant, k) {
  const relevantIds = relevant instanceof Set ? relevant : new Set(relevant);
  const actual = dcgAtK(retrieved, relevantIds, k);
  const ideal = idealDcgAtK(relevantIds.size, k);
  // An ideal DCG of 0 means there is nothing to normalize against.
  return ideal === 0 ? 0 : actual / ideal;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Average Precision — area under the precision-recall curve for a single query.
 *
 * Precision is sampled at the rank of each relevant document's first
 * occurrence and the samples are averaged over the total number of relevant
 * documents. A repeated ID still occupies its rank but is not credited again,
 * so the result never exceeds 1.0.
 *
 * @param {string[]} retrieved - Retrieved document IDs in rank order
 * @param {Set<string>|string[]} relevant - Relevant document IDs
 * @returns {number} 0.0 to 1.0; 0 when there are no relevant documents
 */
function averagePrecision(retrieved, relevant) {
  const rel = relevant instanceof Set ? relevant : new Set(relevant);
  if (rel.size === 0) return 0;

  const found = new Set();
  let sumPrecision = 0;
  for (let i = 0; i < retrieved.length; i++) {
    const id = retrieved[i];
    // Only the first occurrence of a relevant ID counts as a hit.
    if (!rel.has(id) || found.has(id)) continue;
    found.add(id);
    sumPrecision += found.size / (i + 1);
  }
  return sumPrecision / rel.size;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Run every per-query metric for one query and collect the results in a
 * single flat object.
 *
 * The object always has `mrr` and `ap`, plus `p@K`, `r@K` and `ndcg@K` for
 * each K in `kValues`.
 *
 * @param {string[]} retrieved - Retrieved doc IDs, best match first
 * @param {string[]} relevant - Relevant doc IDs
 * @param {number[]} kValues - Cut-offs for the @K metrics (default 1, 3, 5, 10)
 * @returns {object} Metric name -> value
 */
function computeMetrics(retrieved, relevant, kValues = [1, 3, 5, 10]) {
  // Build the lookup set once and share it with every metric function.
  const relevantIds = new Set(relevant);

  const metrics = {};
  metrics.mrr = reciprocalRank(retrieved, relevantIds);
  metrics.ap = averagePrecision(retrieved, relevantIds);

  for (const cutoff of kValues) {
    Object.assign(metrics, {
      [`p@${cutoff}`]: precisionAtK(retrieved, relevantIds, cutoff),
      [`r@${cutoff}`]: recallAtK(retrieved, relevantIds, cutoff),
      [`ndcg@${cutoff}`]: ndcgAtK(retrieved, relevantIds, cutoff),
    });
  }

  return metrics;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Aggregate metrics across multiple queries (mean).
 *
 * Every metric key that appears in any per-query object is aggregated, not
 * only the keys of the first one. For each key the mean is taken over the
 * queries that define it; a key with no defined value in any query is omitted
 * rather than reported as NaN.
 *
 * @param {object[]} perQueryMetrics - Array of metric objects from computeMetrics
 * @returns {object} Mean metrics keyed by metric name; {} for an empty input
 */
function aggregateMetrics(perQueryMetrics) {
  if (perQueryMetrics.length === 0) return {};

  // Union of keys in first-seen order, so output ordering is unchanged when
  // all queries share the same metric set.
  const keys = new Set();
  for (const metrics of perQueryMetrics) {
    for (const key of Object.keys(metrics)) keys.add(key);
  }

  const agg = {};
  for (const key of keys) {
    const values = perQueryMetrics.map((m) => m[key]).filter((v) => v !== undefined);
    if (values.length === 0) continue; // avoid 0 / 0 -> NaN
    agg[key] = values.reduce((s, v) => s + v, 0) / values.length;
  }

  return agg;
}
|
|
163
|
+
|
|
164
|
+
// Public API of this module: the per-query metric functions, the DCG helpers
// that ndcgAtK calls, and the computeMetrics / aggregateMetrics helpers.
module.exports = {
  precisionAtK,
  recallAtK,
  reciprocalRank,
  ndcgAtK,
  dcgAtK,
  idealDcgAtK,
  averagePrecision,
  computeMetrics,
  aggregateMetrics,
};
|