voyageai-cli 1.13.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +6 -0
- package/src/commands/benchmark.js +164 -0
- package/src/commands/chunk.js +277 -0
- package/src/commands/completions.js +51 -1
- package/src/commands/estimate.js +209 -0
- package/src/commands/init.js +153 -0
- package/src/commands/models.js +32 -4
- package/src/lib/catalog.js +42 -18
- package/src/lib/chunker.js +341 -0
- package/src/lib/explanations.js +183 -0
- package/src/lib/project.js +122 -0
- package/src/lib/readers.js +239 -0
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
'use strict';

/**
 * Available chunking strategies.
 * Each name maps to one of the chunk* implementations below via chunk().
 */
const STRATEGIES = ['fixed', 'sentence', 'paragraph', 'recursive', 'markdown'];

/**
 * Default chunk options.
 * All values are measured in characters (not tokens): `size` is the target
 * chunk length, `overlap` the shared tail between consecutive chunks, and
 * `minSize` the threshold below which fragments are discarded.
 */
const DEFAULTS = {
  size: 512,
  overlap: 50,
  minSize: 20,
};
|
|
16
|
+
|
|
17
|
+
// ── Sentence splitting ──
|
|
18
|
+
|
|
19
|
+
/**
 * Split text into sentences.
 * Splits on sentence-ending punctuation followed by whitespace and an
 * upper-case (or quoted) sentence opener, and skips common abbreviations
 * (Mr., Mrs., Dr., ...) so they do not terminate a sentence — the original
 * comment promised this but the regex never implemented it.
 * @param {string} text
 * @returns {string[]} Non-empty, trimmed sentences.
 */
function splitSentences(text) {
  // (?<=[.!?])      — split point sits just after terminal punctuation
  // (?<!\b(?:…)\.)  — …unless that period belongs to a known abbreviation
  // \s+(?=[A-Z…"])  — consume the gap; next sentence opens with a capital/quote
  const boundary = /(?<=[.!?])(?<!\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|St|vs|etc)\.)\s+(?=[A-Z\u00C0-\u024F"])/;
  return text.split(boundary).map(s => s.trim()).filter(s => s.length > 0);
}
|
|
30
|
+
|
|
31
|
+
// ── Strategy implementations ──
|
|
32
|
+
|
|
33
|
+
/**
 * Fixed-size chunking with character count and overlap.
 * @param {string} text
 * @param {object} opts
 * @param {number} opts.size - Target chunk size in characters
 * @param {number} opts.overlap - Overlap between chunks in characters
 * @param {number} [opts.minSize] - Minimum chunk size (an explicit 0 is honored)
 * @returns {string[]}
 */
function chunkFixed(text, opts) {
  const { size, overlap } = opts;
  // `!= null` (not `||`) so an explicit minSize of 0 disables the filter
  // instead of silently falling back to the default.
  const minSize = opts.minSize != null ? opts.minSize : DEFAULTS.minSize;
  const chunks = [];

  // Clamp the advance to at least 1 character: the original computed
  // `start = end - overlap`, which never advances (infinite loop) whenever
  // overlap >= size and the text is longer than one chunk.
  const step = Math.max(1, size - overlap);

  for (let start = 0; start < text.length; start += step) {
    const end = start + size;
    chunks.push(text.slice(start, end).trim());
    if (end >= text.length) break;
  }

  return chunks.filter(c => c.length >= minSize);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Sentence-boundary chunking. Groups whole sentences until the size limit.
 * @param {string} text
 * @param {object} opts
 * @param {number} opts.size - Target chunk size in characters
 * @param {number} opts.overlap - Overlap budget in characters
 * @param {number} [opts.minSize] - Minimum chunk size (an explicit 0 is honored)
 * @returns {string[]}
 */
function chunkSentence(text, opts) {
  const { size, overlap } = opts;
  // `!= null` (not `||`) so an explicit minSize of 0 is respected.
  const minSize = opts.minSize != null ? opts.minSize : DEFAULTS.minSize;
  return groupUnits(splitSentences(text), size, overlap, minSize);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Paragraph chunking. Splits on blank lines (double newlines), then groups
 * paragraphs into chunks under the size limit.
 * @param {string} text
 * @param {object} opts
 * @param {number} opts.size - Target chunk size in characters
 * @param {number} opts.overlap - Overlap budget in characters
 * @param {number} [opts.minSize] - Minimum chunk size (an explicit 0 is honored)
 * @returns {string[]}
 */
function chunkParagraph(text, opts) {
  const { size, overlap } = opts;
  // `!= null` (not `||`) so an explicit minSize of 0 is respected.
  const minSize = opts.minSize != null ? opts.minSize : DEFAULTS.minSize;
  const paragraphs = text.split(/\n\s*\n/).map(p => p.trim()).filter(p => p.length > 0);
  return groupUnits(paragraphs, size, overlap, minSize);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Recursive chunking. Tries the largest delimiters first and falls back to
 * progressively smaller ones. This is the most commonly used strategy for
 * RAG pipelines.
 * @param {string} text
 * @param {object} opts
 * @param {number} opts.size - Target chunk size in characters
 * @param {number} [opts.minSize] - Minimum chunk size (an explicit 0 is honored)
 * @returns {string[]}
 */
function chunkRecursive(text, opts) {
  const { size } = opts;
  // `!= null` (not `||`) so an explicit minSize of 0 is respected.
  const minSize = opts.minSize != null ? opts.minSize : DEFAULTS.minSize;
  // Ordered coarsest-to-finest: paragraph, line, sentence enders, clauses, words.
  const separators = ['\n\n', '\n', '. ', '! ', '? ', '; ', ', ', ' '];

  return recursiveSplit(text, separators, size, minSize);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Internal recursive split implementation.
 * Splits on the coarsest separator present in the text, greedily merges the
 * resulting pieces back together while they fit under maxSize, and recurses
 * with the remaining (finer) separators on any piece that is still too big.
 * Falls back to a hard character split when no separator is present.
 * @param {string} text
 * @param {string[]} separators - Ordered coarsest-to-finest
 * @param {number} maxSize - Maximum chunk length in characters
 * @param {number} minSize - Fragments shorter than this are dropped
 * @returns {string[]}
 */
function recursiveSplit(text, separators, maxSize, minSize) {
  // Base case: the whole text already fits in one chunk.
  if (text.length <= maxSize) {
    const trimmed = text.trim();
    return trimmed.length >= minSize ? [trimmed] : [];
  }

  // Pick the coarsest separator actually present in the text.
  const sepIndex = separators.findIndex(s => text.includes(s));

  // No separator at all: hard-split every maxSize characters.
  if (sepIndex === -1) {
    const pieces = [];
    for (let offset = 0; offset < text.length; offset += maxSize) {
      const piece = text.slice(offset, offset + maxSize).trim();
      if (piece.length >= minSize) pieces.push(piece);
    }
    return pieces;
  }

  const sep = separators[sepIndex];
  const results = [];
  let buffer = '';

  const flushBuffer = () => {
    const trimmed = buffer.trim();
    if (trimmed.length >= minSize) results.push(trimmed);
  };

  for (const piece of text.split(sep)) {
    const joined = buffer ? buffer + sep + piece : piece;

    if (joined.length <= maxSize) {
      buffer = joined;
      continue;
    }

    flushBuffer();

    if (piece.length > maxSize) {
      // Still oversized on its own: descend to the finer separators.
      results.push(...recursiveSplit(piece, separators.slice(sepIndex + 1), maxSize, minSize));
      buffer = '';
    } else {
      buffer = piece;
    }
  }

  flushBuffer();
  return results;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Markdown-aware chunking. Splits on ATX headings (# through ######) and
 * keeps each heading together with the content below it. Sections that
 * exceed the size limit are split recursively, with the heading prepended
 * to the first sub-chunk.
 * @param {string} text
 * @param {object} opts
 * @returns {string[]}
 */
function chunkMarkdown(text, opts) {
  const { size, minSize } = opts;
  const headingPattern = /^(#{1,6}\s.+)$/gm;

  const sections = [];
  // Attach free-floating content to the section it follows, or open a
  // heading-less section when nothing precedes it.
  const attach = (content) => {
    if (!content) return;
    if (sections.length > 0) {
      sections[sections.length - 1].content += '\n\n' + content;
    } else {
      sections.push({ heading: '', content });
    }
  };

  let cursor = 0;
  let match;
  while ((match = headingPattern.exec(text)) !== null) {
    if (match.index > cursor) {
      attach(text.slice(cursor, match.index).trim());
    }
    sections.push({ heading: match[1], content: '' });
    cursor = match.index + match[0].length;
  }
  // Trailing content after the last heading (or the whole text if none).
  if (cursor < text.length) {
    attach(text.slice(cursor).trim());
  }

  const minLen = minSize || DEFAULTS.minSize;
  const chunks = [];

  for (const { heading, content } of sections) {
    const body = content.trim();
    const full = heading ? heading + '\n\n' + body : body;
    if (!full || full.length < minLen) continue;

    if (full.length <= size) {
      chunks.push(full);
      continue;
    }

    // Section too large — split the body, re-attach the heading to the
    // first sub-chunk so context is preserved.
    const pieces = chunkRecursive(body, opts);
    pieces.forEach((piece, i) => {
      chunks.push(i === 0 && heading ? heading + '\n\n' + piece : piece);
    });
  }

  return chunks;
}
|
|
233
|
+
|
|
234
|
+
// ── Shared helpers ──

/**
 * Group text units (sentences, paragraphs) into chunks under a size limit.
 * Supports overlap by re-including trailing units from the previous chunk.
 *
 * Fixes over the original:
 *  - the minSize filter now applies to every flushed chunk, not only the
 *    final remainder (a tiny first unit could previously leak through);
 *  - after a flush resets the buffer, the running length no longer counts a
 *    phantom joining space for the first unit of the new chunk.
 *
 * @param {string[]} units - Units to group, in order
 * @param {number} maxSize - Maximum chunk length in characters
 * @param {number} overlapChars - Overlap budget in characters (0 disables)
 * @param {number} minSize - Chunks shorter than this are dropped
 * @returns {string[]}
 */
function groupUnits(units, maxSize, overlapChars, minSize) {
  const chunks = [];
  let current = [];
  let currentLen = 0;

  // Emit the buffered units as one chunk, honoring minSize.
  const flush = () => {
    const text = current.join(' ').trim();
    if (text.length >= minSize) chunks.push(text);
  };

  for (const unit of units) {
    const joinLen = current.length > 0 ? unit.length + 1 : unit.length; // +1 for space

    if (currentLen + joinLen > maxSize && current.length > 0) {
      flush();

      if (overlapChars > 0) {
        // Overlap: keep trailing units that fit within the overlap budget.
        const overlapUnits = [];
        let overlapLen = 0;
        for (let i = current.length - 1; i >= 0; i--) {
          if (overlapLen + current[i].length + 1 > overlapChars) break;
          overlapUnits.unshift(current[i]);
          overlapLen += current[i].length + 1;
        }
        current = overlapUnits;
        currentLen = overlapLen;
      } else {
        current = [];
        currentLen = 0;
      }
    }

    // Recompute the join cost here: the flush above may have emptied
    // `current`, in which case no joining space is needed.
    currentLen += current.length > 0 ? unit.length + 1 : unit.length;
    current.push(unit);
  }

  // Flush remainder
  flush();

  return chunks;
}
|
|
285
|
+
|
|
286
|
+
// ── Token estimation ──

/**
 * Rough token estimate using the ~4-characters-per-token heuristic for
 * English text. Not exact — intended only for ballpark sizing.
 * @param {string} text
 * @returns {number} Estimated token count (rounded up)
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
|
296
|
+
|
|
297
|
+
// ── Public API ──

/**
 * Chunk text using the specified strategy.
 * @param {string} text - Input text
 * @param {object} [options]
 * @param {string} [options.strategy='recursive'] - One of STRATEGIES
 * @param {number} [options.size=512] - Target chunk size in characters
 * @param {number} [options.overlap=50] - Overlap between chunks in characters
 * @param {number} [options.minSize=20] - Minimum chunk size (an explicit 0 disables the minimum)
 * @returns {string[]} Array of text chunks
 * @throws {Error} If options.strategy is not a known strategy
 */
function chunk(text, options = {}) {
  // Empty / whitespace-only input yields no chunks; bail before building opts.
  if (!text || text.trim().length === 0) return [];

  const opts = {
    strategy: options.strategy || 'recursive',
    size: options.size || DEFAULTS.size,
    overlap: options.overlap != null ? options.overlap : DEFAULTS.overlap,
    // `!= null` (not `||`) so an explicit minSize of 0 is honored rather
    // than silently replaced by the default.
    minSize: options.minSize != null ? options.minSize : DEFAULTS.minSize,
  };

  switch (opts.strategy) {
    case 'fixed':
      return chunkFixed(text, opts);
    case 'sentence':
      return chunkSentence(text, opts);
    case 'paragraph':
      return chunkParagraph(text, opts);
    case 'recursive':
      return chunkRecursive(text, opts);
    case 'markdown':
      return chunkMarkdown(text, opts);
    default:
      throw new Error(`Unknown chunking strategy: ${opts.strategy}. Available: ${STRATEGIES.join(', ')}`);
  }
}
|
|
334
|
+
|
|
335
|
+
// Public API: chunk() is the main entry point; the helpers and constants are
// exposed for testing and for callers that need the raw pieces.
module.exports = {
  chunk,
  splitSentences,
  estimateTokens,
  STRATEGIES,
  DEFAULTS,
};
|
package/src/lib/explanations.js
CHANGED
|
@@ -513,6 +513,169 @@ const concepts = {
|
|
|
513
513
|
'vai benchmark similarity --query "your search query" --file your-docs.txt',
|
|
514
514
|
],
|
|
515
515
|
},
|
|
516
|
+
'mixture-of-experts': {
|
|
517
|
+
title: 'Mixture-of-Experts (MoE) Architecture',
|
|
518
|
+
summary: 'How voyage-4-large achieves SOTA quality at 40% lower cost',
|
|
519
|
+
content: [
|
|
520
|
+
`${pc.cyan('Mixture-of-Experts (MoE)')} is a neural network architecture where multiple`,
|
|
521
|
+
`specialized sub-networks ("experts") share a single model. A learned ${pc.cyan('router')}`,
|
|
522
|
+
`selects which experts activate for each input — typically 2-4 out of 8-64 total.`,
|
|
523
|
+
``,
|
|
524
|
+
`${pc.bold('Why MoE matters for embeddings:')}`,
|
|
525
|
+
` ${pc.dim('•')} ${pc.cyan('Higher capacity, lower cost')} — the model has more total parameters`,
|
|
526
|
+
` (knowledge) but only activates a fraction per input, keeping inference fast`,
|
|
527
|
+
` ${pc.dim('•')} ${pc.cyan('Specialization')} — different experts learn different domains (code,`,
|
|
528
|
+
` legal, medical) without interfering with each other`,
|
|
529
|
+
` ${pc.dim('•')} ${pc.cyan('State-of-the-art quality')} — voyage-4-large beats all competitors on`,
|
|
530
|
+
` RTEB benchmarks while costing 40% less than comparable dense models`,
|
|
531
|
+
``,
|
|
532
|
+
`${pc.bold('voyage-4-large')} is the ${pc.cyan('first production-grade embedding model')} to use MoE.`,
|
|
533
|
+
`Previous MoE successes (Mixtral, Switch Transformer) were language models —`,
|
|
534
|
+
`applying MoE to embedding models required solving alignment across the shared`,
|
|
535
|
+
`embedding space, which is what makes the Voyage 4 family unique.`,
|
|
536
|
+
``,
|
|
537
|
+
`${pc.bold('Dense vs MoE:')}`,
|
|
538
|
+
` ${pc.dim('Dense (voyage-4, voyage-4-lite):')} Every parameter is used for every input.`,
|
|
539
|
+
` Simpler, predictable latency, lower total parameter count.`,
|
|
540
|
+
` ${pc.dim('MoE (voyage-4-large):')} Sparse activation — more total parameters, but each`,
|
|
541
|
+
` input only uses a subset. Higher quality ceiling, similar serving cost.`,
|
|
542
|
+
``,
|
|
543
|
+
`${pc.bold('In practice:')} You don't need to do anything special to use MoE — the API`,
|
|
544
|
+
`interface is identical. The architecture difference shows up in quality and cost:`,
|
|
545
|
+
` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens — better quality than voyage-3-large ($0.18/1M)`,
|
|
546
|
+
` ${pc.dim('•')} 40% cheaper than comparable dense models at the same quality tier`,
|
|
547
|
+
].join('\n'),
|
|
548
|
+
links: [
|
|
549
|
+
'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
|
|
550
|
+
'https://www.mongodb.com/docs/voyageai/models/text-embeddings/',
|
|
551
|
+
],
|
|
552
|
+
tryIt: [
|
|
553
|
+
'vai embed "test MoE quality" --model voyage-4-large',
|
|
554
|
+
'vai benchmark embed --models voyage-4-large,voyage-4,voyage-4-lite',
|
|
555
|
+
'vai models --wide',
|
|
556
|
+
],
|
|
557
|
+
},
|
|
558
|
+
|
|
559
|
+
'shared-embedding-space': {
|
|
560
|
+
title: 'Shared Embedding Space',
|
|
561
|
+
summary: 'How Voyage 4 models produce compatible, interchangeable embeddings',
|
|
562
|
+
content: [
|
|
563
|
+
`The Voyage 4 series introduces an ${pc.cyan('industry-first capability')}: all four models`,
|
|
564
|
+
`(voyage-4-large, voyage-4, voyage-4-lite, voyage-4-nano) produce embeddings in`,
|
|
565
|
+
`the ${pc.cyan('same vector space')}. Embeddings from different models are directly comparable.`,
|
|
566
|
+
``,
|
|
567
|
+
`${pc.bold('What this means:')}`,
|
|
568
|
+
` ${pc.dim('•')} Embed documents with ${pc.cyan('voyage-4-large')} (best quality, one-time cost)`,
|
|
569
|
+
` ${pc.dim('•')} Query with ${pc.cyan('voyage-4-lite')} or ${pc.cyan('voyage-4-nano')} (low cost, high volume)`,
|
|
570
|
+
` ${pc.dim('•')} Cosine similarity works across model boundaries`,
|
|
571
|
+
` ${pc.dim('•')} Upgrade query model later ${pc.cyan('without re-vectorizing documents')}`,
|
|
572
|
+
``,
|
|
573
|
+
`${pc.bold('Why this is new:')} Previously, embeddings from different models lived in`,
|
|
574
|
+
`incompatible vector spaces. Switching models meant re-embedding your entire`,
|
|
575
|
+
`corpus — expensive and slow. The shared space eliminates this constraint.`,
|
|
576
|
+
``,
|
|
577
|
+
`${pc.bold('Recommended workflow:')}`,
|
|
578
|
+
` ${pc.dim('1.')} Vectorize your document corpus once with ${pc.cyan('voyage-4-large')}`,
|
|
579
|
+
` ${pc.dim('2.')} Start with ${pc.cyan('voyage-4-lite')} for queries in development / early production`,
|
|
580
|
+
` ${pc.dim('3.')} Upgrade to ${pc.cyan('voyage-4')} or ${pc.cyan('voyage-4-large')} as accuracy needs grow`,
|
|
581
|
+
` ${pc.dim('4.')} No re-vectorization needed at any step`,
|
|
582
|
+
``,
|
|
583
|
+
`${pc.bold('Validate it yourself:')} Use ${pc.cyan('vai benchmark space')} to embed identical text`,
|
|
584
|
+
`with all Voyage 4 models and see the cross-model cosine similarities.`,
|
|
585
|
+
].join('\n'),
|
|
586
|
+
links: [
|
|
587
|
+
'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
|
|
588
|
+
],
|
|
589
|
+
tryIt: [
|
|
590
|
+
'vai benchmark space',
|
|
591
|
+
'vai benchmark asymmetric --query "your search" --file corpus.txt',
|
|
592
|
+
'vai estimate --docs 1M --queries 10M',
|
|
593
|
+
],
|
|
594
|
+
},
|
|
595
|
+
|
|
596
|
+
'rteb-benchmarks': {
|
|
597
|
+
title: 'RTEB Benchmark Scores',
|
|
598
|
+
summary: 'Retrieval quality scores across embedding providers',
|
|
599
|
+
content: [
|
|
600
|
+
`The ${pc.cyan('Retrieval Embedding Benchmark (RTEB)')} evaluates general-purpose retrieval`,
|
|
601
|
+
`quality across 29 diverse datasets. Scores are ${pc.cyan('NDCG@10')} (normalized discounted`,
|
|
602
|
+
`cumulative gain at top 10 results) — higher is better.`,
|
|
603
|
+
``,
|
|
604
|
+
`${pc.bold('Current standings (Jan 2026):')}`,
|
|
605
|
+
` ${pc.cyan('voyage-4-large')} ${pc.bold('71.41')} ${pc.dim('— SOTA, MoE architecture')}`,
|
|
606
|
+
` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('— near voyage-3-large quality')}`,
|
|
607
|
+
` ${pc.cyan('Gemini Embedding 001')} ${pc.bold('68.66')} ${pc.dim('— Google')}`,
|
|
608
|
+
` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('— near voyage-3.5 quality')}`,
|
|
609
|
+
` ${pc.cyan('Cohere Embed v4')} ${pc.bold('65.75')} ${pc.dim('— Cohere')}`,
|
|
610
|
+
` ${pc.cyan('OpenAI v3 Large')} ${pc.bold('62.57')} ${pc.dim('— OpenAI')}`,
|
|
611
|
+
``,
|
|
612
|
+
`${pc.bold('What the numbers mean:')}`,
|
|
613
|
+
` ${pc.dim('•')} voyage-4-large beats Gemini by ${pc.cyan('3.87%')}, Cohere by ${pc.cyan('8.20%')}, OpenAI by ${pc.cyan('14.05%')}`,
|
|
614
|
+
` ${pc.dim('•')} voyage-4 (mid-tier pricing) outperforms all non-Voyage models`,
|
|
615
|
+
` ${pc.dim('•')} Even voyage-4-lite ($0.02/1M) is competitive with Gemini Embedding`,
|
|
616
|
+
``,
|
|
617
|
+
`${pc.bold('Asymmetric retrieval bonus:')} When documents are embedded with voyage-4-large`,
|
|
618
|
+
`and queries with a smaller Voyage 4 model, retrieval quality ${pc.cyan('improves')} over`,
|
|
619
|
+
`using the smaller model alone — you get the benefit of the larger model's`,
|
|
620
|
+
`document representations.`,
|
|
621
|
+
``,
|
|
622
|
+
`${pc.bold('Note:')} These scores are from Voyage AI's evaluation. Independent benchmarks`,
|
|
623
|
+
`may differ. Always test on your own data with ${pc.cyan('vai benchmark similarity')}.`,
|
|
624
|
+
].join('\n'),
|
|
625
|
+
links: [
|
|
626
|
+
'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
|
|
627
|
+
'https://docs.google.com/spreadsheets/d/1GfPkqCAjPKaGS9f66IDhMRxVpd2bMuqL2wXjj-kNS7E/',
|
|
628
|
+
],
|
|
629
|
+
tryIt: [
|
|
630
|
+
'vai models --benchmarks',
|
|
631
|
+
'vai benchmark similarity --query "your query" --file your-docs.txt',
|
|
632
|
+
'vai estimate --docs 1M --queries 10M',
|
|
633
|
+
],
|
|
634
|
+
},
|
|
635
|
+
|
|
636
|
+
'voyage-4-nano': {
|
|
637
|
+
title: 'voyage-4-nano — Open-Weight Local Model',
|
|
638
|
+
summary: 'Free, local-first embeddings with shared space compatibility',
|
|
639
|
+
content: [
|
|
640
|
+
`${pc.cyan('voyage-4-nano')} is Voyage AI's first ${pc.cyan('open-weight')} embedding model, freely`,
|
|
641
|
+
`available on Hugging Face under the ${pc.bold('Apache 2.0')} license.`,
|
|
642
|
+
``,
|
|
643
|
+
`${pc.bold('Key specs:')}`,
|
|
644
|
+
` ${pc.dim('•')} Dimensions: 512 (default), 128, 256`,
|
|
645
|
+
` ${pc.dim('•')} Context: 32K tokens`,
|
|
646
|
+
` ${pc.dim('•')} License: Apache 2.0 (fully open)`,
|
|
647
|
+
` ${pc.dim('•')} Shared space: Compatible with voyage-4-large/4/4-lite embeddings`,
|
|
648
|
+
``,
|
|
649
|
+
`${pc.bold('Use cases:')}`,
|
|
650
|
+
` ${pc.dim('•')} ${pc.cyan('Local development')} — no API key, no network, no cost`,
|
|
651
|
+
` ${pc.dim('•')} ${pc.cyan('Prototyping')} — fast iteration before committing to API models`,
|
|
652
|
+
` ${pc.dim('•')} ${pc.cyan('Edge/on-device')} — run inference on your own hardware`,
|
|
653
|
+
` ${pc.dim('•')} ${pc.cyan('Asymmetric queries')} — use nano for queries against voyage-4-large docs`,
|
|
654
|
+
``,
|
|
655
|
+
`${pc.bold('Getting started with Hugging Face:')}`,
|
|
656
|
+
` ${pc.dim('pip install sentence-transformers')}`,
|
|
657
|
+
` ${pc.dim('from sentence_transformers import SentenceTransformer')}`,
|
|
658
|
+
` ${pc.dim('model = SentenceTransformer("voyageai/voyage-4-nano")')}`,
|
|
659
|
+
` ${pc.dim('embeddings = model.encode(["your text here"])')}`,
|
|
660
|
+
``,
|
|
661
|
+
`${pc.bold('With the Voyage API:')} voyage-4-nano is also available via the standard API`,
|
|
662
|
+
`endpoint, so you can use ${pc.cyan('vai embed --model voyage-4-nano')} for testing before`,
|
|
663
|
+
`switching to local inference.`,
|
|
664
|
+
``,
|
|
665
|
+
`${pc.bold('Shared space advantage:')} Since nano shares the same embedding space as the`,
|
|
666
|
+
`larger Voyage 4 models, you can prototype locally with nano, then seamlessly`,
|
|
667
|
+
`use the same document embeddings with voyage-4 or voyage-4-large in production.`,
|
|
668
|
+
].join('\n'),
|
|
669
|
+
links: [
|
|
670
|
+
'https://huggingface.co/voyageai/voyage-4-nano',
|
|
671
|
+
'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
|
|
672
|
+
],
|
|
673
|
+
tryIt: [
|
|
674
|
+
'vai embed "test nano" --model voyage-4-nano',
|
|
675
|
+
'vai benchmark space',
|
|
676
|
+
'vai benchmark asymmetric --doc-model voyage-4-large --query-models voyage-4-nano',
|
|
677
|
+
],
|
|
678
|
+
},
|
|
516
679
|
};
|
|
517
680
|
|
|
518
681
|
/**
|
|
@@ -567,6 +730,26 @@ const aliases = {
|
|
|
567
730
|
'model-selection': 'benchmarking',
|
|
568
731
|
choosing: 'benchmarking',
|
|
569
732
|
compare: 'benchmarking',
|
|
733
|
+
moe: 'mixture-of-experts',
|
|
734
|
+
'mixture-of-experts': 'mixture-of-experts',
|
|
735
|
+
'moe-architecture': 'mixture-of-experts',
|
|
736
|
+
experts: 'mixture-of-experts',
|
|
737
|
+
sparse: 'mixture-of-experts',
|
|
738
|
+
'shared-space': 'shared-embedding-space',
|
|
739
|
+
'shared-embedding-space': 'shared-embedding-space',
|
|
740
|
+
'embedding-space': 'shared-embedding-space',
|
|
741
|
+
interchangeable: 'shared-embedding-space',
|
|
742
|
+
compatible: 'shared-embedding-space',
|
|
743
|
+
rteb: 'rteb-benchmarks',
|
|
744
|
+
'rteb-benchmarks': 'rteb-benchmarks',
|
|
745
|
+
ndcg: 'rteb-benchmarks',
|
|
746
|
+
scores: 'rteb-benchmarks',
|
|
747
|
+
leaderboard: 'rteb-benchmarks',
|
|
748
|
+
nano: 'voyage-4-nano',
|
|
749
|
+
'voyage-4-nano': 'voyage-4-nano',
|
|
750
|
+
'open-weight': 'voyage-4-nano',
|
|
751
|
+
huggingface: 'voyage-4-nano',
|
|
752
|
+
local: 'voyage-4-nano',
|
|
570
753
|
};
|
|
571
754
|
|
|
572
755
|
/**
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
'use strict';

const fs = require('fs');
const path = require('path');

// Name of the per-project config file, discovered by walking up from cwd.
const PROJECT_FILE = '.vai.json';
// Schema version stamped into saved config files.
const PROJECT_VERSION = 1;
|
|
8
|
+
|
|
9
|
+
/**
 * Search for .vai.json starting from startDir, walking up to (and including)
 * the filesystem root.
 * @param {string} [startDir] - Directory to start from (default: cwd)
 * @returns {string|null} Absolute path to .vai.json, or null if not found
 */
function findProjectFile(startDir) {
  let dir = path.resolve(startDir || process.cwd());

  for (;;) {
    const candidate = path.join(dir, PROJECT_FILE);
    if (fs.existsSync(candidate)) return candidate;

    const parent = path.dirname(dir);
    // dirname() of the root is the root itself — every ancestor was checked.
    if (parent === dir) return null;
    dir = parent;
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Load project config from the nearest .vai.json.
 * Missing or unparseable files yield an empty config rather than throwing —
 * project config is deliberately best-effort.
 * @param {string} [startDir] - Directory to start searching from
 * @returns {{ config: object, filePath: string|null }}
 */
function loadProject(startDir) {
  const filePath = findProjectFile(startDir);
  if (!filePath) {
    return { config: {}, filePath: null };
  }

  let config = {};
  try {
    config = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
  } catch (err) {
    // Malformed JSON or unreadable file: fall back to an empty config.
  }
  return { config, filePath };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Save project config to .vai.json (pretty-printed, trailing newline).
 * A `version` field is added unless the config already carries one
 * (later spread keys win).
 * @param {object} config - Project configuration
 * @param {string} [targetPath] - Path to write (default: cwd/.vai.json)
 * @returns {string} The path that was written
 */
function saveProject(config, targetPath) {
  const filePath = targetPath || path.join(process.cwd(), PROJECT_FILE);
  const payload = JSON.stringify({ version: PROJECT_VERSION, ...config }, null, 2);
  fs.writeFileSync(filePath, `${payload}\n`, 'utf-8');
  return filePath;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Merge project config with CLI options. CLI options take precedence.
 * Only a fixed allow-list of keys is merged — unknown keys from either
 * source are dropped rather than blindly spread.
 * @param {object} projectConfig - From .vai.json
 * @param {object} cliOpts - From commander
 * @returns {object} Merged options
 */
function mergeOptions(projectConfig, cliOpts) {
  // Keys shared between project config and CLI options.
  const KNOWN_KEYS = [
    'model', 'db', 'collection', 'field', 'inputType',
    'dimensions', 'index',
  ];

  const merged = {};
  for (const key of KNOWN_KEYS) {
    // Explicit CLI value wins; otherwise fall back to the project config.
    const value = cliOpts[key] !== undefined ? cliOpts[key] : projectConfig[key];
    if (value !== undefined) merged[key] = value;
  }

  // Chunk config nests under project.chunk; copy so callers can't mutate it.
  if (projectConfig.chunk) {
    merged.chunk = Object.assign({}, projectConfig.chunk);
  }

  return merged;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Default project config scaffold (used when initializing a new project).
 * @returns {object} A fresh config object, safe for the caller to mutate
 */
function defaultProjectConfig() {
  const chunkDefaults = {
    strategy: 'recursive',
    size: 512,
    overlap: 50,
  };

  return {
    version: PROJECT_VERSION,
    model: 'voyage-4-large',
    db: '',
    collection: '',
    field: 'embedding',
    inputType: 'document',
    dimensions: 1024,
    index: 'vector_index',
    chunk: chunkDefaults,
  };
}
|
|
113
|
+
|
|
114
|
+
// Public API: project-file discovery, load/save, and option merging for the
// CLI commands; constants exposed for tests and callers.
module.exports = {
  PROJECT_FILE,
  PROJECT_VERSION,
  findProjectFile,
  loadProject,
  saveProject,
  mergeOptions,
  defaultProjectConfig,
};
|