voyageai-cli 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/NOTICE +23 -0
- package/README.md +84 -0
- package/demo.gif +0 -0
- package/demo.tape +39 -0
- package/package.json +1 -1
- package/scripts/record-demo.sh +63 -0
- package/src/cli.js +19 -2
- package/src/commands/completions.js +463 -0
- package/src/commands/demo.js +3 -0
- package/src/commands/explain.js +170 -0
- package/src/commands/ingest.js +414 -0
- package/src/commands/models.js +61 -13
- package/src/commands/ping.js +5 -4
- package/src/commands/similarity.js +175 -0
- package/src/lib/api.js +48 -2
- package/src/lib/banner.js +1 -0
- package/src/lib/catalog.js +10 -10
- package/src/lib/config.js +1 -0
- package/src/lib/explanations.js +480 -0
- package/src/lib/math.js +20 -0
- package/test/commands/completions.test.js +166 -0
- package/test/commands/explain.test.js +207 -0
- package/test/commands/ingest.test.js +248 -0
- package/test/commands/ping.test.js +24 -11
- package/test/commands/similarity.test.js +79 -0
- package/test/fixtures/sample.csv +6 -0
- package/test/fixtures/sample.json +7 -0
- package/test/fixtures/sample.jsonl +5 -0
- package/test/fixtures/sample.txt +5 -0
- package/test/lib/api.test.js +12 -3
- package/test/lib/explanations.test.js +134 -0
- package/test/lib/math.test.js +43 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const { generateEmbeddings } = require('../lib/api');
|
|
5
|
+
const { cosineSimilarity } = require('../lib/math');
|
|
6
|
+
const { getDefaultModel } = require('../lib/catalog');
|
|
7
|
+
const ui = require('../lib/ui');
|
|
8
|
+
|
|
9
|
+
/**
 * Register the `similarity` command on a Commander program.
 *
 * The command embeds all input texts with a single `generateEmbeddings` call
 * and reports the cosine similarity of each comparison text against text A:
 *   - two-text mode: one score for text A vs. text B;
 *   - one-vs-many mode (`--against`): one score per target, sorted descending.
 *
 * Text A is taken from `--file1`, otherwise from the first positional text.
 * Comparison targets are taken from `--against`, otherwise `--file2`,
 * otherwise the second positional text.
 *
 * On invalid input, or when anything inside the handler throws, a message is
 * written to stderr and the process exits with code 1.
 *
 * @param {import('commander').Command} program
 */
function registerSimilarity(program) {
  program
    .command('similarity')
    .description('Compute cosine similarity between texts')
    .argument('[texts...]', 'Two texts to compare')
    .option('--against <texts...>', 'Compare first text against multiple texts')
    .option('--file1 <path>', 'Read text A from file')
    .option('--file2 <path>', 'Read text B from file')
    .option('-m, --model <model>', 'Embedding model', getDefaultModel())
    .option('--dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (texts, opts) => {
      // Declared outside the try block so the catch handler can stop a running
      // spinner before printing the error.
      let spin;
      const stopSpinner = () => {
        if (spin) {
          spin.stop();
          spin = undefined; // never stop the same spinner twice
        }
      };

      try {
        let textA = null;
        let compareTexts = [];
        let isOneVsMany = false;

        // Resolve text A
        if (opts.file1) {
          textA = fs.readFileSync(opts.file1, 'utf-8').trim();
        } else if (texts.length > 0) {
          textA = texts[0];
        }

        // Resolve comparison targets
        if (opts.against && opts.against.length > 0) {
          // One-vs-many mode
          isOneVsMany = true;
          compareTexts = opts.against;
        } else if (opts.file2) {
          compareTexts = [fs.readFileSync(opts.file2, 'utf-8').trim()];
        } else if (texts.length >= 2) {
          compareTexts = [texts[1]];
        }

        // Validate inputs. Each branch returns explicitly so nothing below runs
        // with missing input even if process.exit is stubbed (e.g. in tests).
        if (!textA) {
          console.error(ui.error('No input text provided. Provide two texts, use --file1/--file2, or use --against.'));
          process.exit(1);
          return;
        }

        if (compareTexts.length === 0) {
          console.error(ui.error('Need at least two texts to compare. Provide a second text, --file2, or --against.'));
          process.exit(1);
          return;
        }

        // parseInt yields NaN for non-numeric input; reject it (and non-positive
        // values) instead of silently falling back to the model default.
        if (opts.dimensions !== undefined
          && !(Number.isInteger(opts.dimensions) && opts.dimensions > 0)) {
          console.error(ui.error('Invalid --dimensions value. Expected a positive integer.'));
          process.exit(1);
          return;
        }

        // Batch all texts into one API call
        const allTexts = [textA, ...compareTexts];

        // The spinner is decoration only; keep it out of --json / --quiet output.
        const useSpinner = !opts.json && !opts.quiet;
        if (useSpinner) {
          spin = ui.spinner('Computing similarity...');
          spin.start();
        }

        const embeddingOpts = {
          model: opts.model,
        };
        if (opts.dimensions !== undefined) {
          embeddingOpts.dimensions = opts.dimensions;
        }
        // Don't set inputType — we're comparing directly, not query/document

        const result = await generateEmbeddings(allTexts, embeddingOpts);

        stopSpinner();

        const embeddings = result.data.map((d) => d.embedding);
        const tokens = result.usage?.total_tokens ?? 0;
        const model = result.model || opts.model;

        // Index 0 is text A; index i + 1 is compareTexts[i].
        const refEmbedding = embeddings[0];

        if (!isOneVsMany && compareTexts.length === 1) {
          // Two-text comparison
          const sim = cosineSimilarity(refEmbedding, embeddings[1]);

          if (opts.json) {
            console.log(JSON.stringify({
              similarity: sim,
              metric: 'cosine',
              textA,
              textB: compareTexts[0],
              model,
              tokens,
            }, null, 2));
            return;
          }

          if (opts.quiet) {
            console.log(sim.toFixed(6));
            return;
          }

          console.log('');
          console.log(` Similarity: ${ui.score(sim)} (cosine)`);
          console.log('');
          console.log(ui.label('Text A', `"${truncate(textA, 70)}"`));
          console.log(ui.label('Text B', `"${truncate(compareTexts[0], 70)}"`));
          console.log(ui.label('Model', ui.cyan(model)));
          console.log(ui.label('Tokens', ui.dim(String(tokens))));
          console.log('');
        } else {
          // One-vs-many comparison
          const results = compareTexts.map((text, i) => ({
            text,
            similarity: cosineSimilarity(refEmbedding, embeddings[i + 1]),
          }));

          // Sort by similarity descending
          results.sort((a, b) => b.similarity - a.similarity);

          if (opts.json) {
            console.log(JSON.stringify({
              query: textA,
              results,
              model,
              tokens,
            }, null, 2));
            return;
          }

          if (opts.quiet) {
            for (const r of results) {
              console.log(`${r.similarity.toFixed(6)}\t"${truncate(r.text, 60)}"`);
            }
            return;
          }

          console.log('');
          console.log(` Query: ${ui.cyan(`"${truncate(textA, 60)}"`)}`);
          console.log(` Model: ${ui.cyan(model)}`);
          console.log('');

          for (const r of results) {
            console.log(` ${ui.score(r.similarity)} "${truncate(r.text, 60)}"`);
          }

          console.log('');
          console.log(` ${ui.dim(`${results.length} comparisons, ${tokens} tokens`)}`);
          console.log('');
        }
      } catch (err) {
        // Stop the spinner first so the error is not printed onto its line.
        stopSpinner();
        console.error(ui.error(err.message));
        process.exit(1);
      }
    });
}
|
|
163
|
+
|
|
164
|
+
/**
 * Shorten a string for display.
 *
 * Strings of at most `maxLen` UTF-16 code units are returned unchanged.
 * Longer strings are cut to at most `maxLen` code units and '...' is appended,
 * so a truncated result can be up to `maxLen + 3` units long. The cut never
 * lands between the two halves of a surrogate pair, so characters outside the
 * BMP (e.g. emoji) are either kept whole or dropped whole.
 *
 * @param {string} str
 * @param {number} maxLen
 * @returns {string}
 */
function truncate(str, maxLen) {
  if (str.length <= maxLen) return str;

  let end = maxLen;
  const lastKept = str.charCodeAt(end - 1);
  const firstDropped = str.charCodeAt(end);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff
    && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) {
    // Back off one unit rather than emit a lone high surrogate.
    end -= 1;
  }

  return `${str.substring(0, end)}...`;
}
|
|
174
|
+
|
|
175
|
+
module.exports = { registerSimilarity };
|
package/src/lib/api.js
CHANGED
|
@@ -1,8 +1,32 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const
|
|
3
|
+
const ATLAS_API_BASE = 'https://ai.mongodb.com/v1';
|
|
4
|
+
const VOYAGE_API_BASE = 'https://api.voyageai.com/v1';
|
|
4
5
|
const MAX_RETRIES = 3;
|
|
5
6
|
|
|
7
|
+
/**
 * Resolve the base URL used for API requests.
 *
 * Resolution order:
 *   1. the VOYAGE_API_BASE environment variable, when set and non-empty;
 *   2. the `baseUrl` value from config, when set;
 *   3. ATLAS_API_BASE as the default.
 *
 * Trailing slashes are stripped from environment/config values so that a URL
 * entered with a trailing '/' normalizes to the same form as the built-in
 * constants. No detection based on the API key is performed here.
 *
 * @returns {string}
 */
function getApiBase() {
  // Required at call time rather than at module load.
  // NOTE(review): presumably avoids a circular import with ./config — confirm.
  const { getConfigValue: readSetting } = require('./config');
  const withoutTrailingSlashes = (url) => url.replace(/\/+$/, '');

  // An explicit environment override takes precedence over everything else.
  const override = process.env.VOYAGE_API_BASE;
  if (override) {
    return withoutTrailingSlashes(override);
  }

  // Otherwise use the configured URL, falling back to the Atlas endpoint.
  const configured = readSetting('baseUrl');
  return configured ? withoutTrailingSlashes(configured) : ATLAS_API_BASE;
}
|
|
26
|
+
|
|
27
|
+
// Legacy export for backward compat
|
|
28
|
+
const API_BASE = ATLAS_API_BASE;
|
|
29
|
+
|
|
6
30
|
/**
|
|
7
31
|
* Get the Voyage API key or exit with a helpful error.
|
|
8
32
|
* Checks: env var → config file.
|
|
@@ -18,6 +42,7 @@ function requireApiKey() {
|
|
|
18
42
|
console.error('Option 2: vai config set api-key <your-key>');
|
|
19
43
|
console.error('');
|
|
20
44
|
console.error('Get one from MongoDB Atlas → AI Models → Create model API key');
|
|
45
|
+
console.error(' or Voyage AI platform → Dashboard → API Keys');
|
|
21
46
|
process.exit(1);
|
|
22
47
|
}
|
|
23
48
|
return key;
|
|
@@ -40,7 +65,8 @@ function sleep(ms) {
|
|
|
40
65
|
*/
|
|
41
66
|
async function apiRequest(endpoint, body) {
|
|
42
67
|
const apiKey = requireApiKey();
|
|
43
|
-
const
|
|
68
|
+
const base = getApiBase();
|
|
69
|
+
const url = `${base}${endpoint}`;
|
|
44
70
|
|
|
45
71
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
46
72
|
const response = await fetch(url, {
|
|
@@ -69,6 +95,23 @@ async function apiRequest(endpoint, body) {
|
|
|
69
95
|
errorDetail = await response.text();
|
|
70
96
|
}
|
|
71
97
|
console.error(`API Error (${response.status}): ${errorDetail}`);
|
|
98
|
+
|
|
99
|
+
// Help users diagnose endpoint mismatch
|
|
100
|
+
if (response.status === 403 && base === ATLAS_API_BASE) {
|
|
101
|
+
console.error('');
|
|
102
|
+
console.error('Hint: 403 on ai.mongodb.com often means your key is for the Voyage AI');
|
|
103
|
+
console.error('platform, not MongoDB Atlas. Try switching the base URL:');
|
|
104
|
+
console.error('');
|
|
105
|
+
console.error(' vai config set base-url https://api.voyageai.com/v1/');
|
|
106
|
+
console.error('');
|
|
107
|
+
console.error('Or set VOYAGE_API_BASE=https://api.voyageai.com/v1/ in your environment.');
|
|
108
|
+
} else if (response.status === 401 && base === VOYAGE_API_BASE) {
|
|
109
|
+
console.error('');
|
|
110
|
+
console.error('Hint: 401 on api.voyageai.com may mean your key is an Atlas AI key.');
|
|
111
|
+
console.error('Try switching back:');
|
|
112
|
+
console.error('');
|
|
113
|
+
console.error(' vai config set base-url https://ai.mongodb.com/v1/');
|
|
114
|
+
}
|
|
72
115
|
process.exit(1);
|
|
73
116
|
}
|
|
74
117
|
|
|
@@ -105,6 +148,9 @@ async function generateEmbeddings(texts, options = {}) {
|
|
|
105
148
|
|
|
106
149
|
module.exports = {
|
|
107
150
|
API_BASE,
|
|
151
|
+
ATLAS_API_BASE,
|
|
152
|
+
VOYAGE_API_BASE,
|
|
153
|
+
getApiBase,
|
|
108
154
|
requireApiKey,
|
|
109
155
|
apiRequest,
|
|
110
156
|
generateEmbeddings,
|
package/src/lib/banner.js
CHANGED
package/src/lib/catalog.js
CHANGED
|
@@ -24,16 +24,16 @@ function getDefaultDimensions() {
|
|
|
24
24
|
|
|
25
25
|
/** @type {Array<{name: string, type: string, context: string, dimensions: string, price: string, bestFor: string, shortFor: string}>} */
|
|
26
26
|
const MODEL_CATALOG = [
|
|
27
|
-
{ name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', bestFor: 'Best quality, multilingual' },
|
|
28
|
-
{ name: 'voyage-4', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Balanced quality/perf' },
|
|
29
|
-
{ name: 'voyage-4-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Lowest cost' },
|
|
30
|
-
{ name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Code retrieval' },
|
|
31
|
-
{ name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Finance' },
|
|
32
|
-
{ name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legal' },
|
|
33
|
-
{ name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks' },
|
|
34
|
-
{ name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video' },
|
|
35
|
-
{ name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking' },
|
|
36
|
-
{ name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking' },
|
|
27
|
+
{ name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', bestFor: 'Best quality, multilingual', shortFor: 'Best quality' },
|
|
28
|
+
{ name: 'voyage-4', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Balanced quality/perf', shortFor: 'Balanced' },
|
|
29
|
+
{ name: 'voyage-4-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Lowest cost', shortFor: 'Budget' },
|
|
30
|
+
{ name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Code retrieval', shortFor: 'Code' },
|
|
31
|
+
{ name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Finance', shortFor: 'Finance' },
|
|
32
|
+
{ name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legal', shortFor: 'Legal' },
|
|
33
|
+
{ name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks', shortFor: 'Context chunks' },
|
|
34
|
+
{ name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal' },
|
|
35
|
+
{ name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
|
|
36
|
+
{ name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
|
|
37
37
|
];
|
|
38
38
|
|
|
39
39
|
module.exports = {
|