voyageai-cli 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Michael Lynn
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,193 @@
1
+ # voyageai-cli
2
+
3
+ CLI for [Voyage AI](https://www.mongodb.com/docs/voyageai/) embeddings, reranking, and [MongoDB Atlas Vector Search](https://www.mongodb.com/docs/atlas/atlas-vector-search/). Pure Node.js — no Python required.
4
+
5
+ Generate embeddings, rerank search results, store vectors in Atlas, and run semantic search — all from the command line.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install -g voyageai-cli
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Set your API key (get one from MongoDB Atlas → AI Models)
17
+ export VOYAGE_API_KEY="your-key"
18
+
19
+ # Generate an embedding
20
+ vai embed "What is MongoDB?"
21
+
22
+ # List available models
23
+ vai models
24
+ ```
25
+
26
+ ## Commands
27
+
28
+ ### `vai embed` — Generate embeddings
29
+
30
+ ```bash
31
+ # Single text
32
+ vai embed "Hello, world"
33
+
34
+ # With options
35
+ vai embed "search query" --model voyage-4-large --input-type query --dimensions 512
36
+
37
+ # From a file
38
+ vai embed --file document.txt --input-type document
39
+
40
+ # Bulk from stdin (newline-delimited)
41
+ cat texts.txt | vai embed
42
+
43
+ # Raw array output
44
+ vai embed "hello" --output-format array
45
+ ```
46
+
47
+ ### `vai rerank` — Rerank documents by relevance
48
+
49
+ ```bash
50
+ # Inline documents
51
+ vai rerank --query "database performance" \
52
+ --documents "MongoDB is fast" "Redis is cached" "SQL is relational"
53
+
54
+ # From a file (JSON array or newline-delimited)
55
+ vai rerank --query "best database" --documents-file candidates.json --top-k 3
56
+
57
+ # Different model
58
+ vai rerank --query "query" --documents "doc1" "doc2" --model rerank-2.5-lite
59
+ ```
60
+
61
+ ### `vai store` — Embed and insert into MongoDB Atlas
62
+
63
+ Requires `MONGODB_URI` environment variable.
64
+
65
+ ```bash
66
+ # Single document with metadata
67
+ vai store --db myapp --collection docs --field embedding \
68
+ --text "MongoDB Atlas is a cloud database" \
69
+ --metadata '{"source": "docs", "category": "product"}'
70
+
71
+ # From a file
72
+ vai store --db myapp --collection docs --field embedding \
73
+ --file article.txt
74
+
75
+ # Batch from JSONL (one {"text": "...", "metadata": {...}} per line)
76
+ vai store --db myapp --collection docs --field embedding \
77
+ --file documents.jsonl
78
+ ```
79
+
80
+ ### `vai search` — Vector similarity search
81
+
82
+ Requires `MONGODB_URI` environment variable.
83
+
84
+ ```bash
85
+ # Basic search
86
+ vai search --query "cloud database" \
87
+ --db myapp --collection docs \
88
+ --index vector_index --field embedding
89
+
90
+ # With pre-filter and limit
91
+ vai search --query "performance tuning" \
92
+ --db myapp --collection docs \
93
+ --index vector_index --field embedding \
94
+ --filter '{"category": "guides"}' --limit 5
95
+ ```
96
+
97
+ ### `vai index` — Manage Atlas Vector Search indexes
98
+
99
+ Requires `MONGODB_URI` environment variable.
100
+
101
+ ```bash
102
+ # Create an index
103
+ vai index create --db myapp --collection docs --field embedding \
104
+ --dimensions 1024 --similarity cosine --index-name my_index
105
+
106
+ # List indexes
107
+ vai index list --db myapp --collection docs
108
+
109
+ # Delete an index
110
+ vai index delete --db myapp --collection docs --index-name my_index
111
+ ```
112
+
113
+ ### `vai models` — List available models
114
+
115
+ ```bash
116
+ # All models
117
+ vai models
118
+
119
+ # Filter by type
120
+ vai models --type embedding
121
+ vai models --type reranking
122
+ ```
123
+
124
+ ## Full Pipeline Example
125
+
126
+ ```bash
127
+ export VOYAGE_API_KEY="your-key"
128
+ export MONGODB_URI="mongodb+srv://user:pass@cluster.mongodb.net/"
129
+
130
+ # 1. Store documents with embeddings
131
+ vai store --db myapp --collection articles --field embedding \
132
+ --text "MongoDB Atlas provides a fully managed cloud database" \
133
+ --metadata '{"title": "Atlas Overview"}'
134
+
135
+ vai store --db myapp --collection articles --field embedding \
136
+ --text "Vector search enables semantic similarity matching" \
137
+ --metadata '{"title": "Vector Search Guide"}'
138
+
139
+ # 2. Create a vector search index
140
+ vai index create --db myapp --collection articles --field embedding \
141
+ --dimensions 1024 --similarity cosine --index-name article_search
142
+
143
+ # 3. Search (wait ~60s for index to build on small collections)
144
+ vai search --query "how does cloud database work" \
145
+ --db myapp --collection articles --index article_search --field embedding
146
+
147
+ # 4. Rerank for precision
148
+ vai rerank --query "how does cloud database work" \
149
+ --documents "MongoDB Atlas provides a fully managed cloud database" \
150
+ "Vector search enables semantic similarity matching"
151
+ ```
152
+
153
+ ## Environment Variables
154
+
155
+ | Variable | Required For | Description |
156
+ |----------|-------------|-------------|
157
+ | `VOYAGE_API_KEY` | embed, rerank, store, search | [Model API key](https://www.mongodb.com/docs/voyageai/management/api-keys/) from MongoDB Atlas |
158
+ | `MONGODB_URI` | store, search, index | MongoDB Atlas connection string |
159
+
160
+ ## Global Flags
161
+
162
+ All commands support:
163
+
164
+ | Flag | Description |
165
+ |------|-------------|
166
+ | `--json` | Machine-readable JSON output |
167
+ | `--quiet` | Suppress non-essential output |
168
+
169
+ ## Models
170
+
171
+ | Model | Type | Dimensions | Price/1M tokens | Best For |
172
+ |-------|------|-----------|----------------|----------|
173
+ | voyage-4-large | embedding | 1024 (default), 256-2048 | $0.12 | Best quality |
174
+ | voyage-4 | embedding | 1024 (default), 256-2048 | $0.06 | Balanced |
175
+ | voyage-4-lite | embedding | 1024 (default), 256-2048 | $0.02 | Lowest cost |
176
+ | voyage-code-3 | embedding | 1024 (default), 256-2048 | $0.18 | Code |
177
+ | voyage-finance-2 | embedding | 1024 | $0.12 | Finance |
178
+ | voyage-law-2 | embedding | 1024 | $0.12 | Legal |
179
+ | voyage-multimodal-3.5 | embedding | 1024 (default), 256-2048 | $0.12 + pixels | Text + images |
180
+ | rerank-2.5 | reranking | — | $0.05 | Best reranking |
181
+ | rerank-2.5-lite | reranking | — | $0.02 | Fast reranking |
182
+
183
+ Free tier: 200M tokens for most models (50M for domain-specific models). All Voyage 4 series models share the same embedding space.
184
+
185
+ ## Requirements
186
+
187
+ - Node.js 18+
188
+ - A [MongoDB Atlas](https://www.mongodb.com/atlas) account (free tier works)
189
+ - A [Voyage AI model API key](https://www.mongodb.com/docs/voyageai/management/api-keys/) (created in Atlas)
190
+
191
+ ## License
192
+
193
+ MIT
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "voyageai-cli",
3
+ "version": "1.1.0",
4
+ "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
+ "bin": {
6
+ "vai": "./src/cli.js"
7
+ },
8
+ "keywords": [
9
+ "voyage-ai",
10
+ "voyageai",
11
+ "embeddings",
12
+ "vector-search",
13
+ "reranking",
14
+ "mongodb",
15
+ "atlas",
16
+ "semantic-search",
17
+ "rag",
18
+ "cli"
19
+ ],
20
+ "author": "Michael Lynn",
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/mrlynn/voyageai-cli"
25
+ },
26
+ "homepage": "https://github.com/mrlynn/voyageai-cli#readme",
27
+ "bugs": {
28
+ "url": "https://github.com/mrlynn/voyageai-cli/issues"
29
+ },
30
+ "engines": {
31
+ "node": ">=18.0.0"
32
+ },
33
+ "dependencies": {
34
+ "commander": "^12.0.0",
35
+ "mongodb": "^6.0.0"
36
+ }
37
+ }
package/src/cli.js ADDED
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const { program } = require('commander');
5
+ const { registerEmbed } = require('./commands/embed');
6
+ const { registerRerank } = require('./commands/rerank');
7
+ const { registerStore } = require('./commands/store');
8
+ const { registerSearch } = require('./commands/search');
9
+ const { registerIndex } = require('./commands/index');
10
+ const { registerModels } = require('./commands/models');
11
+
// Read the version from package.json so `vai --version` always matches the
// published package instead of drifting from a hand-maintained literal.
const { version } = require('../package.json');

program
  .name('vai')
  .description('Voyage AI embeddings, reranking, and Atlas Vector Search CLI')
  .version(version);

// Each register* function attaches one subcommand to the shared program.
registerEmbed(program);
registerRerank(program);
registerStore(program);
registerSearch(program);
registerIndex(program);
registerModels(program);

// The subcommand handlers are async, so use parseAsync and surface any
// rejection that escapes a handler instead of leaving it unhandled.
program.parseAsync().catch((err) => {
  console.error(`Error: ${err.message}`);
  process.exitCode = 1;
});
@@ -0,0 +1,68 @@
1
+ 'use strict';
2
+
3
+ const { DEFAULT_EMBED_MODEL } = require('../lib/catalog');
4
+ const { generateEmbeddings } = require('../lib/api');
5
+ const { resolveTextInput } = require('../lib/input');
6
+
/**
 * Attach the `embed` subcommand to a Commander program.
 *
 * The handler resolves its input through resolveTextInput(text, opts.file),
 * requests embeddings through generateEmbeddings, and prints the response as
 * raw arrays (`--output-format array`), full JSON (`--json`), or a short
 * human-readable summary followed by a preview of each vector.
 * @param {import('commander').Command} program
 */
function registerEmbed(program) {
  const toInteger = (value) => parseInt(value, 10);

  const runEmbed = async (text, opts) => {
    try {
      const inputs = await resolveTextInput(text, opts.file);
      const { model, inputType, dimensions } = opts;
      const response = await generateEmbeddings(inputs, { model, inputType, dimensions });

      // Raw mode: a lone vector is printed bare, several as an array of arrays.
      if (opts.outputFormat === 'array') {
        const payload = response.data.length === 1
          ? response.data[0].embedding
          : response.data.map((entry) => entry.embedding);
        console.log(JSON.stringify(payload));
        return;
      }

      if (opts.json) {
        console.log(JSON.stringify(response, null, 2));
        return;
      }

      // Summary header, skipped under --quiet.
      const verbose = !opts.quiet;
      if (verbose) {
        console.log(`Model: ${response.model}`);
        console.log(`Texts: ${response.data.length}`);
        if (response.usage) {
          console.log(`Tokens: ${response.usage.total_tokens}`);
        }
        console.log(`Dimensions: ${response.data[0]?.embedding?.length || 'N/A'}`);
        console.log('');
      }

      // One line per input: the first five components plus the vector length.
      response.data.forEach((entry) => {
        const head = entry.embedding
          .slice(0, 5)
          .map((component) => component.toFixed(6))
          .join(', ');
        console.log(`[${entry.index}] [${head}, ...] (${entry.embedding.length} dims)`);
      });
    } catch (err) {
      console.error(`Error: ${err.message}`);
      process.exit(1);
    }
  };

  program
    .command('embed [text]')
    .description('Generate embeddings for text')
    .option('-m, --model <model>', 'Embedding model', DEFAULT_EMBED_MODEL)
    .option('-t, --input-type <type>', 'Input type: query or document')
    .option('-d, --dimensions <n>', 'Output dimensions', toInteger)
    .option('-f, --file <path>', 'Read text from file')
    .option('-o, --output-format <format>', 'Output format: json or array', 'json')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(runEmbed);
}
67
+
68
+ module.exports = { registerEmbed };
@@ -0,0 +1,156 @@
1
+ 'use strict';
2
+
3
+ const { DEFAULT_DIMENSIONS } = require('../lib/catalog');
4
+ const { getMongoCollection } = require('../lib/mongo');
5
+
/**
 * Open the target collection, run `work(collection)`, and always close the
 * client afterwards.
 *
 * Failures are printed to stderr and recorded through process.exitCode rather
 * than process.exit(): process.exit() terminates immediately, which would skip
 * the `finally` block and leave the connection unclosed.
 * @param {object} opts - Command options; reads `db` and `collection`
 * @param {(collection: object) => Promise<void>} work - Body of the subcommand
 * @param {(err: Error) => (string[]|null|undefined)} [explainError] - Optional
 *   hook returning custom stderr lines for an error; a nullish result falls
 *   back to the generic `Error: <message>` line
 * @returns {Promise<void>}
 */
async function runIndexAction(opts, work, explainError) {
  let client;
  try {
    const connection = await getMongoCollection(opts.db, opts.collection);
    client = connection.client;
    await work(connection.collection);
  } catch (err) {
    const lines = explainError?.(err) ?? [`Error: ${err.message}`];
    for (const line of lines) {
      console.error(line);
    }
    process.exitCode = 1;
  } finally {
    if (client) await client.close();
  }
}

/**
 * Register the index command (with create, list, delete subcommands) on a Commander program.
 * @param {import('commander').Command} program
 */
function registerIndex(program) {
  const indexCmd = program
    .command('index')
    .description('Manage Atlas Vector Search indexes');

  // ── index create ──
  indexCmd
    .command('create')
    .description('Create a vector search index')
    .requiredOption('--db <database>', 'Database name')
    .requiredOption('--collection <name>', 'Collection name')
    .requiredOption('--field <name>', 'Embedding field name')
    .option('-d, --dimensions <n>', 'Vector dimensions', (v) => parseInt(v, 10), DEFAULT_DIMENSIONS)
    .option('-s, --similarity <type>', 'Similarity function: cosine, dotProduct, euclidean', 'cosine')
    .option('-n, --index-name <name>', 'Index name', 'default')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action((opts) => runIndexAction(
      opts,
      async (collection) => {
        // Resolve the effective dimension once so the index definition and
        // the confirmation output can never disagree (e.g. on a NaN input).
        const numDimensions = parseInt(opts.dimensions, 10) || DEFAULT_DIMENSIONS;

        const indexDef = {
          name: opts.indexName,
          type: 'vectorSearch',
          definition: {
            fields: [
              {
                type: 'vector',
                path: opts.field,
                numDimensions,
                similarity: opts.similarity,
              },
            ],
          },
        };

        const result = await collection.createSearchIndex(indexDef);

        if (opts.json) {
          console.log(JSON.stringify({ indexName: result, definition: indexDef }, null, 2));
        } else if (!opts.quiet) {
          console.log(`✓ Vector search index created: "${result}"`);
          console.log(` Database: ${opts.db}`);
          console.log(` Collection: ${opts.collection}`);
          console.log(` Field: ${opts.field}`);
          console.log(` Dimensions: ${numDimensions}`);
          console.log(` Similarity: ${opts.similarity}`);
          console.log('');
          console.log('Note: Index may take a few minutes to become ready.');
        }
      },
      (err) => {
        // Give a targeted hint for the common duplicate-name failure.
        if (err.message && err.message.includes('already exists')) {
          return [
            `Error: Index "${opts.indexName}" already exists on ${opts.db}.${opts.collection}`,
            'Use a different --index-name or delete the existing index first.',
          ];
        }
        return null;
      },
    ));

  // ── index list ──
  indexCmd
    .command('list')
    .description('List all search indexes on a collection')
    .requiredOption('--db <database>', 'Database name')
    .requiredOption('--collection <name>', 'Collection name')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action((opts) => runIndexAction(opts, async (collection) => {
      const indexes = await collection.listSearchIndexes().toArray();

      if (opts.json) {
        console.log(JSON.stringify(indexes, null, 2));
        return;
      }

      if (indexes.length === 0) {
        console.log(`No search indexes found on ${opts.db}.${opts.collection}`);
        return;
      }

      if (!opts.quiet) {
        console.log(`Search indexes on ${opts.db}.${opts.collection}:`);
        console.log('');
      }

      for (const idx of indexes) {
        console.log(` Name: ${idx.name}`);
        console.log(` Type: ${idx.type || 'N/A'}`);
        console.log(` Status: ${idx.status || 'N/A'}`);
        if (idx.latestDefinition) {
          console.log(` Fields: ${JSON.stringify(idx.latestDefinition.fields || [])}`);
        }
        console.log('');
      }
    }));

  // ── index delete ──
  indexCmd
    .command('delete')
    .description('Drop a search index')
    .requiredOption('--db <database>', 'Database name')
    .requiredOption('--collection <name>', 'Collection name')
    .requiredOption('-n, --index-name <name>', 'Index name to delete')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action((opts) => runIndexAction(opts, async (collection) => {
      await collection.dropSearchIndex(opts.indexName);

      if (opts.json) {
        console.log(JSON.stringify({ dropped: opts.indexName }, null, 2));
      } else if (!opts.quiet) {
        console.log(`✓ Dropped search index: "${opts.indexName}"`);
        console.log(` Database: ${opts.db}`);
        console.log(` Collection: ${opts.collection}`);
      }
    }));
}
155
+
156
+ module.exports = { registerIndex };
@@ -0,0 +1,54 @@
1
+ 'use strict';
2
+
3
+ const { MODEL_CATALOG } = require('../lib/catalog');
4
+ const { API_BASE } = require('../lib/api');
5
+ const { formatTable } = require('../lib/format');
6
+
/**
 * Attach the `models` subcommand to a Commander program.
 *
 * The handler filters MODEL_CATALOG by `--type` (unless it is `all`) and
 * prints the result either as JSON or as a table built by formatTable.
 * @param {import('commander').Command} program
 */
function registerModels(program) {
  const listModels = (opts) => {
    const selected = opts.type === 'all'
      ? MODEL_CATALOG
      : MODEL_CATALOG.filter((entry) => entry.type === opts.type);

    if (opts.json) {
      console.log(JSON.stringify(selected, null, 2));
      return;
    }

    if (selected.length === 0) {
      console.log(`No models found for type: ${opts.type}`);
      return;
    }

    const verbose = !opts.quiet;

    // Banner above the table.
    if (verbose) {
      console.log('Voyage AI Models');
      console.log(`(via MongoDB AI API — ${API_BASE})`);
      console.log('');
    }

    const columns = ['Model', 'Type', 'Context', 'Dimensions', 'Price', 'Best For'];
    const tableRows = selected.map(({ name, type, context, dimensions, price, bestFor }) => (
      [name, type, context, dimensions, price, bestFor]
    ));
    console.log(formatTable(columns, tableRows));

    // Footnotes below the table.
    if (verbose) {
      console.log('');
      console.log('Free tier: 200M tokens (most models), 50M (domain-specific)');
      console.log('All 4-series models share the same embedding space.');
    }
  };

  program
    .command('models')
    .description('List available Voyage AI models')
    .option('-t, --type <type>', 'Filter by type: embedding, reranking, or all', 'all')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(listModels);
}
53
+
54
+ module.exports = { registerModels };
@@ -0,0 +1,110 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const { DEFAULT_RERANK_MODEL } = require('../lib/catalog');
5
+ const { apiRequest } = require('../lib/api');
6
+
/**
 * Turn raw document input (file contents or piped stdin) into a list of strings.
 *
 * - A JSON array yields one document per element: strings are used as-is,
 *   objects with a non-empty string `text` contribute that text, and anything
 *   else (including `null`) is serialized with JSON.stringify.
 * - Any other JSON value yields a single document.
 * - Input that is not valid JSON is treated as newline-delimited text, with
 *   blank lines dropped.
 * @param {string} raw - Unparsed input
 * @returns {string[]}
 */
function parseDocuments(raw) {
  const content = raw.trim();

  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch {
    return content.split('\n').filter((line) => line.trim());
  }

  if (!Array.isArray(parsed)) {
    return [typeof parsed === 'string' ? parsed : JSON.stringify(parsed)];
  }

  return parsed.map((item) => {
    if (typeof item === 'string') return item;
    // Guard against null / primitive elements before reading `.text`.
    if (item !== null && typeof item === 'object' && typeof item.text === 'string' && item.text) {
      return item.text;
    }
    return JSON.stringify(item);
  });
}

/**
 * Register the rerank command on a Commander program.
 *
 * Documents come from `--documents`, then `--documents-file` (which takes
 * precedence when both are given), then piped stdin as a last resort. File and
 * stdin input share the same parsing rules (see parseDocuments).
 * @param {import('commander').Command} program
 */
function registerRerank(program) {
  program
    .command('rerank')
    .description('Rerank documents against a query')
    .requiredOption('--query <text>', 'Search query')
    .option('--documents <docs...>', 'Documents to rerank')
    .option('--documents-file <path>', 'File with documents (JSON array or newline-delimited)')
    .option('-m, --model <model>', 'Reranking model', DEFAULT_RERANK_MODEL)
    .option('-k, --top-k <n>', 'Return top K results', (v) => parseInt(v, 10))
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      try {
        let documents = opts.documents;

        if (opts.documentsFile) {
          documents = parseDocuments(fs.readFileSync(opts.documentsFile, 'utf-8'));
        }

        // Also support stdin for documents
        if (!documents && !process.stdin.isTTY) {
          const chunks = [];
          for await (const chunk of process.stdin) {
            chunks.push(chunk);
          }
          documents = parseDocuments(Buffer.concat(chunks).toString('utf-8'));
        }

        if (!documents || documents.length === 0) {
          console.error('Error: No documents provided. Use --documents, --documents-file, or pipe via stdin.');
          process.exit(1);
        }

        const body = {
          query: opts.query,
          documents,
          model: opts.model,
        };
        if (opts.topK) {
          body.top_k = opts.topK;
        }

        const result = await apiRequest('/rerank', body);

        if (opts.json) {
          console.log(JSON.stringify(result, null, 2));
          return;
        }

        if (!opts.quiet) {
          console.log(`Model: ${result.model}`);
          console.log(`Query: "${opts.query}"`);
          console.log(`Results: ${result.data?.length || 0}`);
          if (result.usage) {
            console.log(`Tokens: ${result.usage.total_tokens}`);
          }
          console.log('');
        }

        if (result.data) {
          for (const item of result.data) {
            // Fall back to an empty preview if the API returns an index we did not send.
            const source = documents[item.index] ?? '';
            const docPreview = source.substring(0, 80);
            const ellipsis = source.length > 80 ? '...' : '';
            console.log(`[${item.index}] Score: ${item.relevance_score.toFixed(6)} "${docPreview}${ellipsis}"`);
          }
        }
      } catch (err) {
        console.error(`Error: ${err.message}`);
        process.exit(1);
      }
    });
}
109
+
110
+ module.exports = { registerRerank };
@@ -0,0 +1,111 @@
1
+ 'use strict';
2
+
3
+ const { DEFAULT_EMBED_MODEL } = require('../lib/catalog');
4
+ const { generateEmbeddings } = require('../lib/api');
5
+ const { getMongoCollection } = require('../lib/mongo');
6
+
/**
 * Parse the `--filter` option into a $vectorSearch pre-filter document.
 * @param {string|undefined} json - Raw option value
 * @returns {object|undefined} The parsed filter, or undefined when no filter was given
 * @throws {Error} When the value is not valid JSON or is not a JSON object
 */
function parseSearchFilter(json) {
  if (!json) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    throw new Error('Invalid filter JSON. Ensure it is valid JSON.');
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('Invalid filter JSON. Filter must be a JSON object.');
  }
  return parsed;
}

/**
 * Register the search command on a Commander program.
 *
 * The handler validates local options first, then embeds the query, runs a
 * $vectorSearch aggregation, and prints the matches with the embedding field
 * stripped out.
 * @param {import('commander').Command} program
 */
function registerSearch(program) {
  program
    .command('search')
    .description('Vector search against Atlas collection')
    .requiredOption('--query <text>', 'Search query text')
    .requiredOption('--db <database>', 'Database name')
    .requiredOption('--collection <name>', 'Collection name')
    .requiredOption('--index <name>', 'Vector search index name')
    .requiredOption('--field <name>', 'Embedding field name')
    .option('-m, --model <model>', 'Embedding model', DEFAULT_EMBED_MODEL)
    .option('--input-type <type>', 'Input type for query embedding', 'query')
    .option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
    .option('-l, --limit <n>', 'Maximum results', (v) => parseInt(v, 10), 10)
    .option('--min-score <n>', 'Minimum similarity score', parseFloat)
    .option('--num-candidates <n>', 'Number of candidates for ANN search', (v) => parseInt(v, 10))
    .option('--filter <json>', 'Pre-filter JSON for $vectorSearch (e.g. \'{"category": "docs"}\')')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      let client;
      try {
        // Validate local input before the embedding request and the database
        // connection, so a typo fails fast without any network round trip.
        if (!Number.isInteger(opts.limit) || opts.limit < 1) {
          throw new Error('--limit must be a positive integer.');
        }
        const filter = parseSearchFilter(opts.filter);

        const embedResult = await generateEmbeddings([opts.query], {
          model: opts.model,
          inputType: opts.inputType,
          dimensions: opts.dimensions,
        });

        const queryVector = embedResult.data[0].embedding;
        // Default candidate pool: 15x the limit, capped at 10000.
        const numCandidates = opts.numCandidates || Math.min(opts.limit * 15, 10000);

        const { client: c, collection } = await getMongoCollection(opts.db, opts.collection);
        client = c;

        const vectorSearchStage = {
          index: opts.index,
          path: opts.field,
          queryVector,
          numCandidates,
          limit: opts.limit,
          ...(filter ? { filter } : {}),
        };

        const pipeline = [
          { $vectorSearch: vectorSearchStage },
          { $addFields: { score: { $meta: 'vectorSearchScore' } } },
          // Only add the threshold stage when a usable number was supplied.
          ...(Number.isFinite(opts.minScore) ? [{ $match: { score: { $gte: opts.minScore } } }] : []),
        ];

        const results = await collection.aggregate(pipeline).toArray();

        // Drop the (large) embedding vector from every result before printing.
        const cleanResults = results.map((doc) => {
          const { [opts.field]: _vector, ...clean } = doc;
          return clean;
        });

        if (opts.json) {
          console.log(JSON.stringify(cleanResults, null, 2));
          return;
        }

        if (!opts.quiet) {
          console.log(`Query: "${opts.query}"`);
          console.log(`Results: ${cleanResults.length}`);
          console.log('');
        }

        if (cleanResults.length === 0) {
          console.log('No results found.');
          return;
        }

        cleanResults.forEach((doc, i) => {
          const score = doc.score?.toFixed(6) || 'N/A';
          console.log(`── Result ${i + 1} (score: ${score}) ──`);
          // Only string text can be previewed; anything else is reported as missing.
          const hasText = typeof doc.text === 'string' && doc.text.length > 0;
          const textPreview = hasText ? doc.text.substring(0, 200) : 'No text field';
          const ellipsis = hasText && doc.text.length > 200 ? '...' : '';
          console.log(` ${textPreview}${ellipsis}`);
          console.log(` _id: ${doc._id}`);
          console.log('');
        });
      } catch (err) {
        console.error(`Error: ${err.message}`);
        // exitCode (not process.exit) so the finally block below still runs.
        process.exitCode = 1;
      } finally {
        if (client) await client.close();
      }
    });
}
110
+
111
+ module.exports = { registerSearch };
@@ -0,0 +1,186 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const { DEFAULT_EMBED_MODEL } = require('../lib/catalog');
5
+ const { generateEmbeddings } = require('../lib/api');
6
+ const { resolveTextInput } = require('../lib/input');
7
+ const { getMongoCollection } = require('../lib/mongo');
8
+
/**
 * Parse the `--metadata` option into an object of extra document fields.
 * @param {string|undefined} json - Raw option value
 * @returns {object} Parsed metadata, or an empty object when none was given
 * @throws {Error} When the value is not valid JSON or is not a JSON object
 */
function parseStoreMetadata(json) {
  if (!json) return {};

  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    throw new Error('Invalid metadata JSON. Ensure it is valid JSON.');
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('Invalid metadata JSON. Metadata must be a JSON object.');
  }
  return parsed;
}

/**
 * Register the store command on a Commander program.
 *
 * A `--file` ending in `.jsonl` is delegated to handleBatchStore. Otherwise
 * the first text returned by resolveTextInput is embedded and inserted as one
 * document, with any `--metadata` fields merged on top.
 * @param {import('commander').Command} program
 */
function registerStore(program) {
  program
    .command('store')
    .description('Embed text and store in MongoDB Atlas')
    .requiredOption('--db <database>', 'Database name')
    .requiredOption('--collection <name>', 'Collection name')
    .requiredOption('--field <name>', 'Embedding field name')
    .option('--text <text>', 'Text to embed and store')
    .option('-f, --file <path>', 'File to embed and store (text file or .jsonl for batch mode)')
    .option('-m, --model <model>', 'Embedding model', DEFAULT_EMBED_MODEL)
    .option('--input-type <type>', 'Input type: query or document', 'document')
    .option('-d, --dimensions <n>', 'Output dimensions', (v) => parseInt(v, 10))
    .option('--metadata <json>', 'Additional metadata as JSON')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      let client;
      try {
        // Batch mode: .jsonl file
        if (opts.file && opts.file.endsWith('.jsonl')) {
          await handleBatchStore(opts);
          return;
        }

        // Validate metadata before the embedding request so malformed input
        // fails fast instead of after a billed API call.
        const metadata = parseStoreMetadata(opts.metadata);

        const texts = await resolveTextInput(opts.text, opts.file);
        const textContent = texts[0];

        const embedResult = await generateEmbeddings([textContent], {
          model: opts.model,
          inputType: opts.inputType,
          dimensions: opts.dimensions,
        });

        const embedding = embedResult.data[0].embedding;

        // Metadata is merged last, so its keys override the generated fields.
        const doc = Object.assign({
          text: textContent,
          [opts.field]: embedding,
          model: opts.model || DEFAULT_EMBED_MODEL,
          dimensions: embedding.length,
          createdAt: new Date(),
        }, metadata);

        const { client: c, collection } = await getMongoCollection(opts.db, opts.collection);
        client = c;
        const result = await collection.insertOne(doc);

        if (opts.json) {
          console.log(JSON.stringify({
            insertedId: result.insertedId,
            dimensions: embedding.length,
            model: doc.model,
            tokens: embedResult.usage?.total_tokens,
          }, null, 2));
        } else if (!opts.quiet) {
          console.log(`✓ Stored document: ${result.insertedId}`);
          console.log(` Database: ${opts.db}`);
          console.log(` Collection: ${opts.collection}`);
          console.log(` Field: ${opts.field}`);
          console.log(` Dimensions: ${embedding.length}`);
          console.log(` Model: ${doc.model}`);
          if (embedResult.usage) {
            console.log(` Tokens: ${embedResult.usage.total_tokens}`);
          }
        }
      } catch (err) {
        console.error(`Error: ${err.message}`);
        // exitCode (not process.exit) so the finally block below still runs.
        process.exitCode = 1;
      } finally {
        if (client) await client.close();
      }
    });
}
96
+
/**
 * Parse and validate the contents of a JSONL batch file.
 *
 * Blank lines are skipped, but reported line numbers always refer to the
 * position in the original file.
 * @param {string} content - Raw file contents
 * @returns {object[]} One record per non-blank line, each with a string `text`
 * @throws {Error} On an empty file, invalid JSON, a missing/non-string `text`,
 *   or `metadata` that is not a JSON object
 */
function parseJsonlRecords(content) {
  const records = [];

  content.split('\n').forEach((line, i) => {
    if (!line.trim()) return;
    const lineNumber = i + 1;

    let record;
    try {
      record = JSON.parse(line);
    } catch (e) {
      throw new Error(`Invalid JSON on line ${lineNumber}: ${e.message}`);
    }

    const isObject = record !== null && typeof record === 'object' && !Array.isArray(record);
    if (!isObject || typeof record.text !== 'string' || record.text.length === 0) {
      throw new Error(`Each JSONL line must have a "text" field (line ${lineNumber}).`);
    }

    const { metadata } = record;
    if (metadata != null && (typeof metadata !== 'object' || Array.isArray(metadata))) {
      throw new Error(`"metadata" must be a JSON object (line ${lineNumber}).`);
    }

    records.push(record);
  });

  if (records.length === 0) {
    throw new Error('JSONL file is empty.');
  }
  return records;
}

/**
 * Handle batch store from a .jsonl file.
 * Each line: {"text": "...", "metadata": {...}}
 *
 * All texts are embedded in a single generateEmbeddings call and inserted
 * with insertMany. Errors are printed to stderr and reported through
 * process.exitCode so the database client is still closed.
 * @param {object} opts - Command options
 * @returns {Promise<void>}
 */
async function handleBatchStore(opts) {
  let client;
  try {
    const records = parseJsonlRecords(fs.readFileSync(opts.file, 'utf-8'));
    const texts = records.map((record) => record.text);

    if (!opts.quiet) {
      console.log(`Embedding ${texts.length} documents...`);
    }

    const embedResult = await generateEmbeddings(texts, {
      model: opts.model,
      inputType: opts.inputType,
      dimensions: opts.dimensions,
    });

    // Records are paired with embeddings by position, so the counts must agree.
    if (!Array.isArray(embedResult.data) || embedResult.data.length !== records.length) {
      throw new Error(`Expected ${records.length} embeddings but received ${embedResult.data?.length ?? 0}.`);
    }

    const model = opts.model || DEFAULT_EMBED_MODEL;
    const docs = records.map((record, i) => {
      const embedding = embedResult.data[i].embedding;
      // Metadata is merged last, so its keys override the generated fields.
      return Object.assign({
        text: record.text,
        [opts.field]: embedding,
        model,
        dimensions: embedding.length,
        createdAt: new Date(),
      }, record.metadata);
    });

    const { client: c, collection } = await getMongoCollection(opts.db, opts.collection);
    client = c;
    const result = await collection.insertMany(docs);

    if (opts.json) {
      console.log(JSON.stringify({
        insertedCount: result.insertedCount,
        insertedIds: result.insertedIds,
        dimensions: docs[0]?.dimensions,
        model,
        tokens: embedResult.usage?.total_tokens,
      }, null, 2));
    } else if (!opts.quiet) {
      console.log(`✓ Stored ${result.insertedCount} documents`);
      console.log(` Database: ${opts.db}`);
      console.log(` Collection: ${opts.collection}`);
      console.log(` Field: ${opts.field}`);
      console.log(` Dimensions: ${docs[0]?.dimensions}`);
      console.log(` Model: ${model}`);
      if (embedResult.usage) {
        console.log(` Tokens: ${embedResult.usage.total_tokens}`);
      }
    }
  } catch (err) {
    console.error(`Error: ${err.message}`);
    // exitCode (not process.exit) so the finally block below still runs.
    process.exitCode = 1;
  } finally {
    if (client) await client.close();
  }
}
185
+
186
+ module.exports = { registerStore };
package/src/lib/api.js ADDED
@@ -0,0 +1,107 @@
1
+ 'use strict';
2
+
3
// Base URL that apiRequest() prepends to every endpoint path.
const API_BASE = 'https://ai.mongodb.com/v1';
// Upper bound on the retries apiRequest() performs after an HTTP 429 response.
const MAX_RETRIES = 3;
5
+
6
/**
 * Read the Voyage API key from the VOYAGE_API_KEY environment variable.
 *
 * If the variable is unset or empty, setup instructions are written to
 * stderr and the process is terminated with exit code 1.
 * @returns {string} The configured API key.
 */
function requireApiKey() {
  const { VOYAGE_API_KEY: apiKey } = process.env;
  if (apiKey) {
    return apiKey;
  }

  const instructions = [
    'Error: VOYAGE_API_KEY environment variable is not set.',
    '',
    'Get one from MongoDB Atlas → AI Models → Create model API key',
    'Then: export VOYAGE_API_KEY="your-key-here"',
  ];
  for (const line of instructions) {
    console.error(line);
  }
  process.exit(1);
  // Only reached if process.exit has been replaced (e.g. in tests).
  return apiKey;
}
21
+
22
/**
 * Pause asynchronously for a fixed duration.
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
30
+
31
/**
 * Make an authenticated POST request to the Voyage AI API, retrying on 429.
 *
 * A 429 response is retried up to MAX_RETRIES times. The wait honours a
 * numeric `Retry-After` header (seconds) and otherwise falls back to
 * exponential backoff (1s, 2s, 4s, ...). Any other non-2xx response, or a
 * 429 once the retries are used up, is reported on stderr and terminates the
 * process with exit code 1.
 *
 * @param {string} endpoint - API endpoint path (e.g., '/embeddings')
 * @param {object} body - Request body, serialized as JSON
 * @returns {Promise<object>} Parsed JSON body of the successful response
 */
async function apiRequest(endpoint, body) {
  const apiKey = requireApiKey();
  const url = `${API_BASE}${endpoint}`;

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });

    if (response.status === 429 && attempt < MAX_RETRIES) {
      // Retry-After may also be an HTTP-date; parseInt yields NaN for that
      // form, so fall back to exponential backoff instead of waiting "NaN" ms.
      const retryAfterSec = Number.parseInt(response.headers.get('Retry-After') ?? '', 10);
      const waitMs = Number.isFinite(retryAfterSec) && retryAfterSec >= 0
        ? retryAfterSec * 1000
        : 2 ** attempt * 1000;
      console.error(`Rate limited (429). Retrying in ${waitMs / 1000}s... (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(waitMs);
      continue;
    }

    if (!response.ok) {
      // A response body can be consumed only once: read it as text and then
      // attempt to parse, rather than calling json() followed by text().
      const rawBody = await response.text();
      let errorDetail = rawBody;
      try {
        const errBody = JSON.parse(rawBody);
        errorDetail = errBody.detail || errBody.message || errBody.error?.message || JSON.stringify(errBody);
      } catch {
        // Body is not a JSON object; report the raw text unchanged.
      }
      console.error(`API Error (${response.status}): ${errorDetail}`);
      process.exit(1);
    }

    return response.json();
  }
}
74
+
75
+ /**
76
+ * Generate embeddings for an array of texts.
77
+ * @param {string[]} texts - Array of texts to embed
78
+ * @param {object} options - Embedding options
79
+ * @param {string} [options.model] - Model name
80
+ * @param {string} [options.inputType] - Input type (query|document)
81
+ * @param {number} [options.dimensions] - Output dimensions
82
+ * @returns {Promise<object>} API response with embeddings
83
+ */
84
+ async function generateEmbeddings(texts, options = {}) {
85
+ const { DEFAULT_EMBED_MODEL } = require('./catalog');
86
+
87
+ const body = {
88
+ input: texts,
89
+ model: options.model || DEFAULT_EMBED_MODEL,
90
+ };
91
+
92
+ if (options.inputType) {
93
+ body.input_type = options.inputType;
94
+ }
95
+ if (options.dimensions) {
96
+ body.output_dimension = options.dimensions;
97
+ }
98
+
99
+ return apiRequest('/embeddings', body);
100
+ }
101
+
102
+ module.exports = {
103
+ API_BASE,
104
+ requireApiKey,
105
+ apiRequest,
106
+ generateEmbeddings,
107
+ };
@@ -0,0 +1,26 @@
1
+ 'use strict';
2
+
3
// Default model names and embedding dimension count exported by this module.
const DEFAULT_EMBED_MODEL = 'voyage-4-large';
const DEFAULT_RERANK_MODEL = 'rerank-2.5';
const DEFAULT_DIMENSIONS = 1024;

/**
 * Static catalog of models, one entry per model. Every field is a
 * human-readable string; `type` is either 'embedding' or 'reranking', and
 * reranking entries use '—' for `dimensions`.
 * @type {Array<{name: string, type: string, context: string, dimensions: string, price: string, bestFor: string}>}
 */
const MODEL_CATALOG = [
  { name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', bestFor: 'Best quality, multilingual' },
  { name: 'voyage-4', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Balanced quality/perf' },
  { name: 'voyage-4-lite', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.02/1M tokens', bestFor: 'Lowest cost' },
  { name: 'voyage-code-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Code retrieval' },
  { name: 'voyage-finance-2', type: 'embedding', context: '32K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Finance' },
  { name: 'voyage-law-2', type: 'embedding', context: '16K', dimensions: '1024', price: '$0.12/1M tokens', bestFor: 'Legal' },
  { name: 'voyage-context-3', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Contextualized chunks' },
  { name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video' },
  { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking' },
  { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking' },
];
20
+
21
+ module.exports = {
22
+ DEFAULT_EMBED_MODEL,
23
+ DEFAULT_RERANK_MODEL,
24
+ DEFAULT_DIMENSIONS,
25
+ MODEL_CATALOG,
26
+ };
@@ -0,0 +1,24 @@
1
+ 'use strict';
2
+
3
/**
 * Format a simple table for terminal output.
 *
 * Each column is as wide as its longest cell or header, padded with one space
 * on each side. Columns are joined with '│' and the header is separated from
 * the data by a '─'/'┼' rule. Cells are coerced with String(), so numbers
 * (including 0) render as-is; null and undefined render as empty cells.
 *
 * @param {string[]} headers - Column headers
 * @param {Array<Array<*>>} rows - Table rows; cells are normally strings
 * @returns {string} The table as newline-joined lines (no trailing newline)
 */
function formatTable(headers, rows) {
  // Coerce up front so width calculation and padding never see a non-string.
  const toText = (value) => (value == null ? '' : String(value));
  const headerCells = headers.map(toText);
  const bodyCells = rows.map((row) => row.map(toText));

  const colWidths = headerCells.map((header, i) =>
    bodyCells.reduce((widest, row) => Math.max(widest, (row[i] ?? '').length), header.length)
  );

  const renderRow = (cells) =>
    cells.map((cell, i) => ` ${cell.padEnd(colWidths[i])} `).join('│');

  const sep = colWidths.map((w) => '─'.repeat(w + 2)).join('┼');

  return [renderRow(headerCells), sep, ...bodyCells.map(renderRow)].join('\n');
}
23
+
24
+ module.exports = { formatTable };
@@ -0,0 +1,40 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+
5
/**
 * Read text input from argument, --file flag, or stdin.
 *
 * Precedence: `filePath` (whole file, trimmed, as one text), then `textArg`
 * (as one text), then piped stdin (one text per non-blank line). Both LF and
 * CRLF line endings are accepted on stdin, so Windows-style input does not
 * leave a trailing carriage return on each text. When no input is available
 * an error is printed to stderr and the process exits with code 1.
 *
 * @param {string|undefined} textArg - Text argument from CLI
 * @param {string|undefined} filePath - File path from --file flag
 * @returns {Promise<string[]>} Array of text strings
 */
async function resolveTextInput(textArg, filePath) {
  if (filePath) {
    const content = fs.readFileSync(filePath, 'utf-8').trim();
    return [content];
  }

  if (textArg) {
    return [textArg];
  }

  const noInputMessage = 'Error: No input provided. Pass text as an argument, use --file, or pipe via stdin.';

  // Try reading from stdin (piped input)
  if (!process.stdin.isTTY) {
    const chunks = [];
    for await (const chunk of process.stdin) {
      chunks.push(chunk);
    }
    const input = Buffer.concat(chunks).toString('utf-8').trim();
    if (!input) {
      console.error(noInputMessage);
      process.exit(1);
    }
    // Split by newlines for bulk embedding; /\r?\n/ also strips the '\r'
    // that CRLF input would otherwise leave at the end of every line.
    return input.split(/\r?\n/).filter((line) => line.trim());
  }

  console.error(noInputMessage);
  process.exit(1);
}
39
+
40
+ module.exports = { resolveTextInput };
@@ -0,0 +1,55 @@
1
+ 'use strict';
2
+
3
/**
 * Read the MongoDB connection string from the MONGODB_URI environment variable.
 *
 * If the variable is unset or empty, an explanation is written to stderr and
 * the process is terminated with exit code 1.
 * @returns {string} The configured connection string.
 */
function requireMongoUri() {
  const { MONGODB_URI: mongoUri } = process.env;
  if (mongoUri) {
    return mongoUri;
  }

  const instructions = [
    'Error: MONGODB_URI environment variable is not set.',
    '',
    'Set your Atlas connection string:',
    '  export MONGODB_URI="mongodb+srv://user:pass@cluster.mongodb.net/"',
  ];
  for (const line of instructions) {
    console.error(line);
  }
  process.exit(1);
  // Only reached if process.exit has been replaced (e.g. in tests).
  return mongoUri;
}
18
+
19
+ /**
20
+ * Get a connected MongoDB client and target collection.
21
+ * Lazy-requires the mongodb driver.
22
+ * @param {string} db - Database name
23
+ * @param {string} collectionName - Collection name
24
+ * @returns {Promise<{client: import('mongodb').MongoClient, collection: import('mongodb').Collection}>}
25
+ */
26
+ async function getMongoCollection(db, collectionName) {
27
+ const { MongoClient } = require('mongodb');
28
+ const uri = requireMongoUri();
29
+ const client = new MongoClient(uri);
30
+ await client.connect();
31
+ const collection = client.db(db).collection(collectionName);
32
+ return { client, collection };
33
+ }
34
+
35
/**
 * Run a callback against a collection, closing the connection afterwards.
 *
 * The client obtained from getMongoCollection() is closed whether the
 * callback resolves or rejects; the callback's result (or error) is passed
 * through to the caller.
 * @param {string} db - Database name
 * @param {string} collectionName - Collection name
 * @param {(collection: import('mongodb').Collection) => Promise<*>} fn - Function to run
 * @returns {Promise<*>} Whatever `fn` resolves to
 */
async function connectAndClose(db, collectionName, fn) {
  const handle = await getMongoCollection(db, collectionName);
  try {
    const outcome = await fn(handle.collection);
    return outcome;
  } finally {
    await handle.client.close();
  }
}
50
+
51
+ module.exports = {
52
+ requireMongoUri,
53
+ getMongoCollection,
54
+ connectAndClose,
55
+ };
Binary file