voyageai-cli 1.16.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ _vai_completions() {
19
19
  prev="\${COMP_WORDS[COMP_CWORD-1]}"
20
20
 
21
21
  # Top-level commands
22
- commands="embed rerank store search index models ping config demo explain similarity ingest estimate init chunk completions help"
22
+ commands="embed rerank store search index models ping config demo explain similarity ingest estimate init chunk query pipeline eval completions help"
23
23
 
24
24
  # Subcommands
25
25
  local index_subs="create list delete"
@@ -114,6 +114,18 @@ _vai_completions() {
114
114
  COMPREPLY=( \$(compgen -W "--strategy --chunk-size --overlap --min-size --output --text-field --extensions --ignore --dry-run --stats --json --quiet --help" -- "\$cur") )
115
115
  return 0
116
116
  ;;
117
+ query)
118
+ COMPREPLY=( \$(compgen -W "--db --collection --index --field --model --dimensions --limit --top-k --rerank --no-rerank --rerank-model --text-field --filter --num-candidates --show-vectors --json --quiet --help" -- "\$cur") )
119
+ return 0
120
+ ;;
121
+ pipeline)
122
+ COMPREPLY=( \$(compgen -W "--db --collection --field --index --model --dimensions --strategy --chunk-size --overlap --batch-size --text-field --extensions --ignore --create-index --dry-run --json --quiet --help" -- "\$cur") )
123
+ return 0
124
+ ;;
125
+ eval)
126
+ COMPREPLY=( \$(compgen -W "--test-set --db --collection --index --field --model --dimensions --limit --k-values --rerank --no-rerank --rerank-model --text-field --id-field --compare --json --quiet --help" -- "\$cur") )
127
+ return 0
128
+ ;;
117
129
  completions)
118
130
  COMPREPLY=( \$(compgen -W "bash zsh --help" -- "\$cur") )
119
131
  return 0
@@ -187,6 +199,9 @@ _vai() {
187
199
  'estimate:Estimate embedding costs — symmetric vs asymmetric'
188
200
  'init:Initialize project with .vai.json'
189
201
  'chunk:Chunk documents for embedding'
202
+ 'query:Search + rerank in one shot'
203
+ 'pipeline:Chunk, embed, and store documents'
204
+ 'eval:Evaluate retrieval quality (MRR, NDCG, recall)'
190
205
  'completions:Generate shell completion scripts'
191
206
  'help:Display help for command'
192
207
  )
@@ -425,6 +440,66 @@ _vai() {
425
440
  '--json[JSON output]' \\
426
441
  '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
427
442
  ;;
443
+ query)
444
+ _arguments \\
445
+ '1:query text:' \\
446
+ '--db[Database name]:database:' \\
447
+ '--collection[Collection name]:collection:' \\
448
+ '--index[Vector search index]:index:' \\
449
+ '--field[Embedding field]:field:' \\
450
+ '(-m --model)'{-m,--model}'[Embedding model]:model:(\$models)' \\
451
+ '(-d --dimensions)'{-d,--dimensions}'[Output dimensions]:dims:' \\
452
+ '(-l --limit)'{-l,--limit}'[Search candidates]:limit:' \\
453
+ '(-k --top-k)'{-k,--top-k}'[Final results]:k:' \\
454
+ '--rerank[Enable reranking]' \\
455
+ '--no-rerank[Skip reranking]' \\
456
+ '--rerank-model[Reranking model]:model:' \\
457
+ '--text-field[Document text field]:field:' \\
458
+ '--filter[Pre-filter JSON]:json:' \\
459
+ '--json[JSON output]' \\
460
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
461
+ ;;
462
+ pipeline)
463
+ _arguments \\
464
+ '1:input:_files' \\
465
+ '--db[Database name]:database:' \\
466
+ '--collection[Collection name]:collection:' \\
467
+ '--field[Embedding field]:field:' \\
468
+ '--index[Vector search index]:index:' \\
469
+ '(-m --model)'{-m,--model}'[Embedding model]:model:(\$models)' \\
470
+ '(-d --dimensions)'{-d,--dimensions}'[Output dimensions]:dims:' \\
471
+ '(-s --strategy)'{-s,--strategy}'[Chunking strategy]:strategy:(fixed sentence paragraph recursive markdown)' \\
472
+ '(-c --chunk-size)'{-c,--chunk-size}'[Chunk size]:size:' \\
473
+ '--overlap[Chunk overlap]:chars:' \\
474
+ '--batch-size[Texts per API call]:size:' \\
475
+ '--text-field[Text field for JSON]:field:' \\
476
+ '--extensions[File extensions]:exts:' \\
477
+ '--ignore[Dirs to skip]:dirs:' \\
478
+ '--create-index[Auto-create vector index]' \\
479
+ '--dry-run[Preview without executing]' \\
480
+ '--json[JSON output]' \\
481
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
482
+ ;;
483
+ eval)
484
+ _arguments \\
485
+ '--test-set[JSONL test set file]:file:_files' \\
486
+ '--db[Database name]:database:' \\
487
+ '--collection[Collection name]:collection:' \\
488
+ '--index[Vector search index]:index:' \\
489
+ '--field[Embedding field]:field:' \\
490
+ '(-m --model)'{-m,--model}'[Embedding model]:model:(\$models)' \\
491
+ '(-d --dimensions)'{-d,--dimensions}'[Output dimensions]:dims:' \\
492
+ '(-l --limit)'{-l,--limit}'[Search candidates]:limit:' \\
493
+ '(-k --k-values)'{-k,--k-values}'[K values for metrics]:values:' \\
494
+ '--rerank[Enable reranking]' \\
495
+ '--no-rerank[Skip reranking]' \\
496
+ '--rerank-model[Reranking model]:model:' \\
497
+ '--text-field[Document text field]:field:' \\
498
+ '--id-field[Document ID field]:field:' \\
499
+ '--compare[Compare configs]:configs:' \\
500
+ '--json[JSON output]' \\
501
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
502
+ ;;
428
503
  completions)
429
504
  _arguments \\
430
505
  '1:shell:(bash zsh)'
@@ -0,0 +1,300 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const { getDefaultModel, DEFAULT_RERANK_MODEL } = require('../lib/catalog');
5
+ const { generateEmbeddings, apiRequest } = require('../lib/api');
6
+ const { getMongoCollection } = require('../lib/mongo');
7
+ const { loadProject } = require('../lib/project');
8
+ const { computeMetrics, aggregateMetrics } = require('../lib/metrics');
9
+ const ui = require('../lib/ui');
10
+
11
/**
 * Load a retrieval test set from a JSONL file.
 *
 * Each non-blank line must be a JSON object in one of these shapes:
 *   { "query": "...", "relevant": ["id1", "id2"] }
 *   { "query": "...", "relevant_texts": ["text1", "text2"] }
 *
 * Blank lines are skipped. Error messages report the physical line number in
 * the file (blank lines included) so the offending line can be found directly.
 * Entries of "relevant" and "relevant_texts" are coerced to strings.
 *
 * @param {string} filePath - Path to the JSONL file (read synchronously as UTF-8).
 * @returns {Array<{query: string, relevant: string[], relevantTexts: string[]}>}
 * @throws {Error} If the file cannot be read, a line is not a valid JSON object,
 *   "query" is missing or not a string, or neither "relevant" nor
 *   "relevant_texts" contains at least one entry.
 */
function loadTestSet(filePath) {
  // JSON.parse rejects a leading byte-order mark, so strip it before splitting.
  const raw = fs.readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '');
  const cases = [];

  for (const [idx, line] of raw.split(/\r?\n/).entries()) {
    if (line.trim().length === 0) continue;
    const lineNo = idx + 1;

    // Normalize an optional list field to an array of strings.
    const toStringList = (value, name) => {
      if (value == null) return [];
      if (!Array.isArray(value)) {
        throw new Error(`Line ${lineNo}: "${name}" must be an array`);
      }
      return value.map((entry) => String(entry));
    };

    let item;
    try {
      item = JSON.parse(line);
    } catch (err) {
      throw new Error(`Line ${lineNo}: invalid JSON (${err.message})`);
    }
    if (item === null || typeof item !== 'object' || Array.isArray(item)) {
      throw new Error(`Line ${lineNo}: expected a JSON object`);
    }

    if (!item.query) throw new Error(`Line ${lineNo}: missing "query" field`);
    if (typeof item.query !== 'string') {
      throw new Error(`Line ${lineNo}: "query" must be a string`);
    }

    const relevant = toStringList(item.relevant, 'relevant');
    const relevantTexts = toStringList(item.relevant_texts, 'relevant_texts');
    if (relevant.length === 0 && relevantTexts.length === 0) {
      throw new Error(`Line ${lineNo}: need "relevant" (doc IDs) or "relevant_texts" (text matches)`);
    }

    cases.push({ query: item.query, relevant, relevantTexts });
  }

  return cases;
}
34
+
35
/**
 * Parse a comma-separated list of K cut-offs (e.g. "1,3,5,10").
 * Entries that are not positive integers are dropped; order is preserved.
 * @param {string} raw
 * @returns {number[]}
 */
function parseKValues(raw) {
  return String(raw ?? '')
    .split(',')
    .map((part) => Number.parseInt(part.trim(), 10))
    .filter((k) => Number.isInteger(k) && k > 0);
}

/**
 * Label retrieved documents by which expected text they contain.
 *
 * A document matches expected text j when its lower-cased text contains the
 * expected text, or the expected text contains the first 50 characters of the
 * document. The document is then labelled `match_<j>` (the index of the
 * expected text), so the labels line up with the returned `relevantIds`.
 * Each expected text can be claimed by one document only, so a repeated
 * chunk cannot be counted twice. Documents without string text and empty
 * expected texts never match. Unmatched documents are labelled `miss_<rank>`.
 *
 * @param {Array<Object>} docs - Retrieved documents in rank order.
 * @param {string[]} expectedTexts - Expected text snippets for the query.
 * @param {string} textField - Name of the document field holding its text.
 * @returns {{retrievedIds: string[], relevantIds: string[]}}
 */
function labelByExpectedText(docs, expectedTexts, textField) {
  const needles = expectedTexts
    .map((text) => String(text).toLowerCase())
    .filter((text) => text.length > 0);
  const claimed = new Set();

  const retrievedIds = docs.map((doc, rank) => {
    const rawText = doc[textField];
    const docText = typeof rawText === 'string' ? rawText.toLowerCase() : '';
    // An empty prefix is a substring of everything; never treat it as a hit.
    if (docText.length === 0) return `miss_${rank}`;

    const docPrefix = docText.substring(0, 50);
    const hit = needles.findIndex(
      (needle, j) => !claimed.has(j) && (docText.includes(needle) || needle.includes(docPrefix))
    );
    if (hit === -1) return `miss_${rank}`;

    claimed.add(hit);
    return `match_${hit}`;
  });

  return { retrievedIds, relevantIds: needles.map((_, j) => `match_${j}`) };
}

/**
 * Register the eval command on a Commander program.
 *
 * The action embeds each test query, runs a $vectorSearch aggregation,
 * optionally reranks the hits, scores them with computeMetrics /
 * aggregateMetrics, and prints either JSON (--json) or a formatted report.
 * Settings are resolved as: CLI flag, then the config returned by
 * loadProject(), then a built-in default.
 *
 * Note: --compare is registered as an option but the action never reads it.
 *
 * @param {import('commander').Command} program
 */
function registerEval(program) {
  program
    .command('eval')
    .description('Evaluate retrieval quality — measure MRR, NDCG, recall on your data')
    .requiredOption('--test-set <path>', 'JSONL file with queries and expected results')
    .option('--db <database>', 'Database name')
    .option('--collection <name>', 'Collection name')
    .option('--index <name>', 'Vector search index name')
    .option('--field <name>', 'Embedding field name')
    .option('-m, --model <model>', 'Embedding model for queries')
    .option('-d, --dimensions <n>', 'Output dimensions', (v) => Number.parseInt(v, 10))
    .option('-l, --limit <n>', 'Vector search candidates per query', (v) => Number.parseInt(v, 10), 20)
    .option('-k, --k-values <values>', 'Comma-separated K values for @K metrics', '1,3,5,10')
    .option('--rerank', 'Enable reranking')
    .option('--no-rerank', 'Skip reranking')
    .option('--rerank-model <model>', 'Reranking model')
    .option('--text-field <name>', 'Document text field', 'text')
    .option('--id-field <name>', 'Document ID field for matching (default: _id)', '_id')
    .option('--compare <configs>', 'Compare configs: "model1,model2" or "rerank,no-rerank"')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      let client;
      try {
        // Merge project config
        const { config: proj } = loadProject();
        const db = opts.db || proj.db;
        const collection = opts.collection || proj.collection;
        const index = opts.index || proj.index || 'vector_index';
        const field = opts.field || proj.field || 'embedding';
        const model = opts.model || proj.model || getDefaultModel();
        const rerankModel = opts.rerankModel || DEFAULT_RERANK_MODEL;
        const textField = opts.textField || 'text';
        const idField = opts.idField || '_id';
        // Reranking is on unless --no-rerank was passed explicitly.
        const doRerank = opts.rerank !== false;
        const dimensions = opts.dimensions || proj.dimensions;
        const kValues = parseKValues(opts.kValues);
        const limit = opts.limit;

        if (!db || !collection) {
          console.error(ui.error('Database and collection required. Use --db/--collection or "vai init".'));
          process.exit(1);
        }

        // Without at least one valid K, Math.max(...kValues) would be -Infinity.
        if (kValues.length === 0) {
          console.error(ui.error('No valid K values. Use --k-values with positive integers, e.g. "1,3,5,10".'));
          process.exit(1);
        }

        // A non-numeric --limit parses to NaN and would be sent to $vectorSearch.
        if (!Number.isInteger(limit) || limit <= 0) {
          console.error(ui.error('Invalid --limit: expected a positive integer.'));
          process.exit(1);
        }

        // Load test set
        let testSet;
        try {
          testSet = loadTestSet(opts.testSet);
        } catch (err) {
          console.error(ui.error(`Failed to load test set: ${err.message}`));
          process.exit(1);
        }

        if (testSet.length === 0) {
          console.error(ui.error('Test set is empty.'));
          process.exit(1);
        }

        const verbose = !opts.json && !opts.quiet;

        if (verbose) {
          console.log('');
          console.log(ui.bold('📊 Retrieval Evaluation'));
          console.log(ui.dim(` Test set: ${testSet.length} queries`));
          console.log(ui.dim(` Collection: ${db}.${collection}`));
          console.log(ui.dim(` Model: ${model}${doRerank ? ` + ${rerankModel}` : ''}`));
          console.log(ui.dim(` K values: ${kValues.join(', ')}`));
          console.log('');
        }

        // Connect to MongoDB
        const { client: c, collection: coll } = await getMongoCollection(db, collection);
        client = c;

        // Loop-invariant search settings.
        const numCandidates = Math.min(limit * 15, 10000);
        const maxK = Math.max(...kValues);

        // Run evaluation (sequentially, one query at a time)
        const perQueryResults = [];
        let totalEmbedTokens = 0;
        let totalRerankTokens = 0;

        for (let qi = 0; qi < testSet.length; qi++) {
          const testCase = testSet[qi];

          if (verbose) {
            process.stderr.write(`\r Evaluating query ${qi + 1}/${testSet.length}...`);
          }

          // Embed query
          const embedOpts = { model, inputType: 'query' };
          if (dimensions) embedOpts.dimensions = dimensions;
          const embedResult = await generateEmbeddings([testCase.query], embedOpts);
          const queryVector = embedResult.data[0].embedding;
          totalEmbedTokens += embedResult.usage?.total_tokens || 0;

          // Vector search
          const pipeline = [
            {
              $vectorSearch: {
                index,
                path: field,
                queryVector,
                numCandidates,
                limit,
              },
            },
            { $addFields: { _vsScore: { $meta: 'vectorSearchScore' } } },
          ];

          let searchResults = await coll.aggregate(pipeline).toArray();

          // Rerank if enabled
          if (doRerank && searchResults.length > 1) {
            const candidates = searchResults;
            const documents = candidates.map((doc) => {
              const txt = doc[textField];
              return typeof txt === 'string' ? txt : JSON.stringify(txt || doc);
            });

            const rerankResult = await apiRequest('/rerank', {
              query: testCase.query,
              documents,
              model: rerankModel,
            });
            totalRerankTokens += rerankResult.usage?.total_tokens || 0;

            // Reorder by rerank score; ignore indices that do not map to a candidate.
            searchResults = (rerankResult.data || [])
              .map((item) => candidates[item.index])
              .filter((doc) => doc !== undefined);
          }

          // Build retrieved/relevant ID lists without mutating the test case
          let retrievedIds;
          let relevantIds;
          if (testCase.relevant.length > 0) {
            // Match by ID field; both sides are compared as strings.
            retrievedIds = searchResults.map((doc) => String(doc[idField]));
            relevantIds = testCase.relevant.map((id) => String(id));
          } else {
            // Match by text containment (fuzzy)
            ({ retrievedIds, relevantIds } = labelByExpectedText(
              searchResults,
              testCase.relevantTexts,
              textField
            ));
          }

          // Compute metrics
          const metrics = computeMetrics(retrievedIds, relevantIds, kValues);
          const relevantSet = new Set(relevantIds);

          perQueryResults.push({
            query: testCase.query,
            relevant: relevantIds,
            retrieved: retrievedIds.slice(0, maxK),
            metrics,
            hits: retrievedIds.filter((id) => relevantSet.has(id)).length,
          });
        }

        if (verbose) {
          // Wipe the progress line.
          process.stderr.write('\r' + ' '.repeat(50) + '\r');
        }

        // Aggregate metrics
        const allMetrics = perQueryResults.map((r) => r.metrics);
        const aggregated = aggregateMetrics(allMetrics);

        // Find worst-performing queries (lowest MRR first)
        const sorted = [...perQueryResults].sort((a, b) => a.metrics.mrr - b.metrics.mrr);
        const worstQueries = sorted.slice(0, Math.min(3, sorted.length));

        if (opts.json) {
          console.log(JSON.stringify({
            config: { model, rerank: doRerank, rerankModel: doRerank ? rerankModel : null, db, collection, kValues },
            summary: aggregated,
            tokens: { embed: totalEmbedTokens, rerank: totalRerankTokens },
            queries: perQueryResults.length,
            perQuery: perQueryResults,
          }, null, 2));
          return;
        }

        // Pretty output
        console.log(ui.bold('Results'));
        console.log('');

        // Main metrics table
        const metricKeys = Object.keys(aggregated);
        const maxKeyLen = Math.max(...metricKeys.map((k) => k.length));

        for (const key of metricKeys) {
          const val = aggregated[key];
          const bar = renderBar(val, 20);
          const label = key.toUpperCase().padEnd(maxKeyLen + 1);
          const valStr = val.toFixed(4);
          const color = val >= 0.8 ? ui.green(valStr) : val >= 0.5 ? ui.cyan(valStr) : ui.yellow(valStr);
          console.log(` ${label} ${bar} ${color}`);
        }

        console.log('');

        // Highlight key metrics
        const mrr = aggregated.mrr;
        const recall5 = aggregated['r@5'];
        const ndcg10 = aggregated['ndcg@10'];

        if (mrr !== undefined) {
          const grade = mrr >= 0.8 ? ui.green('Excellent') : mrr >= 0.6 ? ui.cyan('Good') : mrr >= 0.4 ? ui.yellow('Fair') : ui.red('Needs work');
          console.log(ui.label('MRR', `${mrr.toFixed(4)} — ${grade}`));
        }
        if (recall5 !== undefined) {
          console.log(ui.label('Recall@5', `${(recall5 * 100).toFixed(1)}% of relevant docs found in top 5`));
        }
        if (ndcg10 !== undefined) {
          console.log(ui.label('NDCG@10', `${ndcg10.toFixed(4)} — ranking quality`));
        }

        // Worst queries (skipped when even the worst query has a perfect MRR)
        if (worstQueries.length > 0 && worstQueries[0].metrics.mrr < 1) {
          console.log('');
          console.log(ui.bold('Hardest queries:'));
          for (const wq of worstQueries) {
            const preview = wq.query.substring(0, 60) + (wq.query.length > 60 ? '...' : '');
            const mrrStr = wq.metrics.mrr === 0 ? ui.red('miss') : ui.yellow(wq.metrics.mrr.toFixed(2));
            console.log(` ${mrrStr} "${preview}" (${wq.hits}/${wq.relevant.length} relevant found)`);
          }
        }

        console.log('');
        console.log(ui.dim(` ${testSet.length} queries evaluated | Tokens: embed ${totalEmbedTokens}${totalRerankTokens ? `, rerank ${totalRerankTokens}` : ''}`));

        // Suggestions
        console.log('');
        if (mrr !== undefined && mrr < 0.6) {
          console.log(ui.dim(' 💡 Low MRR? Try: larger model, more candidates (--limit), or enable reranking (--rerank)'));
        }
        if (recall5 !== undefined && recall5 < 0.5) {
          console.log(ui.dim(' 💡 Low recall? Try: increasing --limit, different chunking strategy, or review your test set'));
        }
      } catch (err) {
        console.error(ui.error(err.message));
        process.exit(1);
      } finally {
        if (client) await client.close();
      }
    });
}
287
+
288
/**
 * Render a simple fixed-width bar chart out of block characters.
 *
 * The value is clamped to [0, 1] and NaN is treated as 0, so an out-of-range
 * metric can never make `String.prototype.repeat` throw a RangeError.
 * The result always contains exactly `width` characters (width is floored
 * and negative or non-finite widths are treated as 0).
 *
 * @param {number} value - Fill ratio, nominally 0.0 to 1.0
 * @param {number} width - Bar width in characters
 * @returns {string}
 */
function renderBar(value, width) {
  const cells = Number.isFinite(width) ? Math.max(0, Math.floor(width)) : 0;
  const numeric = Number(value);
  const ratio = Number.isNaN(numeric) ? 0 : Math.min(Math.max(numeric, 0), 1);
  const filled = Math.round(ratio * cells);
  return '█'.repeat(filled) + '░'.repeat(cells - filled);
}
299
+
300
+ module.exports = { registerEval };
@@ -84,20 +84,20 @@ function registerModels(program) {
84
84
  const displayLegacy = models.filter(m => m.legacy);
85
85
 
86
86
  const formatWideRow = (m) => {
87
- const name = ui.cyan(m.name);
87
+ const label = m.unreleased ? ui.cyan(m.name) + ' ' + ui.dim('(unreleased)') : ui.cyan(m.name);
88
88
  const type = m.type.startsWith('embedding') ? ui.green(m.type) : ui.yellow(m.type);
89
89
  const price = ui.dim(m.price);
90
90
  const arch = m.architecture ? (m.architecture === 'moe' ? ui.cyan('MoE') : m.architecture) : '—';
91
91
  const space = m.sharedSpace ? ui.green('✓ ' + m.sharedSpace) : '—';
92
- return [name, type, m.context, m.dimensions, arch, space, price, m.bestFor];
92
+ return [label, type, m.context, m.dimensions, arch, space, price, m.bestFor];
93
93
  };
94
94
 
95
95
  const formatCompactRow = (m) => {
96
- const name = ui.cyan(m.name);
96
+ const label = m.unreleased ? ui.cyan(m.name) + ' ' + ui.dim('(soon)') : ui.cyan(m.name);
97
97
  const type = m.type.startsWith('embedding') ? ui.green(m.multimodal ? 'multi' : 'embed') : ui.yellow('rerank');
98
98
  const dims = compactDimensions(m.dimensions);
99
99
  const price = ui.dim(compactPrice(m.price));
100
- return [name, type, dims, price, m.shortFor || m.bestFor];
100
+ return [label, type, dims, price, m.shortFor || m.bestFor];
101
101
  };
102
102
 
103
103
  if (opts.wide) {