npm - voyageai-cli - Versions diffs - 1.7.0 → 1.9.0 - Mend

voyageai-cli 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/.github/workflows/ci.yml +1 -0
package/package.json +2 -3
package/src/cli.js +2 -0
package/src/commands/benchmark.js +799 -0
package/src/commands/playground.js +49 -1
package/src/lib/catalog.js +1 -1
package/src/lib/explanations.js +47 -0
package/src/playground/index.html +835 -0
package/test/commands/benchmark.test.js +252 -0
package/test/commands/ping.test.js +7 -3
package/test/lib/explanations.test.js +1 -0

package/src/commands/playground.js CHANGED Viewed

@@ -84,7 +84,7 @@ function createPlaygroundServer() {
       // API: Models
       if (req.method === 'GET' && req.url === '/api/models') {
-        const models = MODEL_CATALOG.filter(m => !m.legacy);
+        const models = MODEL_CATALOG.filter(m => !m.legacy && !m.local);
         res.writeHead(200, { 'Content-Type': 'application/json' });
         res.end(JSON.stringify({ models }));
         return;
@@ -152,6 +152,54 @@ function createPlaygroundServer() {
           return;
         }
+        // API: Benchmark (single model, single round — UI calls this per model)
+        if (req.url === '/api/benchmark/embed') {
+          const { texts, model, inputType, dimensions } = parsed;
+          if (!texts || !Array.isArray(texts) || texts.length === 0) {
+            res.writeHead(400, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ error: 'texts must be a non-empty array' }));
+            return;
+          }
+          const opts = { model: model || undefined };
+          if (inputType) opts.inputType = inputType;
+          if (dimensions) opts.dimensions = dimensions;
+          const start = performance.now();
+          const result = await generateEmbeddings(texts, opts);
+          const elapsed = performance.now() - start;
+          res.writeHead(200, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({
+            model: result.model,
+            elapsed,
+            tokens: result.usage?.total_tokens || 0,
+            dimensions: result.data?.[0]?.embedding?.length || 0,
+            embeddings: result.data?.map(d => d.embedding),
+          }));
+          return;
+        }
+        if (req.url === '/api/benchmark/rerank') {
+          const { query, documents, model, topK } = parsed;
+          if (!query || !documents || !Array.isArray(documents)) {
+            res.writeHead(400, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ error: 'query and documents are required' }));
+            return;
+          }
+          const { apiRequest } = require('../lib/api');
+          const body = { query, documents, model: model || 'rerank-2.5' };
+          if (topK) body.top_k = topK;
+          const start = performance.now();
+          const result = await apiRequest('/rerank', body);
+          const elapsed = performance.now() - start;
+          res.writeHead(200, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({
+            model: result.model,
+            elapsed,
+            tokens: result.usage?.total_tokens || 0,
+            results: result.data || [],
+          }));
+          return;
+        }
         // API: Similarity
         if (req.url === '/api/similarity') {
           const { texts, model } = parsed;

package/src/lib/catalog.js CHANGED Viewed

@@ -34,7 +34,7 @@ const MODEL_CATALOG = [
   { name: 'voyage-multimodal-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/M + $0.60/B px', bestFor: 'Text + images + video', shortFor: 'Multimodal' },
   { name: 'rerank-2.5', type: 'reranking', context: '32K', dimensions: '—', price: '$0.05/1M tokens', bestFor: 'Best quality reranking', shortFor: 'Best reranker' },
   { name: 'rerank-2.5-lite', type: 'reranking', context: '32K', dimensions: '—', price: '$0.02/1M tokens', bestFor: 'Fast reranking', shortFor: 'Fast reranker' },
-  { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge' },
+  { name: 'voyage-4-nano', type: 'embedding', context: '32K', dimensions: '512 (default), 128, 256', price: 'Open-weight', bestFor: 'Open-weight / edge', shortFor: 'Open / edge', local: true },
   // Legacy models
   { name: 'voyage-3-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.18/1M tokens', bestFor: 'Previous gen quality', shortFor: 'Previous gen quality', legacy: true },
   { name: 'voyage-3.5', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.06/1M tokens', bestFor: 'Previous gen balanced', shortFor: 'Previous gen balanced', legacy: true },

package/src/lib/explanations.js CHANGED Viewed

@@ -406,6 +406,48 @@ const concepts = {
       'vai embed --file document.txt --input-type document',
     ],
   },
+  benchmarking: {
+    title: 'Benchmarking & Model Selection',
+    summary: 'How to choose the right model for your use case',
+    content: [
+      `Choosing the right embedding or reranking model depends on your priorities:`,
+      `${pc.cyan('latency')}, ${pc.cyan('accuracy')}, ${pc.cyan('cost')}, or a balance of all three.`,
+      ``,
+      `${pc.bold('vai benchmark embed')} — Compare embedding models head-to-head:`,
+      `  Measures avg/p50/p95 latency, token usage, and cost per model.`,
+      `  ${pc.dim('vai benchmark embed --models voyage-4-large,voyage-4,voyage-4-lite --rounds 5')}`,
+      ``,
+      `${pc.bold('vai benchmark similarity')} — Test ranking quality on your data:`,
+      `  Embeds a query + corpus with each model, shows side-by-side top-K rankings.`,
+      `  If models agree on the top results, the cheaper one is likely sufficient.`,
+      `  ${pc.dim('vai benchmark similarity --query "your query" --file corpus.txt')}`,
+      ``,
+      `${pc.bold('vai benchmark rerank')} — Compare reranking models:`,
+      `  Measures latency and shows how models order the same documents.`,
+      `  ${pc.dim('vai benchmark rerank --query "your query" --documents-file docs.json')}`,
+      ``,
+      `${pc.bold('vai benchmark cost')} — Project monthly costs at scale:`,
+      `  Shows estimated cost for each model at different daily query volumes.`,
+      `  ${pc.dim('vai benchmark cost --tokens 500 --volumes 100,1000,10000,100000')}`,
+      ``,
+      `${pc.bold('vai benchmark batch')} — Find optimal batch size for ingestion:`,
+      `  Measures throughput (texts/sec) at different batch sizes.`,
+      `  ${pc.dim('vai benchmark batch --batch-sizes 1,5,10,25,50 --rounds 3')}`,
+      ``,
+      `${pc.bold('Decision framework:')}`,
+      `  1. Run ${pc.cyan('benchmark cost')} to eliminate models outside your budget`,
+      `  2. Run ${pc.cyan('benchmark embed')} to compare latency of affordable models`,
+      `  3. Run ${pc.cyan('benchmark similarity')} with your actual data to compare quality`,
+      `  4. If quality is similar, pick the cheaper/faster model`,
+      `  5. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
+    ].join('\n'),
+    links: ['https://www.mongodb.com/docs/voyageai/models/text-embeddings/'],
+    tryIt: [
+      'vai benchmark embed --rounds 3',
+      'vai benchmark cost',
+      'vai benchmark similarity --query "your search query" --file your-docs.txt',
+    ],
+  },
 };
 /**
@@ -446,6 +488,11 @@ const aliases = {
   batch: 'batch-processing',
   'batch-processing': 'batch-processing',
   batching: 'batch-processing',
+  benchmark: 'benchmarking',
+  benchmarking: 'benchmarking',
+  'model-selection': 'benchmarking',
+  choosing: 'benchmarking',
+  compare: 'benchmarking',
 };
 /**