npm - voyageai-cli - Versions diffs - 1.10.0 → 1.12.0 - Mend

voyageai-cli 1.10.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/README.md +23 -0
package/demo.gif +0 -0
package/package.json +1 -1
package/src/cli.js +2 -0
package/src/commands/about.js +85 -0
package/src/commands/benchmark.js +418 -0
package/src/commands/embed.js +5 -0
package/src/commands/playground.js +7 -3
package/src/commands/store.js +15 -4
package/src/lib/api.js +6 -0
package/src/lib/catalog.js +2 -0
package/src/lib/explanations.js +76 -2
package/src/lib/math.js +5 -0
package/src/playground/index.html +530 -1
package/test/commands/about.test.js +23 -0
package/test/commands/benchmark.test.js +67 -0
package/test/commands/embed.test.js +10 -0
package/test/lib/explanations.test.js +6 -0
package/voyageai-cli-playground.png +0 -0

package/src/lib/api.js CHANGED

@@ -78,6 +78,8 @@ async function apiRequest(endpoint, body) {
       body: JSON.stringify(body),
     });
+    // 429: The API said "slow down monkey" — respect the rate limit
+    // like you'd respect a $merge that's already running on your replica set.
     if (response.status === 429 && attempt < MAX_RETRIES) {
       const retryAfter = response.headers.get('Retry-After');
       const waitMs = retryAfter ? parseInt(retryAfter, 10) * 1000 : Math.pow(2, attempt) * 1000;
@@ -127,6 +129,7 @@ async function apiRequest(endpoint, body) {
  * @param {string} [options.inputType] - Input type (query|document)
  * @param {number} [options.dimensions] - Output dimensions
  * @param {boolean} [options.truncation] - Enable/disable truncation
+ * @param {string} [options.outputDtype] - Output data type: float, int8, uint8, binary, ubinary
  * @returns {Promise<object>} API response with embeddings
  */
 async function generateEmbeddings(texts, options = {}) {
@@ -146,6 +149,9 @@ async function generateEmbeddings(texts, options = {}) {
   if (options.truncation !== undefined) {
     body.truncation = options.truncation;
   }
+  if (options.outputDtype && options.outputDtype !== 'float') {
+    body.output_dtype = options.outputDtype;
+  }
   return apiRequest('/embeddings', body);
 }

package/src/lib/catalog.js CHANGED

@@ -22,6 +22,8 @@ function getDefaultDimensions() {
   return getConfigValue('defaultDimensions') || DEFAULT_DIMENSIONS;
 }
+// The model catalog: like a wine list (I don't drink :-P), except every choice
+// leads to vectors instead of regret.
 /** @type {Array<{name: string, type: string, context: string, dimensions: string, price: string, bestFor: string}>} */
 const MODEL_CATALOG = [
   { name: 'voyage-4-large', type: 'embedding', context: '32K', dimensions: '1024 (default), 256, 512, 2048', price: '$0.12/1M tokens', bestFor: 'Best quality, multilingual', shortFor: 'Best quality' },

package/src/lib/explanations.js CHANGED

@@ -406,6 +406,65 @@ const concepts = {
       'vai embed --file document.txt --input-type document',
     ],
   },
+  quantization: {
+    title: 'Quantization & Flexible Dimensions',
+    summary: 'Reduce storage costs with lower-precision embeddings',
+    content: [
+      `${pc.cyan('Quantization')} reduces embedding precision from 32-bit floats to smaller`,
+      `representations, dramatically cutting storage and search costs with minimal`,
+      `quality loss. Combined with ${pc.cyan('Matryoshka dimensions')}, you can shrink vectors`,
+      `by up to ${pc.bold('128×')} (32× from binary × 4× from fewer dimensions).`,
+      ``,
+      `${pc.bold('Output data types (--output-dtype):')}`,
+      ``,
+      `  ${pc.cyan('float')}    32 bits/dim   4 bytes/dim   Baseline (default)`,
+      `  ${pc.cyan('int8')}     8 bits/dim    1 byte/dim    ${pc.green('4× smaller')}   Signed: -128 to 127`,
+      `  ${pc.cyan('uint8')}    8 bits/dim    1 byte/dim    ${pc.green('4× smaller')}   Unsigned: 0 to 255`,
+      `  ${pc.cyan('binary')}   1 bit/dim     1/8 byte/dim  ${pc.green('32× smaller')}  Bit-packed int8 (offset binary)`,
+      `  ${pc.cyan('ubinary')}  1 bit/dim     1/8 byte/dim  ${pc.green('32× smaller')}  Bit-packed uint8`,
+      ``,
+      `${pc.bold('Storage math for 1M documents at 1024 dims:')}`,
+      `  float:   ${pc.dim('1M × 1024 × 4B')}  = ${pc.cyan('4.0 GB')}`,
+      `  int8:    ${pc.dim('1M × 1024 × 1B')}  = ${pc.cyan('1.0 GB')}   (4× savings)`,
+      `  binary:  ${pc.dim('1M × 1024 / 8B')}  = ${pc.cyan('128 MB')}   (32× savings)`,
+      `  ${pc.dim('+ reduced dimensions:')} 256-dim binary = ${pc.cyan('32 MB')}   (128× savings)`,
+      ``,
+      `${pc.bold('How binary quantization works:')} Each float value is converted to a single bit:`,
+      `positive values become 1, zero/negative become 0. Eight bits are packed into`,
+      `one byte. ${pc.cyan('binary')} uses offset binary (subtract 128) for signed int8 output;`,
+      `${pc.cyan('ubinary')} stores the raw unsigned uint8 value.`,
+      ``,
+      `${pc.bold('Quality impact:')} Quantization-aware training minimizes degradation:`,
+      `  ${pc.dim('•')} ${pc.cyan('int8/uint8')} — Typically <1% retrieval quality loss vs float`,
+      `  ${pc.dim('•')} ${pc.cyan('binary/ubinary')} — ~2-5% quality loss; best paired with a reranker`,
+      `  ${pc.dim('•')} Combining lower dimensions + quantization compounds the quality loss`,
+      ``,
+      `${pc.bold('Matryoshka dimensions:')} Voyage 4 models produce ${pc.cyan('nested embeddings')} — the`,
+      `first 256 entries of a 1024-dim vector are themselves a valid 256-dim embedding.`,
+      `You can embed once at full dimension and truncate later without re-embedding.`,
+      `Supported values: 256, 512, 1024 (default), 2048.`,
+      ``,
+      `${pc.bold('Which vector databases support quantized storage?')}`,
+      `  ${pc.dim('•')} MongoDB Atlas Vector Search — float and int8`,
+      `  ${pc.dim('•')} Milvus, Qdrant, Weaviate, Elasticsearch, Vespa — float, int8, binary`,
+      ``,
+      `${pc.bold('Decision framework:')}`,
+      `  1. Start with ${pc.cyan('float')} at default dimensions — measure your baseline`,
+      `  2. Try ${pc.cyan('int8')} — if quality holds, you get 4× storage savings for free`,
+      `  3. If storage is critical, try ${pc.cyan('binary')} + reranker for 32× savings`,
+      `  4. Reduce dimensions (1024→256) for another 4× on top of quantization`,
+      `  5. Use ${pc.cyan('vai benchmark quantization')} to measure the tradeoffs on your data`,
+    ].join('\n'),
+    links: [
+      'https://docs.voyageai.com/docs/flexible-dimensions-and-quantization',
+      'https://www.mongodb.com/docs/voyageai/models/text-embeddings/',
+    ],
+    tryIt: [
+      'vai embed "hello world" --output-dtype int8',
+      'vai embed "hello world" --output-dtype binary --dimensions 256',
+      'vai benchmark quantization --model voyage-4-large',
+    ],
+  },
   benchmarking: {
     title: 'Benchmarking & Model Selection',
     summary: 'How to choose the right model for your use case',
@@ -434,12 +493,18 @@ const concepts = {
       `  Measures throughput (texts/sec) at different batch sizes.`,
       `  ${pc.dim('vai benchmark batch --batch-sizes 1,5,10,25,50 --rounds 3')}`,
       ``,
+      `${pc.bold('vai benchmark quantization')} — Compare output dtypes for storage savings:`,
+      `  Embeds the same corpus with float, int8, and binary, measures ranking quality`,
+      `  degradation vs storage savings. Helps you decide if quantization works for your data.`,
+      `  ${pc.dim('vai benchmark quantization --model voyage-4-large --dtypes float,int8,ubinary')}`,
+      ``,
       `${pc.bold('Decision framework:')}`,
       `  1. Run ${pc.cyan('benchmark cost')} to eliminate models outside your budget`,
       `  2. Run ${pc.cyan('benchmark embed')} to compare latency of affordable models`,
       `  3. Run ${pc.cyan('benchmark similarity')} with your actual data to compare quality`,
-      `  4. If quality is similar, pick the cheaper/faster model`,
-      `  5. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
+      `  4. Run ${pc.cyan('benchmark quantization')} to see if int8/binary preserves your ranking`,
+      `  5. If quality is similar, pick the cheaper/faster model + smallest viable dtype`,
+      `  6. Use ${pc.cyan('--save')} to track results over time as your data evolves`,
     ].join('\n'),
     links: ['https://www.mongodb.com/docs/voyageai/models/text-embeddings/'],
     tryIt: [
@@ -488,6 +553,15 @@ const aliases = {
   batch: 'batch-processing',
   'batch-processing': 'batch-processing',
   batching: 'batch-processing',
+  quantization: 'quantization',
+  quantize: 'quantization',
+  'output-dtype': 'quantization',
+  dtype: 'quantization',
+  int8: 'quantization',
+  binary: 'quantization',
+  ubinary: 'quantization',
+  matryoshka: 'quantization',
+  'flexible-dimensions': 'quantization',
   benchmark: 'benchmarking',
   benchmarking: 'benchmarking',
   'model-selection': 'benchmarking',

package/src/lib/math.js CHANGED

@@ -3,6 +3,11 @@
 /**
  * Compute cosine similarity between two vectors.
  * cosine_sim(a, b) = dot(a, b) / (||a|| * ||b||)
+ *
+ * Fun fact: this is basically asking "how much do these two vectors
+ * vibe?" — 1.0 means soulmates, 0.0 means strangers at a party,
+ * -1.0 means they're in a Twitter argument.
+ *
  * @param {number[]} a
  * @param {number[]} b
  * @returns {number} Similarity score in [-1, 1]