@memvid/sdk 2.0.154 → 2.0.155
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embeddings.d.ts +14 -8
- package/dist/embeddings.js +25 -21
- package/dist/index.js +21 -0
- package/package.json +6 -6
package/dist/embeddings.d.ts
CHANGED
@@ -98,9 +98,13 @@ export interface OpenAIEmbeddingsConfig {
     apiKey?: string;
     /** Model to use. Default: 'text-embedding-3-small' */
     model?: string;
-    /** Max number of texts to embed in a single API call. Default: 2048 */
+    /** Max number of texts to embed in a single API call. Default: 2048 (OpenAI hard limit) */
     batchSize?: number;
-    /** Max tokens per
+    /** Max tokens per individual input text (OpenAI limit is 8191). Default: 8000 (with safety margin).
+     * Note: this is a per-INPUT limit, not a per-batch total. Each input in a batch
+     * must individually be under this limit, but the batch total can be much higher. */
+    maxTokensPerInput?: number;
+    /** @deprecated Use maxTokensPerInput instead */
     maxTokensPerBatch?: number;
 }
 /**
@@ -120,7 +124,7 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
     private readonly _apiKey;
     private readonly _model;
     private readonly _batchSize;
-    private readonly
+    private readonly _maxTokensPerInput;
     constructor(config?: OpenAIEmbeddingsConfig);
     get dimension(): number;
     get modelName(): string;
@@ -132,15 +136,17 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
      */
     private estimateTokens;
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     private truncateToTokenLimit;
     /**
-     * Split texts into batches respecting
-     *
-     *
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     private createTokenAwareBatches;
     embedDocuments(texts: string[]): Promise<number[][]>;
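
Net effect of the typings change: callers now set a per-input cap (maxTokensPerInput) instead of treating maxTokensPerBatch as a batch-wide budget. A minimal usage sketch, assuming OpenAIEmbeddings is importable from the package root (this diff only shows dist/embeddings.d.ts, so the import path and values are illustrative):

// Sketch only; import path assumed, not shown in this diff.
import { OpenAIEmbeddings } from '@memvid/sdk';

const embeddings = new OpenAIEmbeddings({
    apiKey: process.env.OPENAI_API_KEY, // optional per the typings
    model: 'text-embedding-3-small',    // default per the typings
    batchSize: 2048,                    // max inputs per request (OpenAI hard limit)
    maxTokensPerInput: 8000,            // new in 2.0.155; maxTokensPerBatch is now deprecated
});

// Inside an async context:
const vectors = await embeddings.embedDocuments(['chunk one', 'chunk two']);
console.log(embeddings.modelName, embeddings.dimension, vectors.length);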
package/dist/embeddings.js
CHANGED
@@ -115,8 +115,9 @@ class OpenAIEmbeddings {
         }
         this._model = config.model || 'text-embedding-3-small';
         this._batchSize = config.batchSize || 2048;
-        // OpenAI's limit is 8,192 tokens
-
+        // OpenAI's limit is 8,192 tokens PER INPUT (not per batch).
+        // You can send up to 2048 inputs per request regardless of total tokens.
+        this._maxTokensPerInput = config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
     }
     get dimension() {
         return exports.MODEL_DIMENSIONS[this._model] || 1536;
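
Because the constructor falls back through config.maxTokensPerInput || config.maxTokensPerBatch || 8000, configs written against 2.0.154 keep working unchanged. A standalone illustration of that resolution order (the helper name is made up, not part of the SDK):

// New option > deprecated option > default, matching the constructor above.
function resolveMaxTokensPerInput(config: { maxTokensPerInput?: number; maxTokensPerBatch?: number }): number {
    return config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
}

resolveMaxTokensPerInput({});                          // 8000 (default)
resolveMaxTokensPerInput({ maxTokensPerBatch: 6000 }); // 6000 (deprecated option still honored)
resolveMaxTokensPerInput({ maxTokensPerInput: 4000 }); // 4000 (new option wins)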
@@ -136,48 +137,51 @@
         return Math.ceil(text.length / 3.5);
     }
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     truncateToTokenLimit(text) {
-
-
-        //
-        const maxChars = Math.floor(
+        const maxTokens = Math.min(this._maxTokensPerInput, 7800);
+        // Use 2.0 chars/token for safe truncation — handles spreadsheet data,
+        // numbers, and special characters which tokenize at ~2.2 chars/token
+        const maxChars = Math.floor(maxTokens * 2.0);
         if (text.length <= maxChars) {
             return text;
         }
         return text.slice(0, maxChars);
     }
     /**
-     * Split texts into batches respecting
-     *
-     *
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     createTokenAwareBatches(texts) {
+        // OpenAI enforces a per-request total token limit (typically 300K).
+        // Use 250K as a safe default to account for token estimation inaccuracy.
+        const MAX_TOKENS_PER_REQUEST = 250000;
         const batches = [];
         let currentBatch = [];
-        let
+        let currentBatchTokens = 0;
         for (let text of texts) {
+            // Truncate individual texts that exceed the per-input token limit
            let textTokens = this.estimateTokens(text);
-
-            if (textTokens > this._maxTokensPerBatch) {
+            if (textTokens > this._maxTokensPerInput) {
                text = this.truncateToTokenLimit(text);
                textTokens = this.estimateTokens(text);
            }
-            const
+            const wouldExceedRequestTokens = (currentBatchTokens + textTokens) > MAX_TOKENS_PER_REQUEST;
            const wouldExceedCount = currentBatch.length >= this._batchSize;
-            if (
-
-                batches.push(currentBatch);
-            }
+            if ((wouldExceedRequestTokens || wouldExceedCount) && currentBatch.length > 0) {
+                batches.push(currentBatch);
                currentBatch = [text];
-
+                currentBatchTokens = textTokens;
            }
            else {
                currentBatch.push(text);
-
+                currentBatchTokens += textTokens;
            }
        }
        if (currentBatch.length > 0) {
package/dist/index.js
CHANGED
@@ -1924,6 +1924,27 @@ class MemvidImpl {
         const lines = text.split('\n');
         let current = '';
         for (const line of lines) {
+            // Handle lines longer than chunkSize (e.g. wide spreadsheet rows)
+            if (line.length > size) {
+                if (current.trim()) {
+                    chunks.push(current.trim());
+                    current = '';
+                }
+                // Split long line at delimiter boundaries (" | " for XLSX rows)
+                let remaining = line;
+                while (remaining.length > size) {
+                    let splitAt = remaining.lastIndexOf(' | ', size);
+                    if (splitAt <= 0)
+                        splitAt = remaining.lastIndexOf(' ', size);
+                    if (splitAt <= 0)
+                        splitAt = size;
+                    chunks.push(remaining.slice(0, splitAt).trim());
+                    remaining = remaining.slice(splitAt).replace(/^\s*\|\s*/, '');
+                }
+                if (remaining.trim())
+                    current = remaining;
+                continue;
+            }
             if (current.length + line.length + 1 > size && current.length > 0) {
                 chunks.push(current.trim());
                 current = line;
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@memvid/sdk",
-  "version": "2.0.154",
+  "version": "2.0.155",
   "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
     "node": ">=18"
   },
   "optionalDependencies": {
-    "@memvid/sdk-darwin-arm64": "2.0.154",
-    "@memvid/sdk-darwin-x64": "2.0.154",
-    "@memvid/sdk-linux-x64-gnu": "2.0.154",
-    "@memvid/sdk-linux-arm64-gnu": "2.0.154",
-    "@memvid/sdk-win32-x64-msvc": "2.0.154"
+    "@memvid/sdk-darwin-arm64": "2.0.155",
+    "@memvid/sdk-darwin-x64": "2.0.155",
+    "@memvid/sdk-linux-x64-gnu": "2.0.155",
+    "@memvid/sdk-linux-arm64-gnu": "2.0.155",
+    "@memvid/sdk-win32-x64-msvc": "2.0.155"
   },
   "peerDependencies": {
     "@langchain/core": ">=0.3.0",