@memvid/sdk 2.0.152 → 2.0.154

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -98,8 +98,10 @@ export interface OpenAIEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'text-embedding-3-small' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 2048 */
  batchSize?: number;
+ /** Max tokens per batch (OpenAI limit is 8191). Default: 8000 (with safety margin) */
+ maxTokensPerBatch?: number;
  }
  /**
  * OpenAI embedding provider.
@@ -118,10 +120,29 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerBatch;
  constructor(config?: OpenAIEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Truncate text to fit within token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+ */
+ private truncateToTokenLimit;
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ * This prevents OpenAI API errors when total tokens exceed 8,192.
+ * Automatically truncates individual texts that exceed the token limit.
+ */
+ private createTokenAwareBatches;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
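
A minimal usage sketch of the new configuration surface, based only on the fields declared in `OpenAIEmbeddingsConfig` above (the values are illustrative, not recommendations):

```typescript
import { OpenAIEmbeddings } from '@memvid/sdk';

// Defaults after this release: batchSize 2048, maxTokensPerBatch 8000.
// Both caps can be lowered for stricter rate limits or smaller payloads.
const embedder = new OpenAIEmbeddings({
  model: 'text-embedding-3-small',
  batchSize: 512,           // max texts per API call
  maxTokensPerBatch: 6000,  // estimated-token budget per API call
});

const vectors = await embedder.embedDocuments(['first doc', 'second doc']);
```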
@@ -285,8 +306,10 @@ export interface MistralEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'mistral-embed' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 100 */
  batchSize?: number;
+ /** Max tokens per batch (Mistral limit is ~16k). Default: 15000 (with safety margin) */
+ maxTokensPerBatch?: number;
  }
  /**
  * Mistral AI embedding provider.
@@ -304,17 +327,87 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerBatch;
  constructor(config?: MistralEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ private createTokenAwareBatches;
+ embedDocuments(texts: string[]): Promise<number[][]>;
+ embedQuery(text: string): Promise<number[]>;
+ }
+ /**
+ * Ollama embedding provider configuration.
+ */
+ export interface OllamaEmbeddingsConfig {
+ /** Ollama server URL. Default: 'http://localhost:11434' or OLLAMA_HOST env var */
+ baseUrl?: string;
+ /** Model to use. Default: 'nomic-embed-text' */
+ model?: string;
+ /** Known embedding dimension. If omitted, auto-detected on first call. */
+ dimension?: number;
+ }
+ /**
+ * Ollama embedding provider.
+ *
+ * Uses a local Ollama server to generate embeddings. Supports any embedding model
+ * available in Ollama, including nomic-embed-text, mxbai-embed-large, all-minilm, etc.
+ *
+ * @example
+ * ```typescript
+ * // Default: localhost:11434 with nomic-embed-text
+ * const embedder = new OllamaEmbeddings();
+ *
+ * // Custom configuration
+ * const embedder = new OllamaEmbeddings({
+ * baseUrl: 'http://gpu-server:11434',
+ * model: 'nomic-embed-text',
+ * });
+ *
+ * // Use with Memvid
+ * const texts = ['Hello world', 'Goodbye world'];
+ * const embeddings = await embedder.embedDocuments(texts);
+ *
+ * // Or embed and store in one step
+ * const embedding = await embedder.embedQuery('Document text...');
+ * await mem.put({
+ * title: 'My Doc',
+ * label: 'docs',
+ * text: 'Document text...',
+ * embedding,
+ * embeddingIdentity: {
+ * provider: 'ollama',
+ * model: 'nomic-embed-text',
+ * dimension: embedding.length,
+ * },
+ * });
+ * ```
+ */
+ export declare class OllamaEmbeddings implements EmbeddingProvider {
+ private readonly _baseUrl;
+ private readonly _model;
+ private _dimension?;
+ constructor(config?: OllamaEmbeddingsConfig);
+ private static readonly OLLAMA_MODEL_DIMENSIONS;
+ get dimension(): number;
+ get modelName(): string;
+ get provider(): string;
+ private setDimensionFromEmbedding;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
  /**
  * Factory function to create an embedding provider.
  *
- * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral'
+ * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral', 'ollama'
  * @param config - Provider-specific configuration
  * @returns EmbeddingProvider instance
  *
@@ -324,6 +417,8 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
  * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
  * const embedder = getEmbedder('gemini'); // Uses GOOGLE_API_KEY or GEMINI_API_KEY
  * const embedder = getEmbedder('mistral'); // Uses MISTRAL_API_KEY
+ * const embedder = getEmbedder('ollama'); // Uses local Ollama server
+ * const embedder = getEmbedder('ollama', { model: 'nomic-embed-text', baseUrl: 'http://gpu:11434' });
  * ```
  */
- export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage' | 'nvidia' | 'gemini' | 'mistral', config?: Record<string, unknown>): EmbeddingProvider;
+ export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage' | 'nvidia' | 'gemini' | 'mistral' | 'ollama', config?: Record<string, unknown>): EmbeddingProvider;
@@ -26,7 +26,7 @@
  * ```
  */
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
+ exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
  exports.getEmbedder = getEmbedder;
  /**
  * Model dimension mappings for common embedding models.
@@ -114,7 +114,9 @@ class OpenAIEmbeddings {
  throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
  }
  this._model = config.model || 'text-embedding-3-small';
- this._batchSize = config.batchSize || 100;
+ this._batchSize = config.batchSize || 2048;
+ // OpenAI's limit is 8,192 tokens. Use 8,000 as default for max throughput.
+ this._maxTokensPerBatch = config.maxTokensPerBatch || 8000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1536;
@@ -125,35 +127,104 @@ class OpenAIEmbeddings {
  get provider() {
  return 'openai';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Truncate text to fit within token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+ */
+ truncateToTokenLimit(text) {
+ // Use conservative limit for truncation: 7800 tokens max for single text
+ const maxTokensForSingleText = Math.min(this._maxTokensPerBatch, 7800);
+ // Use 3.0 chars/token for safe truncation
+ const maxChars = Math.floor(maxTokensForSingleText * 3.0);
+ if (text.length <= maxChars) {
+ return text;
+ }
+ return text.slice(0, maxChars);
+ }
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ * This prevents OpenAI API errors when total tokens exceed 8,192.
+ * Automatically truncates individual texts that exceed the token limit.
+ */
+ createTokenAwareBatches(texts) {
+ const batches = [];
+ let currentBatch = [];
+ let currentTokens = 0;
+ for (let text of texts) {
+ let textTokens = this.estimateTokens(text);
+ // Truncate if single text exceeds token limit
+ if (textTokens > this._maxTokensPerBatch) {
+ text = this.truncateToTokenLimit(text);
+ textTokens = this.estimateTokens(text);
+ }
+ const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if (wouldExceedTokens || wouldExceedCount) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ currentBatch = [text];
+ currentTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
- const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
- const response = await fetch('https://api.openai.com/v1/embeddings', {
- method: 'POST',
- headers: {
- 'Authorization': `Bearer ${this._apiKey}`,
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify({
- model: this._model,
- input: batch,
- }),
+ // Create token-aware batches to avoid exceeding OpenAI's 8,192 token limit
+ const batches = this.createTokenAwareBatches(texts);
+ // Process batches in parallel (OpenAI allows 3000 RPM, 1M TPM)
+ // Use high concurrency for maximum throughput
+ const CONCURRENCY = 100;
+ const results = [];
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
+ const batchSlice = batches.slice(i, i + CONCURRENCY);
+ const promises = batchSlice.map(async (batch, sliceIndex) => {
+ const batchIndex = i + sliceIndex;
+ const response = await fetch('https://api.openai.com/v1/embeddings', {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${this._apiKey}`,
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ input: batch,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenAI API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ // Sort by index to ensure correct order within batch
+ const sorted = data.data.sort((a, b) => a.index - b.index);
+ return { batchIndex, embeddings: sorted.map(e => e.embedding) };
  });
- if (!response.ok) {
- const error = await response.text();
- throw new Error(`OpenAI API error: ${response.status} ${error}`);
- }
- const data = await response.json();
- // Sort by index to ensure correct order
- const sorted = data.data.sort((a, b) => a.index - b.index);
- allEmbeddings.push(...sorted.map(e => e.embedding));
+ const batchResults = await Promise.all(promises);
+ results.push(...batchResults);
  }
- return allEmbeddings;
+ // Sort by batch index and flatten
+ results.sort((a, b) => a.batchIndex - b.batchIndex);
+ return results.flatMap(r => r.embeddings);
  }
  async embedQuery(text) {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
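
To make the batching arithmetic in this hunk concrete, a hedged walkthrough using the constants above (the input sizes are made up):

```typescript
// Packing estimate: ceil(text.length / 3.5) tokens per text.
// A 70,000-character page estimates to 20,000 tokens (> maxTokensPerBatch of 8,000),
// so it is first truncated to min(8000, 7800) * 3.0 = 23,400 characters.
// Remaining texts are packed until the next one would push the estimated total past
// 8,000 tokens or the batch past batchSize (2,048) texts; batches are then dispatched
// up to 100 at a time via Promise.all.
const embedder = new OpenAIEmbeddings({ maxTokensPerBatch: 8000 });
const pages: string[] = loadPages(); // hypothetical helper returning page texts
const vectors = await embedder.embedDocuments(pages);
```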
@@ -529,6 +600,8 @@ class MistralEmbeddings {
  }
  this._model = config.model || 'mistral-embed';
  this._batchSize = config.batchSize || 100;
+ // Mistral's limit is ~16k tokens. Use 15000 as default with safety margin.
+ this._maxTokensPerBatch = config.maxTokensPerBatch || 15000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1024;
@@ -539,14 +612,59 @@ class MistralEmbeddings {
  get provider() {
  return 'mistral';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ createTokenAwareBatches(texts) {
+ const batches = [];
+ let currentBatch = [];
+ let currentTokens = 0;
+ for (const text of texts) {
+ const textTokens = this.estimateTokens(text);
+ // If single text exceeds token limit, send it alone
+ if (textTokens > this._maxTokensPerBatch) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ currentBatch = [];
+ currentTokens = 0;
+ }
+ batches.push([text]);
+ continue;
+ }
+ const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if (wouldExceedTokens || wouldExceedCount) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ currentBatch = [text];
+ currentTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
  const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
+ // Create token-aware batches
+ const batches = this.createTokenAwareBatches(texts);
+ for (const batch of batches) {
  const response = await fetch('https://api.mistral.ai/v1/embeddings', {
  method: 'POST',
  headers: {
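
A hedged reading of the Mistral variant above: unlike the OpenAI batcher, it never truncates an over-limit text; it flushes the current batch and sends that text as a batch of one. For example:

```typescript
// With the ~3.5 chars/token estimate, a 60,000-character record estimates to
// ~17,143 tokens (> the 15,000 default), so it becomes its own single-item batch.
// hugeRecord, smallA and smallB are hypothetical strings.
const mistral = new MistralEmbeddings({ maxTokensPerBatch: 15000 });
await mistral.embedDocuments([hugeRecord, smallA, smallB]); // batches: [hugeRecord], [smallA, smallB]
```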
@@ -575,10 +693,164 @@ class MistralEmbeddings {
  }
  }
  exports.MistralEmbeddings = MistralEmbeddings;
+ /**
+ * Ollama embedding provider.
+ *
+ * Uses a local Ollama server to generate embeddings. Supports any embedding model
+ * available in Ollama, including nomic-embed-text, mxbai-embed-large, all-minilm, etc.
+ *
+ * @example
+ * ```typescript
+ * // Default: localhost:11434 with nomic-embed-text
+ * const embedder = new OllamaEmbeddings();
+ *
+ * // Custom configuration
+ * const embedder = new OllamaEmbeddings({
+ * baseUrl: 'http://gpu-server:11434',
+ * model: 'nomic-embed-text',
+ * });
+ *
+ * // Use with Memvid
+ * const texts = ['Hello world', 'Goodbye world'];
+ * const embeddings = await embedder.embedDocuments(texts);
+ *
+ * // Or embed and store in one step
+ * const embedding = await embedder.embedQuery('Document text...');
+ * await mem.put({
+ * title: 'My Doc',
+ * label: 'docs',
+ * text: 'Document text...',
+ * embedding,
+ * embeddingIdentity: {
+ * provider: 'ollama',
+ * model: 'nomic-embed-text',
+ * dimension: embedding.length,
+ * },
+ * });
+ * ```
+ */
+ class OllamaEmbeddings {
+ constructor(config = {}) {
+ const defaultHost = process.env.OLLAMA_HOST || 'http://localhost:11434';
+ this._baseUrl = (config.baseUrl || defaultHost).trim().replace(/\/+$/, '');
+ this._model = config.model || 'nomic-embed-text';
+ this._dimension = config.dimension;
+ }
+ get dimension() {
+ if (this._dimension)
+ return this._dimension;
+ return OllamaEmbeddings.OLLAMA_MODEL_DIMENSIONS[this._model] || 768;
+ }
+ get modelName() {
+ return this._model;
+ }
+ get provider() {
+ return 'ollama';
+ }
+ setDimensionFromEmbedding(embedding) {
+ if (!this._dimension && embedding.length > 0) {
+ this._dimension = embedding.length;
+ }
+ }
+ async embedDocuments(texts) {
+ if (texts.length === 0) {
+ return [];
+ }
+ // Ollama doesn't support batch embedding, so we process one at a time
+ // For better performance, consider using Promise.all with concurrency limit
+ const embeddings = [];
+ for (const text of texts) {
+ const response = await fetch(`${this._baseUrl}/api/embeddings`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ prompt: text,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ if (!Array.isArray(data.embedding)) {
+ throw new Error(`Ollama API error: invalid response format`);
+ }
+ this.setDimensionFromEmbedding(data.embedding);
+ embeddings.push(data.embedding);
+ }
+ return embeddings;
+ }
+ async embedQuery(text) {
+ const response = await fetch(`${this._baseUrl}/api/embeddings`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ prompt: text,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ if (!Array.isArray(data.embedding)) {
+ throw new Error(`Ollama API error: invalid response format`);
+ }
+ this.setDimensionFromEmbedding(data.embedding);
+ return data.embedding;
+ }
+ }
+ exports.OllamaEmbeddings = OllamaEmbeddings;
+ // Known model dimensions for popular Ollama embedding models
+ OllamaEmbeddings.OLLAMA_MODEL_DIMENSIONS = {
+ // General purpose
+ 'nomic-embed-text': 768,
+ 'nomic-embed-text:v1': 768,
+ 'nomic-embed-text:v1.5': 768,
+ 'mxbai-embed-large': 1024,
+ 'mxbai-embed-large:v1': 1024,
+ 'all-minilm': 384,
+ 'all-minilm:l6-v2': 384,
+ 'all-minilm:l12-v2': 384,
+ // Snowflake Arctic (various sizes)
+ 'snowflake-arctic-embed': 1024,
+ 'snowflake-arctic-embed:s': 384,
+ 'snowflake-arctic-embed:m': 768,
+ 'snowflake-arctic-embed:l': 1024,
+ 'snowflake-arctic-embed:335m': 1024,
+ // BGE models
+ 'bge-m3': 1024,
+ 'bge-large': 1024,
+ 'bge-large:en': 1024,
+ 'bge-large:en-v1.5': 1024,
+ 'bge-base': 768,
+ 'bge-base:en': 768,
+ 'bge-base:en-v1.5': 768,
+ 'bge-small': 384,
+ 'bge-small:en': 384,
+ 'bge-small:en-v1.5': 384,
+ // Jina embeddings
+ 'jina-embeddings-v2-base-en': 768,
+ 'jina-embeddings-v2-small-en': 512,
+ // Multilingual
+ 'paraphrase-multilingual': 768,
+ 'paraphrase-multilingual:mpnet-base-v2': 768,
+ // E5 models
+ 'e5-large': 1024,
+ 'e5-base': 768,
+ 'e5-small': 384,
+ 'e5-mistral-7b-instruct': 4096,
+ };
  /**
  * Factory function to create an embedding provider.
  *
- * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral'
+ * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral', 'ollama'
  * @param config - Provider-specific configuration
  * @returns EmbeddingProvider instance
  *
@@ -588,6 +860,8 @@ exports.MistralEmbeddings = MistralEmbeddings;
  * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
  * const embedder = getEmbedder('gemini'); // Uses GOOGLE_API_KEY or GEMINI_API_KEY
  * const embedder = getEmbedder('mistral'); // Uses MISTRAL_API_KEY
+ * const embedder = getEmbedder('ollama'); // Uses local Ollama server
+ * const embedder = getEmbedder('ollama', { model: 'nomic-embed-text', baseUrl: 'http://gpu:11434' });
  * ```
  */
  function getEmbedder(provider, config) {
@@ -605,7 +879,9 @@ function getEmbedder(provider, config) {
  return new GeminiEmbeddings(config);
  case 'mistral':
  return new MistralEmbeddings(config);
+ case 'ollama':
+ return new OllamaEmbeddings(config);
  default:
- throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage, nvidia, gemini, mistral`);
+ throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage, nvidia, gemini, mistral, ollama`);
  }
  }
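
Putting the new pieces together, a sketch of local-only ingestion with the Ollama provider plus the putMany options added in this release (assumes `mem` is an already-opened Memvid instance; the input field names mirror the put() example above and may need adjusting to the exact PutManyInput shape):

```typescript
import { getEmbedder } from '@memvid/sdk';

const embedder = getEmbedder('ollama', { model: 'nomic-embed-text' });

const ids = await mem.putMany(
  [
    { title: 'Note A', label: 'notes', text: 'First note body' },
    { title: 'Note B', label: 'notes', text: 'Second note body' },
  ],
  {
    embedder,                // embeddings computed locally; native auto-embedding is skipped (see index.js below)
    enableEnrichment: false, // new flag: skip rules-based enrichment for faster ingestion
  },
);
```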
package/dist/index.d.ts CHANGED
@@ -409,7 +409,7 @@ export declare function verifyMemvid(path: string, options?: UseVerifyOptions):
  export declare function doctorMemvid(path: string, options?: UseDoctorOptions): Promise<unknown>;
  export type { AddMemoryCardsResult, Kind, ApiKey, Memvid, MemoryCard, MemoryCardInput, MemoriesResult, MemoriesStats, LockOptions, UseOptions, UnlockOptions, FindInput, VecSearchInput, AskInput, TimelineInput, PutInput, PutManyInput, PutManyOptions, MemvidErrorCode, MemvidErrorDetails, HeatmapEntry, HeatmapResponse, SessionSummary, SessionReplayResult, SessionActionResult, StatsResult, FindHit, FindResult, VecSearchResult, AskResult, AskStats, AskUsage, AskSource, Grounding, FollowUp, TimelineEntry, } from "./types";
  export { MemvidError, CapacityExceededError, TicketInvalidError, TicketReplayError, LexIndexDisabledError, TimeIndexMissingError, VerificationFailedError, LockedError, ApiKeyRequiredError, FileNotFoundError, MemoryAlreadyBoundError, FrameNotFoundError, VecIndexDisabledError, CorruptFileError, VecDimensionMismatchError, EmbeddingFailedError, EncryptionError, QuotaExceededError, getErrorSuggestion, } from "./error";
- export { EmbeddingProvider, OpenAIEmbeddings, OpenAIEmbeddingsConfig, CohereEmbeddings, CohereEmbeddingsConfig, VoyageEmbeddings, VoyageEmbeddingsConfig, NvidiaEmbeddings, NvidiaEmbeddingsConfig, GeminiEmbeddings, GeminiEmbeddingsConfig, MistralEmbeddings, MistralEmbeddingsConfig, getEmbedder, MODEL_DIMENSIONS, LOCAL_EMBEDDING_MODELS, LocalEmbeddingModel, } from "./embeddings";
+ export { EmbeddingProvider, OpenAIEmbeddings, OpenAIEmbeddingsConfig, CohereEmbeddings, CohereEmbeddingsConfig, VoyageEmbeddings, VoyageEmbeddingsConfig, NvidiaEmbeddings, NvidiaEmbeddingsConfig, GeminiEmbeddings, GeminiEmbeddingsConfig, MistralEmbeddings, MistralEmbeddingsConfig, OllamaEmbeddings, OllamaEmbeddingsConfig, getEmbedder, MODEL_DIMENSIONS, LOCAL_EMBEDDING_MODELS, LocalEmbeddingModel, } from "./embeddings";
  export { flush as flushAnalytics, isTelemetryEnabled } from "./analytics";
  /**
  * Mask PII (Personally Identifiable Information) in text.
package/dist/index.js CHANGED
@@ -36,7 +36,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = exports.getEmbedder = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.entities = exports.clip = void 0;
+ exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = exports.getEmbedder = exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.entities = exports.clip = void 0;
  exports.configure = configure;
  exports.getConfig = getConfig;
  exports.resetConfig = resetConfig;
@@ -829,6 +829,8 @@ function normalisePutArgs(input) {
  extractDates: input.extractDates,
  vectorCompression: input.vectorCompression,
  timestamp: input.timestamp,
+ embedding: input.embedding,
+ embeddingIdentity: input.embeddingIdentity,
  };
  return payload;
  }
@@ -1149,19 +1151,23 @@ class MemvidImpl {
  embeddingIdentity: req.embeddingIdentity,
  }));
  // If an external embedder is provided, embeddings are already attached and
- // native auto-embedding should not run.
+ // native auto-embedding should not run. Explicitly disable to prevent ONNX load.
  const nativeOptions = options
  ? embedder
  ? {
  compressionLevel: options.compressionLevel,
+ enableEnrichment: options.enableEnrichment,
+ enableEmbedding: false, // Embeddings already attached, skip native embedding
  }
  : {
  compressionLevel: options.compressionLevel,
  enableEmbedding: options.enableEmbedding,
  embeddingModel: options.embeddingModel,
+ enableEnrichment: options.enableEnrichment,
  }
  : undefined;
- return this.core.putMany(nativeRequests, nativeOptions);
+ const result = await this.core.putMany(nativeRequests, nativeOptions);
+ return result;
  });
  }
  /**
@@ -1873,7 +1879,7 @@ class MemvidImpl {
  * @returns Result with framesAdded count
  */
  async putFile(filePath, options) {
- const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index.js")));
+ const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index")));
  const { basename } = await Promise.resolve().then(() => __importStar(require("path")));
  const filename = basename(filePath);
  const docType = getDocumentType(filePath);
@@ -1909,14 +1915,35 @@ class MemvidImpl {
  if (result === null) {
  throw new Error(`Failed to parse document: ${filename}`);
  }
+ // Chunk text into smaller pieces (matches CLI behavior for better retrieval)
+ const chunkSize = options?.chunkSize ?? 1000;
+ const chunkText = (text, size) => {
+ if (text.length <= size)
+ return [text];
+ const chunks = [];
+ const lines = text.split('\n');
+ let current = '';
+ for (const line of lines) {
+ if (current.length + line.length + 1 > size && current.length > 0) {
+ chunks.push(current.trim());
+ current = line;
+ }
+ else {
+ current = current ? current + '\n' + line : line;
+ }
+ }
+ if (current.trim())
+ chunks.push(current.trim());
+ return chunks;
+ };
  // Build items for batch processing with putMany (6x faster than individual put())
  const items = [];
  for (const item of result.items) {
- let title;
- let metadata;
+ let baseTitle;
+ let itemMetadata;
  if (result.type === "pdf") {
- title = `${result.filename} [Page ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Page ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1925,8 +1952,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "xlsx") {
- title = `${result.filename} [Sheet: ${item.name}]`;
- metadata = {
+ baseTitle = `${result.filename} [Sheet: ${item.name}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1936,8 +1963,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "pptx") {
- title = `${result.filename} [Slide ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Slide ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1948,19 +1975,28 @@ class MemvidImpl {
  }
  else {
  // docx
- title = result.filename;
- metadata = {
+ baseTitle = result.filename;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
  };
  }
- items.push({
- title,
- labels: label ? [label] : undefined,
- text: item.text,
- metadata,
- });
+ // Chunk content for better retrieval granularity
+ const chunks = chunkText(item.text, chunkSize);
+ for (let i = 0; i < chunks.length; i++) {
+ const title = chunks.length > 1 ? `${baseTitle} [Chunk ${i + 1}/${chunks.length}]` : baseTitle;
+ items.push({
+ title,
+ labels: label ? [label] : undefined,
+ text: chunks[i],
+ metadata: {
+ ...itemMetadata,
+ chunk_index: i,
+ total_chunks: chunks.length,
+ },
+ });
+ }
  }
  // Use putMany for fast batch ingestion
  // Note: Call rebuildTimeIndex() after seal() if using ask() with temporal queries
@@ -1968,6 +2004,7 @@ class MemvidImpl {
  embedder,
  enableEmbedding: embedder ? undefined : options?.enableEmbedding,
  embeddingModel: embedder ? undefined : options?.embeddingModel,
+ enableEnrichment: options?.enableEnrichment,
  });
  (0, analytics_1.trackCommand)(this.filename, "putFile", true);
  return { framesAdded: items.length, type: result.type, filename: result.filename };
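
A usage sketch for the chunking path added to putFile above (again assuming `mem` is an open Memvid instance and `embedder` is an EmbeddingProvider such as the Ollama one; the file path is made up):

```typescript
const result = await mem.putFile('./reports/q3-summary.pdf', {
  label: 'reports',
  chunkSize: 1000,         // characters per chunk; 1000 is the default and matches the CLI
  embedder,                // optional external embedder
  enableEnrichment: false, // optional: trade enrichment for ingestion speed
});
// Each page is split on line boundaries into ~1000-character chunks; multi-chunk pages
// get titles like "q3-summary.pdf [Page 3] [Chunk 2/4]" plus chunk_index / total_chunks metadata.
console.log(result); // { framesAdded, type: 'pdf', filename: 'q3-summary.pdf' }
```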
@@ -2268,6 +2305,7 @@ Object.defineProperty(exports, "VoyageEmbeddings", { enumerable: true, get: func
  Object.defineProperty(exports, "NvidiaEmbeddings", { enumerable: true, get: function () { return embeddings_1.NvidiaEmbeddings; } });
  Object.defineProperty(exports, "GeminiEmbeddings", { enumerable: true, get: function () { return embeddings_1.GeminiEmbeddings; } });
  Object.defineProperty(exports, "MistralEmbeddings", { enumerable: true, get: function () { return embeddings_1.MistralEmbeddings; } });
+ Object.defineProperty(exports, "OllamaEmbeddings", { enumerable: true, get: function () { return embeddings_1.OllamaEmbeddings; } });
  Object.defineProperty(exports, "getEmbedder", { enumerable: true, get: function () { return embeddings_1.getEmbedder; } });
  Object.defineProperty(exports, "MODEL_DIMENSIONS", { enumerable: true, get: function () { return embeddings_1.MODEL_DIMENSIONS; } });
  Object.defineProperty(exports, "LOCAL_EMBEDDING_MODELS", { enumerable: true, get: function () { return embeddings_1.LOCAL_EMBEDDING_MODELS; } });
package/dist/types.d.ts CHANGED
@@ -43,6 +43,10 @@ export interface PutInput {
  * "Jan 15, 2023", "2023-01-15", "01/15/2023"
  */
  timestamp?: number | string;
+ /** Optional pre-computed embedding vector (use with external embedding providers like Ollama) */
+ embedding?: number[];
+ /** Optional embedding identity metadata for `embedding` (enables CLI/SDK auto-detection). */
+ embeddingIdentity?: EmbeddingIdentity;
  }
  export interface FindInput {
  /**
@@ -154,6 +158,8 @@ export interface PutManyOptions {
  embeddingModel?: string;
  /** Optional external embedder to generate embeddings for requests that omit `embedding`. */
  embedder?: EmbeddingProvider;
+ /** Enable rules-based enrichment (default: true). Set to false for faster ingestion. */
+ enableEnrichment?: boolean;
  }
  /** Options for correct() - stores a correction with retrieval priority boost */
  export interface CorrectOptions {
@@ -191,6 +197,7 @@ export interface NativePutManyOptions {
  compressionLevel?: number;
  enableEmbedding?: boolean;
  embeddingModel?: string;
+ enableEnrichment?: boolean;
  }
  export interface NativePutArgs {
  title?: string;
@@ -211,6 +218,10 @@ export interface NativePutArgs {
  vectorCompression?: boolean;
  /** Timestamp (epoch seconds or human-readable string) */
  timestamp?: number | string;
+ /** Optional pre-computed embedding vector */
+ embedding?: number[];
+ /** Optional embedding identity metadata */
+ embeddingIdentity?: EmbeddingIdentity;
  }
  export interface NativeFindOptions {
  k?: number;
@@ -592,6 +603,49 @@ export interface Memvid {
  * Returns an array of frame IDs for the ingested documents.
  */
  putMany(requests: PutManyInput[], options?: PutManyOptions): Promise<string[]>;
+ /**
+ * Ingest a document file (PDF, XLSX, PPTX, DOCX) with automatic parsing.
+ * Each page/sheet/slide becomes a separate frame with proper metadata.
+ */
+ putFile(filePath: string, options?: {
+ label?: string;
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ enableEnrichment?: boolean;
+ /** Chunk size in characters (default: 1000, matches CLI behavior) */
+ chunkSize?: number;
+ }): Promise<{
+ framesAdded: number;
+ type: string;
+ filename: string;
+ }>;
+ /**
+ * Ingest multiple document files from a directory.
+ */
+ putFiles(dirPath: string, options?: {
+ label?: string;
+ extensions?: string[];
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ }): Promise<{
+ filesProcessed: number;
+ framesAdded: number;
+ files: Array<{
+ filename: string;
+ framesAdded: number;
+ type: string;
+ }>;
+ }>;
  /** Search for documents matching a query. */
  find(query: string, opts?: FindInput): Promise<FindResult>;
  /** Vector similarity search using a pre-computed query embedding (offline-safe). */
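
And for the directory-level putFiles declared above, a brief sketch under the same assumptions (whether entries in `extensions` include the leading dot is a guess):

```typescript
const summary = await mem.putFiles('./docs', {
  label: 'handbook',
  extensions: ['.pdf', '.docx'],
  enableEmbedding: true,
});
console.log(`${summary.filesProcessed} files -> ${summary.framesAdded} frames`);
// summary.files lists { filename, framesAdded, type } per parsed document.
```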
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@memvid/sdk",
- "version": "2.0.152",
+ "version": "2.0.154",
  "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
  "node": ">=18"
  },
  "optionalDependencies": {
- "@memvid/sdk-darwin-arm64": "2.0.152",
- "@memvid/sdk-darwin-x64": "2.0.152",
- "@memvid/sdk-linux-x64-gnu": "2.0.152",
- "@memvid/sdk-linux-arm64-gnu": "2.0.152",
- "@memvid/sdk-win32-x64-msvc": "2.0.152"
+ "@memvid/sdk-darwin-arm64": "2.0.154",
+ "@memvid/sdk-darwin-x64": "2.0.154",
+ "@memvid/sdk-linux-x64-gnu": "2.0.154",
+ "@memvid/sdk-linux-arm64-gnu": "2.0.154",
+ "@memvid/sdk-win32-x64-msvc": "2.0.154"
  },
  "peerDependencies": {
  "@langchain/core": ">=0.3.0",
@@ -77,9 +77,6 @@
  "typescript": "^5.4.0"
  },
  "dependencies": {
- "unpdf": "^1.4.0",
- "exceljs": "^4.4.0",
- "officeparser": "^6.0.2",
  "@ai-sdk/openai": "^1.0.0",
  "@google/generative-ai": "^0.24.0",
  "@langchain/langgraph": ">=0.2.0",
@@ -87,7 +84,11 @@
  "@llamaindex/core": ">=0.4.0",
  "@llamaindex/openai": ">=0.2.0",
  "ai": ">=4.0.0",
+ "exceljs": "^4.4.0",
  "langchain": ">=0.3.0",
- "llamaindex": ">=0.12.0"
+ "llamaindex": ">=0.12.0",
+ "officeparser": "^6.0.2",
+ "unpdf": "^1.4.0",
+ "xlsx": "^0.18.5"
  }
  }