@memvid/sdk 2.0.152 → 2.0.154

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -98,8 +98,10 @@ export interface OpenAIEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'text-embedding-3-small' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 2048 */
  batchSize?: number;
+ /** Max tokens per batch (OpenAI limit is 8191). Default: 8000 (with safety margin) */
+ maxTokensPerBatch?: number;
  }
  /**
  * OpenAI embedding provider.
@@ -118,10 +120,29 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerBatch;
  constructor(config?: OpenAIEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Truncate text to fit within token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+ */
+ private truncateToTokenLimit;
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ * This prevents OpenAI API errors when total tokens exceed 8,192.
+ * Automatically truncates individual texts that exceed the token limit.
+ */
+ private createTokenAwareBatches;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
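
A minimal usage sketch of the new configuration surface, based only on the fields declared in `OpenAIEmbeddingsConfig` above (the values are illustrative, not recommendations):

```typescript
import { OpenAIEmbeddings } from '@memvid/sdk';

// Defaults after this release: batchSize 2048, maxTokensPerBatch 8000.
// Both caps can be lowered for stricter rate limits or smaller payloads.
const embedder = new OpenAIEmbeddings({
  model: 'text-embedding-3-small',
  batchSize: 512,           // max texts per API call
  maxTokensPerBatch: 6000,  // estimated-token budget per API call
});

const vectors = await embedder.embedDocuments(['first doc', 'second doc']);
```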
@@ -285,8 +306,10 @@ export interface MistralEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'mistral-embed' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 100 */
  batchSize?: number;
+ /** Max tokens per batch (Mistral limit is ~16k). Default: 15000 (with safety margin) */
+ maxTokensPerBatch?: number;
  }
  /**
  * Mistral AI embedding provider.
@@ -304,17 +327,87 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerBatch;
  constructor(config?: MistralEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ private createTokenAwareBatches;
+ embedDocuments(texts: string[]): Promise<number[][]>;
+ embedQuery(text: string): Promise<number[]>;
+ }
+ /**
+ * Ollama embedding provider configuration.
+ */
+ export interface OllamaEmbeddingsConfig {
+ /** Ollama server URL. Default: 'http://localhost:11434' or OLLAMA_HOST env var */
+ baseUrl?: string;
+ /** Model to use. Default: 'nomic-embed-text' */
+ model?: string;
+ /** Known embedding dimension. If omitted, auto-detected on first call. */
+ dimension?: number;
+ }
+ /**
+ * Ollama embedding provider.
+ *
+ * Uses a local Ollama server to generate embeddings. Supports any embedding model
+ * available in Ollama, including nomic-embed-text, mxbai-embed-large, all-minilm, etc.
+ *
+ * @example
+ * ```typescript
+ * // Default: localhost:11434 with nomic-embed-text
+ * const embedder = new OllamaEmbeddings();
+ *
+ * // Custom configuration
+ * const embedder = new OllamaEmbeddings({
+ * baseUrl: 'http://gpu-server:11434',
+ * model: 'nomic-embed-text',
+ * });
+ *
+ * // Use with Memvid
+ * const texts = ['Hello world', 'Goodbye world'];
+ * const embeddings = await embedder.embedDocuments(texts);
+ *
+ * // Or embed and store in one step
+ * const embedding = await embedder.embedQuery('Document text...');
+ * await mem.put({
+ * title: 'My Doc',
+ * label: 'docs',
+ * text: 'Document text...',
+ * embedding,
+ * embeddingIdentity: {
+ * provider: 'ollama',
+ * model: 'nomic-embed-text',
+ * dimension: embedding.length,
+ * },
+ * });
+ * ```
+ */
+ export declare class OllamaEmbeddings implements EmbeddingProvider {
+ private readonly _baseUrl;
+ private readonly _model;
+ private _dimension?;
+ constructor(config?: OllamaEmbeddingsConfig);
+ private static readonly OLLAMA_MODEL_DIMENSIONS;
+ get dimension(): number;
+ get modelName(): string;
+ get provider(): string;
+ private setDimensionFromEmbedding;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
  /**
  * Factory function to create an embedding provider.
  *
- * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral'
+ * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral', 'ollama'
  * @param config - Provider-specific configuration
  * @returns EmbeddingProvider instance
  *
@@ -324,6 +417,8 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
  * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
  * const embedder = getEmbedder('gemini'); // Uses GOOGLE_API_KEY or GEMINI_API_KEY
  * const embedder = getEmbedder('mistral'); // Uses MISTRAL_API_KEY
+ * const embedder = getEmbedder('ollama'); // Uses local Ollama server
+ * const embedder = getEmbedder('ollama', { model: 'nomic-embed-text', baseUrl: 'http://gpu:11434' });
  * ```
  */
- export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage' | 'nvidia' | 'gemini' | 'mistral', config?: Record<string, unknown>): EmbeddingProvider;
+ export declare function getEmbedder(provider: 'openai' | 'cohere' | 'voyage' | 'nvidia' | 'gemini' | 'mistral' | 'ollama', config?: Record<string, unknown>): EmbeddingProvider;
@@ -26,7 +26,7 @@
  * ```
  */
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
+ exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
  exports.getEmbedder = getEmbedder;
  /**
  * Model dimension mappings for common embedding models.
@@ -114,7 +114,9 @@ class OpenAIEmbeddings {
  throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
  }
  this._model = config.model || 'text-embedding-3-small';
- this._batchSize = config.batchSize || 100;
+ this._batchSize = config.batchSize || 2048;
+ // OpenAI's limit is 8,192 tokens. Use 8,000 as default for max throughput.
+ this._maxTokensPerBatch = config.maxTokensPerBatch || 8000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1536;
@@ -125,35 +127,104 @@ class OpenAIEmbeddings {
  get provider() {
  return 'openai';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Truncate text to fit within token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+ */
+ truncateToTokenLimit(text) {
+ // Use conservative limit for truncation: 7800 tokens max for single text
+ const maxTokensForSingleText = Math.min(this._maxTokensPerBatch, 7800);
+ // Use 3.0 chars/token for safe truncation
+ const maxChars = Math.floor(maxTokensForSingleText * 3.0);
+ if (text.length <= maxChars) {
+ return text;
+ }
+ return text.slice(0, maxChars);
+ }
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ * This prevents OpenAI API errors when total tokens exceed 8,192.
+ * Automatically truncates individual texts that exceed the token limit.
+ */
+ createTokenAwareBatches(texts) {
+ const batches = [];
+ let currentBatch = [];
+ let currentTokens = 0;
+ for (let text of texts) {
+ let textTokens = this.estimateTokens(text);
+ // Truncate if single text exceeds token limit
+ if (textTokens > this._maxTokensPerBatch) {
+ text = this.truncateToTokenLimit(text);
+ textTokens = this.estimateTokens(text);
+ }
+ const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if (wouldExceedTokens || wouldExceedCount) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ currentBatch = [text];
+ currentTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
- const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
- const response = await fetch('https://api.openai.com/v1/embeddings', {
- method: 'POST',
- headers: {
- 'Authorization': `Bearer ${this._apiKey}`,
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify({
- model: this._model,
- input: batch,
- }),
+ // Create token-aware batches to avoid exceeding OpenAI's 8,192 token limit
+ const batches = this.createTokenAwareBatches(texts);
+ // Process batches in parallel (OpenAI allows 3000 RPM, 1M TPM)
+ // Use high concurrency for maximum throughput
+ const CONCURRENCY = 100;
+ const results = [];
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
+ const batchSlice = batches.slice(i, i + CONCURRENCY);
+ const promises = batchSlice.map(async (batch, sliceIndex) => {
+ const batchIndex = i + sliceIndex;
+ const response = await fetch('https://api.openai.com/v1/embeddings', {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${this._apiKey}`,
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ input: batch,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenAI API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ // Sort by index to ensure correct order within batch
+ const sorted = data.data.sort((a, b) => a.index - b.index);
+ return { batchIndex, embeddings: sorted.map(e => e.embedding) };
  });
- if (!response.ok) {
- const error = await response.text();
- throw new Error(`OpenAI API error: ${response.status} ${error}`);
- }
- const data = await response.json();
- // Sort by index to ensure correct order
- const sorted = data.data.sort((a, b) => a.index - b.index);
- allEmbeddings.push(...sorted.map(e => e.embedding));
+ const batchResults = await Promise.all(promises);
+ results.push(...batchResults);
  }
- return allEmbeddings;
+ // Sort by batch index and flatten
+ results.sort((a, b) => a.batchIndex - b.batchIndex);
+ return results.flatMap(r => r.embeddings);
  }
  async embedQuery(text) {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
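
To make the batching arithmetic in this hunk concrete, a hedged walkthrough using the constants above (the input sizes are made up):

```typescript
// Packing estimate: ceil(text.length / 3.5) tokens per text.
// A 70,000-character page estimates to 20,000 tokens (> maxTokensPerBatch of 8,000),
// so it is first truncated to min(8000, 7800) * 3.0 = 23,400 characters.
// Remaining texts are packed until the next one would push the estimated total past
// 8,000 tokens or the batch past batchSize (2,048) texts; batches are then dispatched
// up to 100 at a time via Promise.all.
const embedder = new OpenAIEmbeddings({ maxTokensPerBatch: 8000 });
const pages: string[] = loadPages(); // hypothetical helper returning page texts
const vectors = await embedder.embedDocuments(pages);
```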
@@ -529,6 +600,8 @@ class MistralEmbeddings {
  }
  this._model = config.model || 'mistral-embed';
  this._batchSize = config.batchSize || 100;
+ // Mistral's limit is ~16k tokens. Use 15000 as default with safety margin.
+ this._maxTokensPerBatch = config.maxTokensPerBatch || 15000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1024;
@@ -539,14 +612,59 @@ class MistralEmbeddings {
  get provider() {
  return 'mistral';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ createTokenAwareBatches(texts) {
+ const batches = [];
+ let currentBatch = [];
+ let currentTokens = 0;
+ for (const text of texts) {
+ const textTokens = this.estimateTokens(text);
+ // If single text exceeds token limit, send it alone
+ if (textTokens > this._maxTokensPerBatch) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ currentBatch = [];
+ currentTokens = 0;
+ }
+ batches.push([text]);
+ continue;
+ }
+ const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if (wouldExceedTokens || wouldExceedCount) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ currentBatch = [text];
+ currentTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
  const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
+ // Create token-aware batches
+ const batches = this.createTokenAwareBatches(texts);
+ for (const batch of batches) {
  const response = await fetch('https://api.mistral.ai/v1/embeddings', {
  method: 'POST',
  headers: {
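
A hedged reading of the Mistral variant above: unlike the OpenAI batcher, it never truncates an over-limit text; it flushes the current batch and sends that text as a batch of one. For example:

```typescript
// With the ~3.5 chars/token estimate, a 60,000-character record estimates to
// ~17,143 tokens (> the 15,000 default), so it becomes its own single-item batch.
// hugeRecord, smallA and smallB are hypothetical strings.
const mistral = new MistralEmbeddings({ maxTokensPerBatch: 15000 });
await mistral.embedDocuments([hugeRecord, smallA, smallB]); // batches: [hugeRecord], [smallA, smallB]
```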
@@ -575,10 +693,164 @@ class MistralEmbeddings {
  }
  }
  exports.MistralEmbeddings = MistralEmbeddings;
+ /**
+ * Ollama embedding provider.
+ *
+ * Uses a local Ollama server to generate embeddings. Supports any embedding model
+ * available in Ollama, including nomic-embed-text, mxbai-embed-large, all-minilm, etc.
+ *
+ * @example
+ * ```typescript
+ * // Default: localhost:11434 with nomic-embed-text
+ * const embedder = new OllamaEmbeddings();
+ *
+ * // Custom configuration
+ * const embedder = new OllamaEmbeddings({
+ * baseUrl: 'http://gpu-server:11434',
+ * model: 'nomic-embed-text',
+ * });
+ *
+ * // Use with Memvid
+ * const texts = ['Hello world', 'Goodbye world'];
+ * const embeddings = await embedder.embedDocuments(texts);
+ *
+ * // Or embed and store in one step
+ * const embedding = await embedder.embedQuery('Document text...');
+ * await mem.put({
+ * title: 'My Doc',
+ * label: 'docs',
+ * text: 'Document text...',
+ * embedding,
+ * embeddingIdentity: {
+ * provider: 'ollama',
+ * model: 'nomic-embed-text',
+ * dimension: embedding.length,
+ * },
+ * });
+ * ```
+ */
+ class OllamaEmbeddings {
+ constructor(config = {}) {
+ const defaultHost = process.env.OLLAMA_HOST || 'http://localhost:11434';
+ this._baseUrl = (config.baseUrl || defaultHost).trim().replace(/\/+$/, '');
+ this._model = config.model || 'nomic-embed-text';
+ this._dimension = config.dimension;
+ }
+ get dimension() {
+ if (this._dimension)
+ return this._dimension;
+ return OllamaEmbeddings.OLLAMA_MODEL_DIMENSIONS[this._model] || 768;
+ }
+ get modelName() {
+ return this._model;
+ }
+ get provider() {
+ return 'ollama';
+ }
+ setDimensionFromEmbedding(embedding) {
+ if (!this._dimension && embedding.length > 0) {
+ this._dimension = embedding.length;
+ }
+ }
+ async embedDocuments(texts) {
+ if (texts.length === 0) {
+ return [];
+ }
+ // Ollama doesn't support batch embedding, so we process one at a time
+ // For better performance, consider using Promise.all with concurrency limit
+ const embeddings = [];
+ for (const text of texts) {
+ const response = await fetch(`${this._baseUrl}/api/embeddings`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ prompt: text,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ if (!Array.isArray(data.embedding)) {
+ throw new Error(`Ollama API error: invalid response format`);
+ }
+ this.setDimensionFromEmbedding(data.embedding);
+ embeddings.push(data.embedding);
+ }
+ return embeddings;
+ }
+ async embedQuery(text) {
+ const response = await fetch(`${this._baseUrl}/api/embeddings`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ prompt: text,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ if (!Array.isArray(data.embedding)) {
+ throw new Error(`Ollama API error: invalid response format`);
+ }
+ this.setDimensionFromEmbedding(data.embedding);
+ return data.embedding;
+ }
+ }
+ exports.OllamaEmbeddings = OllamaEmbeddings;
+ // Known model dimensions for popular Ollama embedding models
+ OllamaEmbeddings.OLLAMA_MODEL_DIMENSIONS = {
+ // General purpose
+ 'nomic-embed-text': 768,
+ 'nomic-embed-text:v1': 768,
+ 'nomic-embed-text:v1.5': 768,
+ 'mxbai-embed-large': 1024,
+ 'mxbai-embed-large:v1': 1024,
+ 'all-minilm': 384,
+ 'all-minilm:l6-v2': 384,
+ 'all-minilm:l12-v2': 384,
+ // Snowflake Arctic (various sizes)
+ 'snowflake-arctic-embed': 1024,
+ 'snowflake-arctic-embed:s': 384,
+ 'snowflake-arctic-embed:m': 768,
+ 'snowflake-arctic-embed:l': 1024,
+ 'snowflake-arctic-embed:335m': 1024,
+ // BGE models
+ 'bge-m3': 1024,
+ 'bge-large': 1024,
+ 'bge-large:en': 1024,
+ 'bge-large:en-v1.5': 1024,
+ 'bge-base': 768,
+ 'bge-base:en': 768,
+ 'bge-base:en-v1.5': 768,
+ 'bge-small': 384,
+ 'bge-small:en': 384,
+ 'bge-small:en-v1.5': 384,
+ // Jina embeddings
+ 'jina-embeddings-v2-base-en': 768,
+ 'jina-embeddings-v2-small-en': 512,
+ // Multilingual
+ 'paraphrase-multilingual': 768,
+ 'paraphrase-multilingual:mpnet-base-v2': 768,
+ // E5 models
+ 'e5-large': 1024,
+ 'e5-base': 768,
+ 'e5-small': 384,
+ 'e5-mistral-7b-instruct': 4096,
+ };
  /**
  * Factory function to create an embedding provider.
  *
- * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral'
+ * @param provider - One of: 'openai', 'cohere', 'voyage', 'nvidia', 'gemini', 'mistral', 'ollama'
  * @param config - Provider-specific configuration
  * @returns EmbeddingProvider instance
  *
@@ -588,6 +860,8 @@ exports.MistralEmbeddings = MistralEmbeddings;
  * const embedder = getEmbedder('cohere', { model: 'embed-multilingual-v3.0' });
  * const embedder = getEmbedder('gemini'); // Uses GOOGLE_API_KEY or GEMINI_API_KEY
  * const embedder = getEmbedder('mistral'); // Uses MISTRAL_API_KEY
+ * const embedder = getEmbedder('ollama'); // Uses local Ollama server
+ * const embedder = getEmbedder('ollama', { model: 'nomic-embed-text', baseUrl: 'http://gpu:11434' });
  * ```
  */
  function getEmbedder(provider, config) {
@@ -605,7 +879,9 @@ function getEmbedder(provider, config) {
  return new GeminiEmbeddings(config);
  case 'mistral':
  return new MistralEmbeddings(config);
+ case 'ollama':
+ return new OllamaEmbeddings(config);
  default:
- throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage, nvidia, gemini, mistral`);
+ throw new Error(`Unknown provider: ${provider}. Supported: openai, cohere, voyage, nvidia, gemini, mistral, ollama`);
  }
  }
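
Putting the new pieces together, a sketch of local-only ingestion with the Ollama provider plus the putMany options added in this release (assumes `mem` is an already-opened Memvid instance; the input field names mirror the put() example above and may need adjusting to the exact PutManyInput shape):

```typescript
import { getEmbedder } from '@memvid/sdk';

const embedder = getEmbedder('ollama', { model: 'nomic-embed-text' });

const ids = await mem.putMany(
  [
    { title: 'Note A', label: 'notes', text: 'First note body' },
    { title: 'Note B', label: 'notes', text: 'Second note body' },
  ],
  {
    embedder,                // embeddings computed locally; native auto-embedding is skipped (see index.js below)
    enableEnrichment: false, // new flag: skip rules-based enrichment for faster ingestion
  },
);
```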
package/dist/index.d.ts CHANGED
@@ -409,7 +409,7 @@ export declare function verifyMemvid(path: string, options?: UseVerifyOptions):
  export declare function doctorMemvid(path: string, options?: UseDoctorOptions): Promise<unknown>;
  export type { AddMemoryCardsResult, Kind, ApiKey, Memvid, MemoryCard, MemoryCardInput, MemoriesResult, MemoriesStats, LockOptions, UseOptions, UnlockOptions, FindInput, VecSearchInput, AskInput, TimelineInput, PutInput, PutManyInput, PutManyOptions, MemvidErrorCode, MemvidErrorDetails, HeatmapEntry, HeatmapResponse, SessionSummary, SessionReplayResult, SessionActionResult, StatsResult, FindHit, FindResult, VecSearchResult, AskResult, AskStats, AskUsage, AskSource, Grounding, FollowUp, TimelineEntry, } from "./types";
  export { MemvidError, CapacityExceededError, TicketInvalidError, TicketReplayError, LexIndexDisabledError, TimeIndexMissingError, VerificationFailedError, LockedError, ApiKeyRequiredError, FileNotFoundError, MemoryAlreadyBoundError, FrameNotFoundError, VecIndexDisabledError, CorruptFileError, VecDimensionMismatchError, EmbeddingFailedError, EncryptionError, QuotaExceededError, getErrorSuggestion, } from "./error";
- export { EmbeddingProvider, OpenAIEmbeddings, OpenAIEmbeddingsConfig, CohereEmbeddings, CohereEmbeddingsConfig, VoyageEmbeddings, VoyageEmbeddingsConfig, NvidiaEmbeddings, NvidiaEmbeddingsConfig, GeminiEmbeddings, GeminiEmbeddingsConfig, MistralEmbeddings, MistralEmbeddingsConfig, getEmbedder, MODEL_DIMENSIONS, LOCAL_EMBEDDING_MODELS, LocalEmbeddingModel, } from "./embeddings";
+ export { EmbeddingProvider, OpenAIEmbeddings, OpenAIEmbeddingsConfig, CohereEmbeddings, CohereEmbeddingsConfig, VoyageEmbeddings, VoyageEmbeddingsConfig, NvidiaEmbeddings, NvidiaEmbeddingsConfig, GeminiEmbeddings, GeminiEmbeddingsConfig, MistralEmbeddings, MistralEmbeddingsConfig, OllamaEmbeddings, OllamaEmbeddingsConfig, getEmbedder, MODEL_DIMENSIONS, LOCAL_EMBEDDING_MODELS, LocalEmbeddingModel, } from "./embeddings";
  export { flush as flushAnalytics, isTelemetryEnabled } from "./analytics";
  /**
  * Mask PII (Personally Identifiable Information) in text.
package/dist/index.js CHANGED
@@ -36,7 +36,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = exports.getEmbedder = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.entities = exports.clip = void 0;
+ exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = exports.getEmbedder = exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.entities = exports.clip = void 0;
  exports.configure = configure;
  exports.getConfig = getConfig;
  exports.resetConfig = resetConfig;
@@ -829,6 +829,8 @@ function normalisePutArgs(input) {
  extractDates: input.extractDates,
  vectorCompression: input.vectorCompression,
  timestamp: input.timestamp,
+ embedding: input.embedding,
+ embeddingIdentity: input.embeddingIdentity,
  };
  return payload;
  }
@@ -1149,19 +1151,23 @@ class MemvidImpl {
  embeddingIdentity: req.embeddingIdentity,
  }));
  // If an external embedder is provided, embeddings are already attached and
- // native auto-embedding should not run.
+ // native auto-embedding should not run. Explicitly disable to prevent ONNX load.
  const nativeOptions = options
  ? embedder
  ? {
  compressionLevel: options.compressionLevel,
+ enableEnrichment: options.enableEnrichment,
+ enableEmbedding: false, // Embeddings already attached, skip native embedding
  }
  : {
  compressionLevel: options.compressionLevel,
  enableEmbedding: options.enableEmbedding,
  embeddingModel: options.embeddingModel,
+ enableEnrichment: options.enableEnrichment,
  }
  : undefined;
- return this.core.putMany(nativeRequests, nativeOptions);
+ const result = await this.core.putMany(nativeRequests, nativeOptions);
+ return result;
  });
  }
  /**
@@ -1873,7 +1879,7 @@ class MemvidImpl {
  * @returns Result with framesAdded count
  */
  async putFile(filePath, options) {
- const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index.js")));
+ const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index")));
  const { basename } = await Promise.resolve().then(() => __importStar(require("path")));
  const filename = basename(filePath);
  const docType = getDocumentType(filePath);
@@ -1909,14 +1915,35 @@ class MemvidImpl {
  if (result === null) {
  throw new Error(`Failed to parse document: ${filename}`);
  }
+ // Chunk text into smaller pieces (matches CLI behavior for better retrieval)
+ const chunkSize = options?.chunkSize ?? 1000;
+ const chunkText = (text, size) => {
+ if (text.length <= size)
+ return [text];
+ const chunks = [];
+ const lines = text.split('\n');
+ let current = '';
+ for (const line of lines) {
+ if (current.length + line.length + 1 > size && current.length > 0) {
+ chunks.push(current.trim());
+ current = line;
+ }
+ else {
+ current = current ? current + '\n' + line : line;
+ }
+ }
+ if (current.trim())
+ chunks.push(current.trim());
+ return chunks;
+ };
  // Build items for batch processing with putMany (6x faster than individual put())
  const items = [];
  for (const item of result.items) {
- let title;
- let metadata;
+ let baseTitle;
+ let itemMetadata;
  if (result.type === "pdf") {
- title = `${result.filename} [Page ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Page ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1925,8 +1952,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "xlsx") {
- title = `${result.filename} [Sheet: ${item.name}]`;
- metadata = {
+ baseTitle = `${result.filename} [Sheet: ${item.name}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1936,8 +1963,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "pptx") {
- title = `${result.filename} [Slide ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Slide ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1948,19 +1975,28 @@ class MemvidImpl {
  }
  else {
  // docx
- title = result.filename;
- metadata = {
+ baseTitle = result.filename;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
  };
  }
- items.push({
- title,
- labels: label ? [label] : undefined,
- text: item.text,
- metadata,
- });
+ // Chunk content for better retrieval granularity
+ const chunks = chunkText(item.text, chunkSize);
+ for (let i = 0; i < chunks.length; i++) {
+ const title = chunks.length > 1 ? `${baseTitle} [Chunk ${i + 1}/${chunks.length}]` : baseTitle;
+ items.push({
+ title,
+ labels: label ? [label] : undefined,
+ text: chunks[i],
+ metadata: {
+ ...itemMetadata,
+ chunk_index: i,
+ total_chunks: chunks.length,
+ },
+ });
+ }
  }
  // Use putMany for fast batch ingestion
  // Note: Call rebuildTimeIndex() after seal() if using ask() with temporal queries
@@ -1968,6 +2004,7 @@ class MemvidImpl {
  embedder,
  enableEmbedding: embedder ? undefined : options?.enableEmbedding,
  embeddingModel: embedder ? undefined : options?.embeddingModel,
+ enableEnrichment: options?.enableEnrichment,
  });
  (0, analytics_1.trackCommand)(this.filename, "putFile", true);
  return { framesAdded: items.length, type: result.type, filename: result.filename };
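
A usage sketch for the chunking path added to putFile above (again assuming `mem` is an open Memvid instance and `embedder` is an EmbeddingProvider such as the Ollama one; the file path is made up):

```typescript
const result = await mem.putFile('./reports/q3-summary.pdf', {
  label: 'reports',
  chunkSize: 1000,         // characters per chunk; 1000 is the default and matches the CLI
  embedder,                // optional external embedder
  enableEnrichment: false, // optional: trade enrichment for ingestion speed
});
// Each page is split on line boundaries into ~1000-character chunks; multi-chunk pages
// get titles like "q3-summary.pdf [Page 3] [Chunk 2/4]" plus chunk_index / total_chunks metadata.
console.log(result); // { framesAdded, type: 'pdf', filename: 'q3-summary.pdf' }
```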
@@ -2268,6 +2305,7 @@ Object.defineProperty(exports, "VoyageEmbeddings", { enumerable: true, get: func
  Object.defineProperty(exports, "NvidiaEmbeddings", { enumerable: true, get: function () { return embeddings_1.NvidiaEmbeddings; } });
  Object.defineProperty(exports, "GeminiEmbeddings", { enumerable: true, get: function () { return embeddings_1.GeminiEmbeddings; } });
  Object.defineProperty(exports, "MistralEmbeddings", { enumerable: true, get: function () { return embeddings_1.MistralEmbeddings; } });
+ Object.defineProperty(exports, "OllamaEmbeddings", { enumerable: true, get: function () { return embeddings_1.OllamaEmbeddings; } });
  Object.defineProperty(exports, "getEmbedder", { enumerable: true, get: function () { return embeddings_1.getEmbedder; } });
  Object.defineProperty(exports, "MODEL_DIMENSIONS", { enumerable: true, get: function () { return embeddings_1.MODEL_DIMENSIONS; } });
  Object.defineProperty(exports, "LOCAL_EMBEDDING_MODELS", { enumerable: true, get: function () { return embeddings_1.LOCAL_EMBEDDING_MODELS; } });
package/dist/types.d.ts CHANGED
@@ -43,6 +43,10 @@ export interface PutInput {
  * "Jan 15, 2023", "2023-01-15", "01/15/2023"
  */
  timestamp?: number | string;
+ /** Optional pre-computed embedding vector (use with external embedding providers like Ollama) */
+ embedding?: number[];
+ /** Optional embedding identity metadata for `embedding` (enables CLI/SDK auto-detection). */
+ embeddingIdentity?: EmbeddingIdentity;
  }
  export interface FindInput {
  /**
@@ -154,6 +158,8 @@ export interface PutManyOptions {
  embeddingModel?: string;
  /** Optional external embedder to generate embeddings for requests that omit `embedding`. */
  embedder?: EmbeddingProvider;
+ /** Enable rules-based enrichment (default: true). Set to false for faster ingestion. */
+ enableEnrichment?: boolean;
  }
  /** Options for correct() - stores a correction with retrieval priority boost */
  export interface CorrectOptions {
@@ -191,6 +197,7 @@ export interface NativePutManyOptions {
  compressionLevel?: number;
  enableEmbedding?: boolean;
  embeddingModel?: string;
+ enableEnrichment?: boolean;
  }
  export interface NativePutArgs {
  title?: string;
@@ -211,6 +218,10 @@ export interface NativePutArgs {
  vectorCompression?: boolean;
  /** Timestamp (epoch seconds or human-readable string) */
  timestamp?: number | string;
+ /** Optional pre-computed embedding vector */
+ embedding?: number[];
+ /** Optional embedding identity metadata */
+ embeddingIdentity?: EmbeddingIdentity;
  }
  export interface NativeFindOptions {
  k?: number;
@@ -592,6 +603,49 @@ export interface Memvid {
  * Returns an array of frame IDs for the ingested documents.
  */
  putMany(requests: PutManyInput[], options?: PutManyOptions): Promise<string[]>;
+ /**
+ * Ingest a document file (PDF, XLSX, PPTX, DOCX) with automatic parsing.
+ * Each page/sheet/slide becomes a separate frame with proper metadata.
+ */
+ putFile(filePath: string, options?: {
+ label?: string;
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ enableEnrichment?: boolean;
+ /** Chunk size in characters (default: 1000, matches CLI behavior) */
+ chunkSize?: number;
+ }): Promise<{
+ framesAdded: number;
+ type: string;
+ filename: string;
+ }>;
+ /**
+ * Ingest multiple document files from a directory.
+ */
+ putFiles(dirPath: string, options?: {
+ label?: string;
+ extensions?: string[];
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ }): Promise<{
+ filesProcessed: number;
+ framesAdded: number;
+ files: Array<{
+ filename: string;
+ framesAdded: number;
+ type: string;
+ }>;
+ }>;
  /** Search for documents matching a query. */
  find(query: string, opts?: FindInput): Promise<FindResult>;
  /** Vector similarity search using a pre-computed query embedding (offline-safe). */
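
And for the directory-level putFiles declared above, a brief sketch under the same assumptions (whether entries in `extensions` include the leading dot is a guess):

```typescript
const summary = await mem.putFiles('./docs', {
  label: 'handbook',
  extensions: ['.pdf', '.docx'],
  enableEmbedding: true,
});
console.log(`${summary.filesProcessed} files -> ${summary.framesAdded} frames`);
// summary.files lists { filename, framesAdded, type } per parsed document.
```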
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@memvid/sdk",
- "version": "2.0.152",
+ "version": "2.0.154",
  "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
  "node": ">=18"
  },
  "optionalDependencies": {
- "@memvid/sdk-darwin-arm64": "2.0.152",
- "@memvid/sdk-darwin-x64": "2.0.152",
- "@memvid/sdk-linux-x64-gnu": "2.0.152",
- "@memvid/sdk-linux-arm64-gnu": "2.0.152",
- "@memvid/sdk-win32-x64-msvc": "2.0.152"
+ "@memvid/sdk-darwin-arm64": "2.0.154",
+ "@memvid/sdk-darwin-x64": "2.0.154",
+ "@memvid/sdk-linux-x64-gnu": "2.0.154",
+ "@memvid/sdk-linux-arm64-gnu": "2.0.154",
+ "@memvid/sdk-win32-x64-msvc": "2.0.154"
  },
  "peerDependencies": {
  "@langchain/core": ">=0.3.0",
@@ -77,9 +77,6 @@
  "typescript": "^5.4.0"
  },
  "dependencies": {
- "unpdf": "^1.4.0",
- "exceljs": "^4.4.0",
- "officeparser": "^6.0.2",
  "@ai-sdk/openai": "^1.0.0",
  "@google/generative-ai": "^0.24.0",
  "@langchain/langgraph": ">=0.2.0",
@@ -87,7 +84,11 @@
  "@llamaindex/core": ">=0.4.0",
  "@llamaindex/openai": ">=0.2.0",
  "ai": ">=4.0.0",
+ "exceljs": "^4.4.0",
  "langchain": ">=0.3.0",
- "llamaindex": ">=0.12.0"
+ "llamaindex": ">=0.12.0",
+ "officeparser": "^6.0.2",
+ "unpdf": "^1.4.0",
+ "xlsx": "^0.18.5"
  }
  }