@memvid/sdk 2.0.153 → 2.0.154

@@ -98,8 +98,10 @@ export interface OpenAIEmbeddingsConfig {
     apiKey?: string;
     /** Model to use. Default: 'text-embedding-3-small' */
     model?: string;
-    /** Number of texts to embed in a single API call. Default: 100 */
+    /** Max number of texts to embed in a single API call. Default: 2048 */
     batchSize?: number;
+    /** Max tokens per batch (OpenAI limit is 8191). Default: 8000 (with safety margin) */
+    maxTokensPerBatch?: number;
 }
 /**
  * OpenAI embedding provider.
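The new `maxTokensPerBatch` knob sits alongside the raised `batchSize` default. A minimal sketch of wiring it up, assuming `OpenAIEmbeddings` is exported from the package root (the diff only shows the dist typings, so the import path is an assumption):

```ts
import { OpenAIEmbeddings } from "@memvid/sdk"; // assumed export path

async function main() {
  // 2.0.154 defaults: batchSize 2048 texts, maxTokensPerBatch 8000 tokens.
  // Tighten the token budget for token-dense inputs (code, tables, raw numbers).
  const embeddings = new OpenAIEmbeddings({
    apiKey: process.env.OPENAI_API_KEY,
    model: "text-embedding-3-small",
    batchSize: 512,          // cap on texts per API call
    maxTokensPerBatch: 6000, // cap on estimated tokens per API call
  });

  const vectors = await embeddings.embedDocuments(["first chunk", "second chunk"]);
  console.log(vectors.length, vectors[0].length); // 2 vectors, 1536 dims for 3-small
}

main().catch(console.error);
```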
@@ -118,10 +120,29 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
     private readonly _apiKey;
     private readonly _model;
     private readonly _batchSize;
+    private readonly _maxTokensPerBatch;
     constructor(config?: OpenAIEmbeddingsConfig);
     get dimension(): number;
     get modelName(): string;
     get provider(): string;
+    /**
+     * Estimate token count for a text string.
+     * Using 3.5 chars/token - balanced for mixed content (prose + data).
+     * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+     */
+    private estimateTokens;
+    /**
+     * Truncate text to fit within token limit.
+     * Preserves beginning of text as it typically contains the most important context.
+     * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+     */
+    private truncateToTokenLimit;
+    /**
+     * Split texts into batches respecting both document count and token limits.
+     * This prevents OpenAI API errors when total tokens exceed 8,192.
+     * Automatically truncates individual texts that exceed the token limit.
+     */
+    private createTokenAwareBatches;
     embedDocuments(texts: string[]): Promise<number[][]>;
     embedQuery(text: string): Promise<number[]>;
 }
@@ -285,8 +306,10 @@ export interface MistralEmbeddingsConfig {
     apiKey?: string;
     /** Model to use. Default: 'mistral-embed' */
     model?: string;
-    /** Number of texts to embed in a single API call. Default: 100 */
+    /** Max number of texts to embed in a single API call. Default: 100 */
     batchSize?: number;
+    /** Max tokens per batch (Mistral limit is ~16k). Default: 15000 (with safety margin) */
+    maxTokensPerBatch?: number;
 }
 /**
  * Mistral AI embedding provider.
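For Mistral the shape is the same but the defaults differ: `batchSize` stays at 100 and the token cap defaults to 15000 against Mistral's ~16k limit. A sketch, again assuming the export path:

```ts
import { MistralEmbeddings } from "@memvid/sdk"; // assumed export path

const mistral = new MistralEmbeddings({
  apiKey: process.env.MISTRAL_API_KEY,
  model: "mistral-embed",
  maxTokensPerBatch: 12000, // optional: extra headroom below the 15000 default
});
```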
@@ -304,10 +327,20 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
     private readonly _apiKey;
     private readonly _model;
     private readonly _batchSize;
+    private readonly _maxTokensPerBatch;
     constructor(config?: MistralEmbeddingsConfig);
     get dimension(): number;
     get modelName(): string;
     get provider(): string;
+    /**
+     * Estimate token count for a text string.
+     * Using a conservative estimate of 3.5 chars/token.
+     */
+    private estimateTokens;
+    /**
+     * Split texts into batches respecting both document count and token limits.
+     */
+    private createTokenAwareBatches;
     embedDocuments(texts: string[]): Promise<number[][]>;
     embedQuery(text: string): Promise<number[]>;
 }
@@ -114,7 +114,9 @@ class OpenAIEmbeddings {
             throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
         }
         this._model = config.model || 'text-embedding-3-small';
-        this._batchSize = config.batchSize || 100;
+        this._batchSize = config.batchSize || 2048;
+        // OpenAI's limit is 8,192 tokens. Use 8,000 as default for max throughput.
+        this._maxTokensPerBatch = config.maxTokensPerBatch || 8000;
     }
     get dimension() {
         return exports.MODEL_DIMENSIONS[this._model] || 1536;
@@ -125,35 +127,104 @@ class OpenAIEmbeddings {
     get provider() {
         return 'openai';
     }
+    /**
+     * Estimate token count for a text string.
+     * Using 3.5 chars/token - balanced for mixed content (prose + data).
+     * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+     */
+    estimateTokens(text) {
+        return Math.ceil(text.length / 3.5);
+    }
+    /**
+     * Truncate text to fit within token limit.
+     * Preserves beginning of text as it typically contains the most important context.
+     * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+     */
+    truncateToTokenLimit(text) {
+        // Use conservative limit for truncation: 7800 tokens max for single text
+        const maxTokensForSingleText = Math.min(this._maxTokensPerBatch, 7800);
+        // Use 3.0 chars/token for safe truncation
+        const maxChars = Math.floor(maxTokensForSingleText * 3.0);
+        if (text.length <= maxChars) {
+            return text;
+        }
+        return text.slice(0, maxChars);
+    }
+    /**
+     * Split texts into batches respecting both document count and token limits.
+     * This prevents OpenAI API errors when total tokens exceed 8,192.
+     * Automatically truncates individual texts that exceed the token limit.
+     */
+    createTokenAwareBatches(texts) {
+        const batches = [];
+        let currentBatch = [];
+        let currentTokens = 0;
+        for (let text of texts) {
+            let textTokens = this.estimateTokens(text);
+            // Truncate if single text exceeds token limit
+            if (textTokens > this._maxTokensPerBatch) {
+                text = this.truncateToTokenLimit(text);
+                textTokens = this.estimateTokens(text);
+            }
+            const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+            const wouldExceedCount = currentBatch.length >= this._batchSize;
+            if (wouldExceedTokens || wouldExceedCount) {
+                if (currentBatch.length > 0) {
+                    batches.push(currentBatch);
+                }
+                currentBatch = [text];
+                currentTokens = textTokens;
+            }
+            else {
+                currentBatch.push(text);
+                currentTokens += textTokens;
+            }
+        }
+        if (currentBatch.length > 0) {
+            batches.push(currentBatch);
+        }
+        return batches;
+    }
     async embedDocuments(texts) {
         if (texts.length === 0) {
             return [];
         }
-        const allEmbeddings = [];
-        // Process in batches
-        for (let i = 0; i < texts.length; i += this._batchSize) {
-            const batch = texts.slice(i, i + this._batchSize);
-            const response = await fetch('https://api.openai.com/v1/embeddings', {
-                method: 'POST',
-                headers: {
-                    'Authorization': `Bearer ${this._apiKey}`,
-                    'Content-Type': 'application/json',
-                },
-                body: JSON.stringify({
-                    model: this._model,
-                    input: batch,
-                }),
+        // Create token-aware batches to avoid exceeding OpenAI's 8,192 token limit
+        const batches = this.createTokenAwareBatches(texts);
+        // Process batches in parallel (OpenAI allows 3000 RPM, 1M TPM)
+        // Use high concurrency for maximum throughput
+        const CONCURRENCY = 100;
+        const results = [];
+        for (let i = 0; i < batches.length; i += CONCURRENCY) {
+            const batchSlice = batches.slice(i, i + CONCURRENCY);
+            const promises = batchSlice.map(async (batch, sliceIndex) => {
+                const batchIndex = i + sliceIndex;
+                const response = await fetch('https://api.openai.com/v1/embeddings', {
+                    method: 'POST',
+                    headers: {
+                        'Authorization': `Bearer ${this._apiKey}`,
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        model: this._model,
+                        input: batch,
+                    }),
+                });
+                if (!response.ok) {
+                    const error = await response.text();
+                    throw new Error(`OpenAI API error: ${response.status} ${error}`);
+                }
+                const data = await response.json();
+                // Sort by index to ensure correct order within batch
+                const sorted = data.data.sort((a, b) => a.index - b.index);
+                return { batchIndex, embeddings: sorted.map(e => e.embedding) };
             });
-            if (!response.ok) {
-                const error = await response.text();
-                throw new Error(`OpenAI API error: ${response.status} ${error}`);
-            }
-            const data = await response.json();
-            // Sort by index to ensure correct order
-            const sorted = data.data.sort((a, b) => a.index - b.index);
-            allEmbeddings.push(...sorted.map(e => e.embedding));
+            const batchResults = await Promise.all(promises);
+            results.push(...batchResults);
         }
-        return allEmbeddings;
+        // Sort by batch index and flatten
+        results.sort((a, b) => a.batchIndex - b.batchIndex);
+        return results.flatMap(r => r.embeddings);
     }
     async embedQuery(text) {
         const response = await fetch('https://api.openai.com/v1/embeddings', {
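For reference, the packing rule the new `embedDocuments` relies on can be read in isolation. A standalone TypeScript sketch of the same greedy algorithm taken from this hunk (estimate ≈ length / 3.5 tokens, truncate oversized texts, start a new batch whenever either limit would be crossed):

```ts
// Greedy token-aware batching, mirroring createTokenAwareBatches in this diff.
function tokenAwareBatches(
  texts: string[],
  maxTokensPerBatch = 8000,
  batchSize = 2048,
): string[][] {
  const estimate = (t: string) => Math.ceil(t.length / 3.5);
  // Truncation uses a stricter 3.0 chars/token and a 7,800-token single-text cap.
  const truncate = (t: string) =>
    t.slice(0, Math.floor(Math.min(maxTokensPerBatch, 7800) * 3.0));

  const batches: string[][] = [];
  let current: string[] = [];
  let tokens = 0;
  for (let text of texts) {
    let t = estimate(text);
    if (t > maxTokensPerBatch) {
      text = truncate(text);
      t = estimate(text);
    }
    if (tokens + t > maxTokensPerBatch || current.length >= batchSize) {
      if (current.length > 0) batches.push(current);
      current = [text];
      tokens = t;
    } else {
      current.push(text);
      tokens += t;
    }
  }
  if (current.length > 0) batches.push(current);
  return batches;
}

// e.g. three ~3,000-token texts (≈10,500 chars each) no longer share one request:
// tokenAwareBatches(texts) → [[a, b], [c]] instead of the previous single batch.
```

Note that the estimate is a character heuristic, not a real tokenizer, which is why the defaults leave headroom below the hard API limits.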
@@ -529,6 +600,8 @@ class MistralEmbeddings {
         }
         this._model = config.model || 'mistral-embed';
         this._batchSize = config.batchSize || 100;
+        // Mistral's limit is ~16k tokens. Use 15000 as default with safety margin.
+        this._maxTokensPerBatch = config.maxTokensPerBatch || 15000;
     }
     get dimension() {
         return exports.MODEL_DIMENSIONS[this._model] || 1024;
@@ -539,14 +612,59 @@ class MistralEmbeddings {
     get provider() {
         return 'mistral';
     }
+    /**
+     * Estimate token count for a text string.
+     * Using a conservative estimate of 3.5 chars/token.
+     */
+    estimateTokens(text) {
+        return Math.ceil(text.length / 3.5);
+    }
+    /**
+     * Split texts into batches respecting both document count and token limits.
+     */
+    createTokenAwareBatches(texts) {
+        const batches = [];
+        let currentBatch = [];
+        let currentTokens = 0;
+        for (const text of texts) {
+            const textTokens = this.estimateTokens(text);
+            // If single text exceeds token limit, send it alone
+            if (textTokens > this._maxTokensPerBatch) {
+                if (currentBatch.length > 0) {
+                    batches.push(currentBatch);
+                    currentBatch = [];
+                    currentTokens = 0;
+                }
+                batches.push([text]);
+                continue;
+            }
+            const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+            const wouldExceedCount = currentBatch.length >= this._batchSize;
+            if (wouldExceedTokens || wouldExceedCount) {
+                if (currentBatch.length > 0) {
+                    batches.push(currentBatch);
+                }
+                currentBatch = [text];
+                currentTokens = textTokens;
+            }
+            else {
+                currentBatch.push(text);
+                currentTokens += textTokens;
+            }
+        }
+        if (currentBatch.length > 0) {
+            batches.push(currentBatch);
+        }
+        return batches;
+    }
     async embedDocuments(texts) {
         if (texts.length === 0) {
             return [];
         }
         const allEmbeddings = [];
-        // Process in batches
-        for (let i = 0; i < texts.length; i += this._batchSize) {
-            const batch = texts.slice(i, i + this._batchSize);
+        // Create token-aware batches
+        const batches = this.createTokenAwareBatches(texts);
+        for (const batch of batches) {
            const response = await fetch('https://api.mistral.ai/v1/embeddings', {
                method: 'POST',
                headers: {
package/dist/index.js CHANGED
@@ -1151,19 +1151,23 @@ class MemvidImpl {
                 embeddingIdentity: req.embeddingIdentity,
             }));
             // If an external embedder is provided, embeddings are already attached and
-            // native auto-embedding should not run.
+            // native auto-embedding should not run. Explicitly disable to prevent ONNX load.
             const nativeOptions = options
                 ? embedder
                     ? {
                         compressionLevel: options.compressionLevel,
+                        enableEnrichment: options.enableEnrichment,
+                        enableEmbedding: false, // Embeddings already attached, skip native embedding
                     }
                     : {
                         compressionLevel: options.compressionLevel,
                         enableEmbedding: options.enableEmbedding,
                         embeddingModel: options.embeddingModel,
+                        enableEnrichment: options.enableEnrichment,
                     }
                 : undefined;
-            return this.core.putMany(nativeRequests, nativeOptions);
+            const result = await this.core.putMany(nativeRequests, nativeOptions);
+            return result;
         });
     }
     /**
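The practical effect of the `nativeOptions` change is that supplying an external embedder now keeps the native ONNX embedder cold. A usage sketch written against the `Memvid` interface declared in types.d.ts; the request shape is inferred from the items putFile builds later in this diff, and how a `Memvid` instance is obtained is not part of the diff:

```ts
import type { Memvid, EmbeddingProvider } from "@memvid/sdk"; // assumed type exports

// With an external embedder, 2.0.154 passes enableEmbedding: false to the native
// layer, so the ONNX model is never loaded for these requests.
async function ingestWithExternalEmbedder(
  memory: Memvid,
  embedder: EmbeddingProvider,
  docs: { title: string; text: string }[],
): Promise<string[]> {
  return memory.putMany(docs, {
    embedder,
    enableEnrichment: false, // new in 2.0.154: skip rules-based enrichment for faster ingestion
  });
}
```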
@@ -1875,7 +1879,7 @@ class MemvidImpl {
     * @returns Result with framesAdded count
     */
    async putFile(filePath, options) {
-        const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index.js")));
+        const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index")));
        const { basename } = await Promise.resolve().then(() => __importStar(require("path")));
        const filename = basename(filePath);
        const docType = getDocumentType(filePath);
@@ -1911,14 +1915,35 @@ class MemvidImpl {
         if (result === null) {
             throw new Error(`Failed to parse document: ${filename}`);
         }
+        // Chunk text into smaller pieces (matches CLI behavior for better retrieval)
+        const chunkSize = options?.chunkSize ?? 1000;
+        const chunkText = (text, size) => {
+            if (text.length <= size)
+                return [text];
+            const chunks = [];
+            const lines = text.split('\n');
+            let current = '';
+            for (const line of lines) {
+                if (current.length + line.length + 1 > size && current.length > 0) {
+                    chunks.push(current.trim());
+                    current = line;
+                }
+                else {
+                    current = current ? current + '\n' + line : line;
+                }
+            }
+            if (current.trim())
+                chunks.push(current.trim());
+            return chunks;
+        };
         // Build items for batch processing with putMany (6x faster than individual put())
         const items = [];
         for (const item of result.items) {
-            let title;
-            let metadata;
+            let baseTitle;
+            let itemMetadata;
             if (result.type === "pdf") {
-                title = `${result.filename} [Page ${item.number}]`;
-                metadata = {
+                baseTitle = `${result.filename} [Page ${item.number}]`;
+                itemMetadata = {
                     ...baseMetadata,
                     doc_name: result.filename,
                     doc_type: result.type,
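The chunker added here is purely line-based: it never splits inside a line, so a single line longer than `chunkSize` is kept whole. A standalone TypeScript sketch of the same rule, for reading the boundary behavior in isolation:

```ts
// Standalone version of the line-based chunker putFile now applies before putMany.
// Splits on newlines and closes a chunk once adding the next line would exceed `size`.
function chunkText(text: string, size = 1000): string[] {
  if (text.length <= size) return [text];
  const chunks: string[] = [];
  let current = "";
  for (const line of text.split("\n")) {
    if (current.length + line.length + 1 > size && current.length > 0) {
      chunks.push(current.trim());
      current = line;
    } else {
      current = current ? current + "\n" + line : line;
    }
  }
  if (current.trim()) chunks.push(current.trim());
  return chunks;
}

// A 2,500-character page made of short lines yields roughly three chunks of ≤1,000 chars;
// a single newline-free line longer than `size` comes back as one oversized chunk.
```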
@@ -1927,8 +1952,8 @@ class MemvidImpl {
                 };
             }
             else if (result.type === "xlsx") {
-                title = `${result.filename} [Sheet: ${item.name}]`;
-                metadata = {
+                baseTitle = `${result.filename} [Sheet: ${item.name}]`;
+                itemMetadata = {
                     ...baseMetadata,
                     doc_name: result.filename,
                     doc_type: result.type,
@@ -1938,8 +1963,8 @@ class MemvidImpl {
                 };
             }
             else if (result.type === "pptx") {
-                title = `${result.filename} [Slide ${item.number}]`;
-                metadata = {
+                baseTitle = `${result.filename} [Slide ${item.number}]`;
+                itemMetadata = {
                     ...baseMetadata,
                     doc_name: result.filename,
                     doc_type: result.type,
@@ -1950,19 +1975,28 @@ class MemvidImpl {
             }
             else {
                 // docx
-                title = result.filename;
-                metadata = {
+                baseTitle = result.filename;
+                itemMetadata = {
                     ...baseMetadata,
                     doc_name: result.filename,
                     doc_type: result.type,
                 };
             }
-            items.push({
-                title,
-                labels: label ? [label] : undefined,
-                text: item.text,
-                metadata,
-            });
+            // Chunk content for better retrieval granularity
+            const chunks = chunkText(item.text, chunkSize);
+            for (let i = 0; i < chunks.length; i++) {
+                const title = chunks.length > 1 ? `${baseTitle} [Chunk ${i + 1}/${chunks.length}]` : baseTitle;
+                items.push({
+                    title,
+                    labels: label ? [label] : undefined,
+                    text: chunks[i],
+                    metadata: {
+                        ...itemMetadata,
+                        chunk_index: i,
+                        total_chunks: chunks.length,
+                    },
+                });
+            }
         }
         // Use putMany for fast batch ingestion
         // Note: Call rebuildTimeIndex() after seal() if using ask() with temporal queries
@@ -1970,6 +2004,7 @@ class MemvidImpl {
             embedder,
             enableEmbedding: embedder ? undefined : options?.enableEmbedding,
             embeddingModel: embedder ? undefined : options?.embeddingModel,
+            enableEnrichment: options?.enableEnrichment,
         });
         (0, analytics_1.trackCommand)(this.filename, "putFile", true);
         return { framesAdded: items.length, type: result.type, filename: result.filename };
package/dist/types.d.ts CHANGED
@@ -158,6 +158,8 @@ export interface PutManyOptions {
     embeddingModel?: string;
     /** Optional external embedder to generate embeddings for requests that omit `embedding`. */
     embedder?: EmbeddingProvider;
+    /** Enable rules-based enrichment (default: true). Set to false for faster ingestion. */
+    enableEnrichment?: boolean;
 }
 /** Options for correct() - stores a correction with retrieval priority boost */
 export interface CorrectOptions {
@@ -195,6 +197,7 @@ export interface NativePutManyOptions {
     compressionLevel?: number;
     enableEmbedding?: boolean;
     embeddingModel?: string;
+    enableEnrichment?: boolean;
 }
 export interface NativePutArgs {
     title?: string;
@@ -600,6 +603,49 @@ export interface Memvid {
     * Returns an array of frame IDs for the ingested documents.
     */
    putMany(requests: PutManyInput[], options?: PutManyOptions): Promise<string[]>;
+    /**
+     * Ingest a document file (PDF, XLSX, PPTX, DOCX) with automatic parsing.
+     * Each page/sheet/slide becomes a separate frame with proper metadata.
+     */
+    putFile(filePath: string, options?: {
+        label?: string;
+        metadata?: Record<string, unknown>;
+        enableEmbedding?: boolean;
+        embeddingModel?: string;
+        embedder?: EmbeddingProvider;
+        vectorCompression?: boolean;
+        autoTag?: boolean;
+        extractDates?: boolean;
+        enableEnrichment?: boolean;
+        /** Chunk size in characters (default: 1000, matches CLI behavior) */
+        chunkSize?: number;
+    }): Promise<{
+        framesAdded: number;
+        type: string;
+        filename: string;
+    }>;
+    /**
+     * Ingest multiple document files from a directory.
+     */
+    putFiles(dirPath: string, options?: {
+        label?: string;
+        extensions?: string[];
+        metadata?: Record<string, unknown>;
+        enableEmbedding?: boolean;
+        embeddingModel?: string;
+        embedder?: EmbeddingProvider;
+        vectorCompression?: boolean;
+        autoTag?: boolean;
+        extractDates?: boolean;
+    }): Promise<{
+        filesProcessed: number;
+        framesAdded: number;
+        files: Array<{
+            filename: string;
+            framesAdded: number;
+            type: string;
+        }>;
+    }>;
     /** Search for documents matching a query. */
     find(query: string, opts?: FindInput): Promise<FindResult>;
     /** Vector similarity search using a pre-computed query embedding (offline-safe). */
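These declarations formalize helpers the implementation already ships. A hedged usage sketch against the `Memvid` interface; the file paths are placeholders, and whether `extensions` entries include the leading dot is not specified in this diff:

```ts
import type { Memvid } from "@memvid/sdk"; // assumed type export

// `memory` is any object implementing the Memvid interface shown above.
async function ingestReports(memory: Memvid) {
  // One PDF: each page is chunked to ~1,000 characters unless chunkSize overrides it.
  const single = await memory.putFile("./reports/q3.pdf", {
    label: "reports",
    chunkSize: 800,
    enableEnrichment: false,
  });
  console.log(`${single.framesAdded} frames from ${single.filename} (${single.type})`);

  // A whole directory, filtered by extension (entry format assumed).
  const bulk = await memory.putFiles("./reports", {
    label: "reports",
    extensions: [".pdf", ".docx"],
  });
  console.log(`${bulk.filesProcessed} files, ${bulk.framesAdded} frames total`);
}
```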
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@memvid/sdk",
-  "version": "2.0.153",
+  "version": "2.0.154",
   "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
     "node": ">=18"
   },
   "optionalDependencies": {
-    "@memvid/sdk-darwin-arm64": "2.0.153",
-    "@memvid/sdk-darwin-x64": "2.0.153",
-    "@memvid/sdk-linux-x64-gnu": "2.0.153",
-    "@memvid/sdk-linux-arm64-gnu": "2.0.153",
-    "@memvid/sdk-win32-x64-msvc": "2.0.153"
+    "@memvid/sdk-darwin-arm64": "2.0.154",
+    "@memvid/sdk-darwin-x64": "2.0.154",
+    "@memvid/sdk-linux-x64-gnu": "2.0.154",
+    "@memvid/sdk-linux-arm64-gnu": "2.0.154",
+    "@memvid/sdk-win32-x64-msvc": "2.0.154"
   },
   "peerDependencies": {
     "@langchain/core": ">=0.3.0",
@@ -77,9 +77,6 @@
     "typescript": "^5.4.0"
   },
   "dependencies": {
-    "unpdf": "^1.4.0",
-    "exceljs": "^4.4.0",
-    "officeparser": "^6.0.2",
     "@ai-sdk/openai": "^1.0.0",
     "@google/generative-ai": "^0.24.0",
     "@langchain/langgraph": ">=0.2.0",
@@ -87,7 +84,11 @@
     "@llamaindex/core": ">=0.4.0",
     "@llamaindex/openai": ">=0.2.0",
     "ai": ">=4.0.0",
+    "exceljs": "^4.4.0",
     "langchain": ">=0.3.0",
-    "llamaindex": ">=0.12.0"
+    "llamaindex": ">=0.12.0",
+    "officeparser": "^6.0.2",
+    "unpdf": "^1.4.0",
+    "xlsx": "^0.18.5"
   }
 }