@memvid/sdk 2.0.153 → 2.0.155

This diff reflects the changes between these two package versions as published to the public registry.
@@ -98,8 +98,14 @@ export interface OpenAIEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'text-embedding-3-small' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 2048 (OpenAI hard limit) */
  batchSize?: number;
+ /** Max tokens per individual input text (OpenAI limit is 8191). Default: 8000 (with safety margin).
+ * Note: this is a per-INPUT limit, not a per-batch total. Each input in a batch
+ * must individually be under this limit, but the batch total can be much higher. */
+ maxTokensPerInput?: number;
+ /** @deprecated Use maxTokensPerInput instead */
+ maxTokensPerBatch?: number;
  }
  /**
  * OpenAI embedding provider.
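For orientation, a config sketch using the new fields; the import path is an assumption (this diff shows the declaration, not the package's export surface):

```ts
// Sketch only: the import path is assumed, not shown in this diff.
import type { OpenAIEmbeddingsConfig } from '@memvid/sdk';

const config: OpenAIEmbeddingsConfig = {
  apiKey: process.env.OPENAI_API_KEY, // or omit and set OPENAI_API_KEY in the environment
  model: 'text-embedding-3-small',
  batchSize: 2048,          // max inputs per request (new default)
  maxTokensPerInput: 8000,  // per-INPUT cap; replaces the deprecated maxTokensPerBatch
};
```

Per the constructor change further down, a config that still sets only `maxTokensPerBatch` keeps working; its value falls back into `maxTokensPerInput`.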
@@ -118,10 +124,31 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerInput;
  constructor(config?: OpenAIEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Truncate a single input text to fit within the per-input token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+ * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
+ */
+ private truncateToTokenLimit;
+ /**
+ * Split texts into batches respecting:
+ * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+ * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+ * 3. Per-request input count (2,048 max inputs per request)
+ */
+ private createTokenAwareBatches;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
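A minimal usage sketch of the updated provider, under the same assumption about the import path:

```ts
// Sketch only: assumes OpenAIEmbeddings is exported from the package root.
import { OpenAIEmbeddings } from '@memvid/sdk';

const embeddings = new OpenAIEmbeddings({ maxTokensPerInput: 8000 });

// Oversized inputs are truncated and large arrays are split into multiple
// requests internally, so callers can pass big batches directly.
const vectors = await embeddings.embedDocuments(['first chunk', 'second chunk']);
const query = await embeddings.embedQuery('what changed in 2.0.155?');
console.log(vectors.length, query.length); // 2, 1536 (text-embedding-3-small)
```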
@@ -285,8 +312,10 @@ export interface MistralEmbeddingsConfig {
  apiKey?: string;
  /** Model to use. Default: 'mistral-embed' */
  model?: string;
- /** Number of texts to embed in a single API call. Default: 100 */
+ /** Max number of texts to embed in a single API call. Default: 100 */
  batchSize?: number;
+ /** Max tokens per batch (Mistral limit is ~16k). Default: 15000 (with safety margin) */
+ maxTokensPerBatch?: number;
  }
  /**
  * Mistral AI embedding provider.
@@ -304,10 +333,20 @@ export declare class MistralEmbeddings implements EmbeddingProvider {
  private readonly _apiKey;
  private readonly _model;
  private readonly _batchSize;
+ private readonly _maxTokensPerBatch;
  constructor(config?: MistralEmbeddingsConfig);
  get dimension(): number;
  get modelName(): string;
  get provider(): string;
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ private estimateTokens;
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ private createTokenAwareBatches;
  embedDocuments(texts: string[]): Promise<number[][]>;
  embedQuery(text: string): Promise<number[]>;
  }
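The Mistral provider keeps a per-request token budget rather than a per-input cap. A sketch under the same import-path assumption:

```ts
// Sketch only: assumes MistralEmbeddings is exported from the package root.
import { MistralEmbeddings } from '@memvid/sdk';

const mistral = new MistralEmbeddings({
  apiKey: process.env.MISTRAL_API_KEY,
  batchSize: 100,           // max documents per request
  maxTokensPerBatch: 15000, // estimated-token budget per request (~16k Mistral limit minus margin)
});

const documents = ['short note', 'another note'];
// Batches are cut on whichever limit is hit first: document count or estimated tokens.
const vectors = await mistral.embedDocuments(documents);
```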
@@ -114,7 +114,10 @@ class OpenAIEmbeddings {
  throw new Error('OpenAI API key required. Pass apiKey or set OPENAI_API_KEY environment variable.');
  }
  this._model = config.model || 'text-embedding-3-small';
- this._batchSize = config.batchSize || 100;
+ this._batchSize = config.batchSize || 2048;
+ // OpenAI's limit is 8,192 tokens PER INPUT (not per batch).
+ // You can send up to 2048 inputs per request regardless of total tokens.
+ this._maxTokensPerInput = config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1536;
@@ -125,35 +128,107 @@ class OpenAIEmbeddings {
  get provider() {
  return 'openai';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using 3.5 chars/token - balanced for mixed content (prose + data).
+ * For pure prose: ~4 chars/token. For numbers/symbols: ~2 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Truncate a single input text to fit within the per-input token limit.
+ * Preserves beginning of text as it typically contains the most important context.
+ * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+ * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
+ */
+ truncateToTokenLimit(text) {
+ const maxTokens = Math.min(this._maxTokensPerInput, 7800);
+ // Use 2.0 chars/token for safe truncation — handles spreadsheet data,
+ // numbers, and special characters which tokenize at ~2.2 chars/token
+ const maxChars = Math.floor(maxTokens * 2.0);
+ if (text.length <= maxChars) {
+ return text;
+ }
+ return text.slice(0, maxChars);
+ }
+ /**
+ * Split texts into batches respecting:
+ * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+ * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+ * 3. Per-request input count (2,048 max inputs per request)
+ */
+ createTokenAwareBatches(texts) {
+ // OpenAI enforces a per-request total token limit (typically 300K).
+ // Use 250K as a safe default to account for token estimation inaccuracy.
+ const MAX_TOKENS_PER_REQUEST = 250000;
+ const batches = [];
+ let currentBatch = [];
+ let currentBatchTokens = 0;
+ for (let text of texts) {
+ // Truncate individual texts that exceed the per-input token limit
+ let textTokens = this.estimateTokens(text);
+ if (textTokens > this._maxTokensPerInput) {
+ text = this.truncateToTokenLimit(text);
+ textTokens = this.estimateTokens(text);
+ }
+ const wouldExceedRequestTokens = (currentBatchTokens + textTokens) > MAX_TOKENS_PER_REQUEST;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if ((wouldExceedRequestTokens || wouldExceedCount) && currentBatch.length > 0) {
+ batches.push(currentBatch);
+ currentBatch = [text];
+ currentBatchTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentBatchTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
- const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
- const response = await fetch('https://api.openai.com/v1/embeddings', {
- method: 'POST',
- headers: {
- 'Authorization': `Bearer ${this._apiKey}`,
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify({
- model: this._model,
- input: batch,
- }),
+ // Create token-aware batches to avoid exceeding OpenAI's 8,192 token limit
+ const batches = this.createTokenAwareBatches(texts);
+ // Process batches in parallel (OpenAI allows 3000 RPM, 1M TPM)
+ // Use high concurrency for maximum throughput
+ const CONCURRENCY = 100;
+ const results = [];
+ for (let i = 0; i < batches.length; i += CONCURRENCY) {
+ const batchSlice = batches.slice(i, i + CONCURRENCY);
+ const promises = batchSlice.map(async (batch, sliceIndex) => {
+ const batchIndex = i + sliceIndex;
+ const response = await fetch('https://api.openai.com/v1/embeddings', {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${this._apiKey}`,
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: this._model,
+ input: batch,
+ }),
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenAI API error: ${response.status} ${error}`);
+ }
+ const data = await response.json();
+ // Sort by index to ensure correct order within batch
+ const sorted = data.data.sort((a, b) => a.index - b.index);
+ return { batchIndex, embeddings: sorted.map(e => e.embedding) };
  });
- if (!response.ok) {
- const error = await response.text();
- throw new Error(`OpenAI API error: ${response.status} ${error}`);
- }
- const data = await response.json();
- // Sort by index to ensure correct order
- const sorted = data.data.sort((a, b) => a.index - b.index);
- allEmbeddings.push(...sorted.map(e => e.embedding));
+ const batchResults = await Promise.all(promises);
+ results.push(...batchResults);
  }
- return allEmbeddings;
+ // Sort by batch index and flatten
+ results.sort((a, b) => a.batchIndex - b.batchIndex);
+ return results.flatMap(r => r.embeddings);
  }
  async embedQuery(text) {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
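To make the heuristics above concrete, a standalone arithmetic sketch using the same constants (plain numbers, no SDK types):

```ts
// Character-based token heuristic used by the new batching code (sketch).
const estimateTokens = (text: string): number => Math.ceil(text.length / 3.5);

// Truncation: maxTokensPerInput defaults to 8000, clamped to 7800, at 2.0 chars/token.
const truncationBudgetChars = Math.floor(Math.min(8000, 7800) * 2.0); // 15,600 chars kept per input

// Per-request budget: 250,000 estimated tokens at 3.5 chars/token.
const requestBudgetChars = 250_000 * 3.5; // ~875,000 chars of mixed content per request

// Example: a 40,000-character spreadsheet dump estimates to ~11,429 tokens,
// which exceeds the 8,000-token per-input cap, so it would be truncated.
const estimated = estimateTokens('x'.repeat(40_000));
console.log({ truncationBudgetChars, requestBudgetChars, estimated });
```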
@@ -529,6 +604,8 @@ class MistralEmbeddings {
  }
  this._model = config.model || 'mistral-embed';
  this._batchSize = config.batchSize || 100;
+ // Mistral's limit is ~16k tokens. Use 15000 as default with safety margin.
+ this._maxTokensPerBatch = config.maxTokensPerBatch || 15000;
  }
  get dimension() {
  return exports.MODEL_DIMENSIONS[this._model] || 1024;
@@ -539,14 +616,59 @@ class MistralEmbeddings {
  get provider() {
  return 'mistral';
  }
+ /**
+ * Estimate token count for a text string.
+ * Using a conservative estimate of 3.5 chars/token.
+ */
+ estimateTokens(text) {
+ return Math.ceil(text.length / 3.5);
+ }
+ /**
+ * Split texts into batches respecting both document count and token limits.
+ */
+ createTokenAwareBatches(texts) {
+ const batches = [];
+ let currentBatch = [];
+ let currentTokens = 0;
+ for (const text of texts) {
+ const textTokens = this.estimateTokens(text);
+ // If single text exceeds token limit, send it alone
+ if (textTokens > this._maxTokensPerBatch) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ currentBatch = [];
+ currentTokens = 0;
+ }
+ batches.push([text]);
+ continue;
+ }
+ const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+ const wouldExceedCount = currentBatch.length >= this._batchSize;
+ if (wouldExceedTokens || wouldExceedCount) {
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ currentBatch = [text];
+ currentTokens = textTokens;
+ }
+ else {
+ currentBatch.push(text);
+ currentTokens += textTokens;
+ }
+ }
+ if (currentBatch.length > 0) {
+ batches.push(currentBatch);
+ }
+ return batches;
+ }
  async embedDocuments(texts) {
  if (texts.length === 0) {
  return [];
  }
  const allEmbeddings = [];
- // Process in batches
- for (let i = 0; i < texts.length; i += this._batchSize) {
- const batch = texts.slice(i, i + this._batchSize);
+ // Create token-aware batches
+ const batches = this.createTokenAwareBatches(texts);
+ for (const batch of batches) {
  const response = await fetch('https://api.mistral.ai/v1/embeddings', {
  method: 'POST',
  headers: {
package/dist/index.js CHANGED
@@ -1151,19 +1151,23 @@ class MemvidImpl {
  embeddingIdentity: req.embeddingIdentity,
  }));
  // If an external embedder is provided, embeddings are already attached and
- // native auto-embedding should not run.
+ // native auto-embedding should not run. Explicitly disable to prevent ONNX load.
  const nativeOptions = options
  ? embedder
  ? {
  compressionLevel: options.compressionLevel,
+ enableEnrichment: options.enableEnrichment,
+ enableEmbedding: false, // Embeddings already attached, skip native embedding
  }
  : {
  compressionLevel: options.compressionLevel,
  enableEmbedding: options.enableEmbedding,
  embeddingModel: options.embeddingModel,
+ enableEnrichment: options.enableEnrichment,
  }
  : undefined;
- return this.core.putMany(nativeRequests, nativeOptions);
+ const result = await this.core.putMany(nativeRequests, nativeOptions);
+ return result;
  });
  }
  /**
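The practical effect: a `putMany` call with an external embedder no longer loads the native ONNX embedder, and enrichment can now be switched off. A sketch, assuming `mem` is an already-opened `Memvid` instance (opening it is outside this diff) and an item shape matching what `putFile` builds below:

```ts
// Sketch only: assumes root exports for Memvid and OpenAIEmbeddings.
import type { Memvid } from '@memvid/sdk';
import { OpenAIEmbeddings } from '@memvid/sdk';

declare const mem: Memvid; // assumption: an instance opened elsewhere

const ids = await mem.putMany(
  [{ title: 'notes', text: 'Quarterly numbers...' }],
  {
    embedder: new OpenAIEmbeddings(), // embeddings attached here; native embedding stays off
    enableEnrichment: false,          // new option: skip rules-based enrichment for faster ingestion
  },
);
console.log(ids.length); // one frame ID per request
```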
@@ -1875,7 +1879,7 @@ class MemvidImpl {
  * @returns Result with framesAdded count
  */
  async putFile(filePath, options) {
- const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index.js")));
+ const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index")));
  const { basename } = await Promise.resolve().then(() => __importStar(require("path")));
  const filename = basename(filePath);
  const docType = getDocumentType(filePath);
@@ -1911,14 +1915,56 @@ class MemvidImpl {
  if (result === null) {
  throw new Error(`Failed to parse document: ${filename}`);
  }
+ // Chunk text into smaller pieces (matches CLI behavior for better retrieval)
+ const chunkSize = options?.chunkSize ?? 1000;
+ const chunkText = (text, size) => {
+ if (text.length <= size)
+ return [text];
+ const chunks = [];
+ const lines = text.split('\n');
+ let current = '';
+ for (const line of lines) {
+ // Handle lines longer than chunkSize (e.g. wide spreadsheet rows)
+ if (line.length > size) {
+ if (current.trim()) {
+ chunks.push(current.trim());
+ current = '';
+ }
+ // Split long line at delimiter boundaries (" | " for XLSX rows)
+ let remaining = line;
+ while (remaining.length > size) {
+ let splitAt = remaining.lastIndexOf(' | ', size);
+ if (splitAt <= 0)
+ splitAt = remaining.lastIndexOf(' ', size);
+ if (splitAt <= 0)
+ splitAt = size;
+ chunks.push(remaining.slice(0, splitAt).trim());
+ remaining = remaining.slice(splitAt).replace(/^\s*\|\s*/, '');
+ }
+ if (remaining.trim())
+ current = remaining;
+ continue;
+ }
+ if (current.length + line.length + 1 > size && current.length > 0) {
+ chunks.push(current.trim());
+ current = line;
+ }
+ else {
+ current = current ? current + '\n' + line : line;
+ }
+ }
+ if (current.trim())
+ chunks.push(current.trim());
+ return chunks;
+ };
  // Build items for batch processing with putMany (6x faster than individual put())
  const items = [];
  for (const item of result.items) {
- let title;
- let metadata;
+ let baseTitle;
+ let itemMetadata;
  if (result.type === "pdf") {
- title = `${result.filename} [Page ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Page ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1927,8 +1973,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "xlsx") {
- title = `${result.filename} [Sheet: ${item.name}]`;
- metadata = {
+ baseTitle = `${result.filename} [Sheet: ${item.name}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1938,8 +1984,8 @@ class MemvidImpl {
  };
  }
  else if (result.type === "pptx") {
- title = `${result.filename} [Slide ${item.number}]`;
- metadata = {
+ baseTitle = `${result.filename} [Slide ${item.number}]`;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
@@ -1950,19 +1996,28 @@ class MemvidImpl {
  }
  else {
  // docx
- title = result.filename;
- metadata = {
+ baseTitle = result.filename;
+ itemMetadata = {
  ...baseMetadata,
  doc_name: result.filename,
  doc_type: result.type,
  };
  }
- items.push({
- title,
- labels: label ? [label] : undefined,
- text: item.text,
- metadata,
- });
+ // Chunk content for better retrieval granularity
+ const chunks = chunkText(item.text, chunkSize);
+ for (let i = 0; i < chunks.length; i++) {
+ const title = chunks.length > 1 ? `${baseTitle} [Chunk ${i + 1}/${chunks.length}]` : baseTitle;
+ items.push({
+ title,
+ labels: label ? [label] : undefined,
+ text: chunks[i],
+ metadata: {
+ ...itemMetadata,
+ chunk_index: i,
+ total_chunks: chunks.length,
+ },
+ });
+ }
  }
  // Use putMany for fast batch ingestion
  // Note: Call rebuildTimeIndex() after seal() if using ask() with temporal queries
@@ -1970,6 +2025,7 @@ class MemvidImpl {
  embedder,
  enableEmbedding: embedder ? undefined : options?.enableEmbedding,
  embeddingModel: embedder ? undefined : options?.embeddingModel,
+ enableEnrichment: options?.enableEnrichment,
  });
  (0, analytics_1.trackCommand)(this.filename, "putFile", true);
  return { framesAdded: items.length, type: result.type, filename: result.filename };
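A call sketch for the new chunking path, again assuming an already-opened `Memvid` instance named `mem`; the file path and sheet name are illustrative:

```ts
// Sketch only: assumes a root export for the Memvid interface.
import type { Memvid } from '@memvid/sdk';

declare const mem: Memvid; // assumption: an instance opened elsewhere

// Each page/sheet/slide is now split into ~1000-character chunks before putMany.
const result = await mem.putFile('./reports/q3.xlsx', {
  label: 'finance',
  chunkSize: 1000,         // default; overlong rows are split on " | " boundaries
  enableEnrichment: false, // optional: trade enrichment for faster ingestion
});
console.log(result.framesAdded, result.type, result.filename);
// Chunked frames carry chunk_index / total_chunks in metadata and titles like
// "q3.xlsx [Sheet: Revenue] [Chunk 2/5]".
```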
package/dist/types.d.ts CHANGED
@@ -158,6 +158,8 @@ export interface PutManyOptions {
  embeddingModel?: string;
  /** Optional external embedder to generate embeddings for requests that omit `embedding`. */
  embedder?: EmbeddingProvider;
+ /** Enable rules-based enrichment (default: true). Set to false for faster ingestion. */
+ enableEnrichment?: boolean;
  }
  /** Options for correct() - stores a correction with retrieval priority boost */
  export interface CorrectOptions {
@@ -195,6 +197,7 @@ export interface NativePutManyOptions {
  compressionLevel?: number;
  enableEmbedding?: boolean;
  embeddingModel?: string;
+ enableEnrichment?: boolean;
  }
  export interface NativePutArgs {
  title?: string;
@@ -600,6 +603,49 @@ export interface Memvid {
  * Returns an array of frame IDs for the ingested documents.
  */
  putMany(requests: PutManyInput[], options?: PutManyOptions): Promise<string[]>;
+ /**
+ * Ingest a document file (PDF, XLSX, PPTX, DOCX) with automatic parsing.
+ * Each page/sheet/slide becomes a separate frame with proper metadata.
+ */
+ putFile(filePath: string, options?: {
+ label?: string;
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ enableEnrichment?: boolean;
+ /** Chunk size in characters (default: 1000, matches CLI behavior) */
+ chunkSize?: number;
+ }): Promise<{
+ framesAdded: number;
+ type: string;
+ filename: string;
+ }>;
+ /**
+ * Ingest multiple document files from a directory.
+ */
+ putFiles(dirPath: string, options?: {
+ label?: string;
+ extensions?: string[];
+ metadata?: Record<string, unknown>;
+ enableEmbedding?: boolean;
+ embeddingModel?: string;
+ embedder?: EmbeddingProvider;
+ vectorCompression?: boolean;
+ autoTag?: boolean;
+ extractDates?: boolean;
+ }): Promise<{
+ filesProcessed: number;
+ framesAdded: number;
+ files: Array<{
+ filename: string;
+ framesAdded: number;
+ type: string;
+ }>;
+ }>;
  /** Search for documents matching a query. */
  find(query: string, opts?: FindInput): Promise<FindResult>;
  /** Vector similarity search using a pre-computed query embedding (offline-safe). */
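And the directory variant, under the same assumptions:

```ts
// Sketch only: same assumptions as above.
import type { Memvid } from '@memvid/sdk';

declare const mem: Memvid; // assumption: an instance opened elsewhere

const summary = await mem.putFiles('./contracts', {
  label: 'legal',
  extensions: ['.pdf', '.docx'], // filter format is an assumption; only the option name is declared above
});
console.log(`${summary.filesProcessed} files, ${summary.framesAdded} frames`);
for (const f of summary.files) {
  console.log(`${f.filename} (${f.type}): ${f.framesAdded} frames`);
}
```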
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@memvid/sdk",
- "version": "2.0.153",
+ "version": "2.0.155",
  "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
  "node": ">=18"
  },
  "optionalDependencies": {
- "@memvid/sdk-darwin-arm64": "2.0.153",
- "@memvid/sdk-darwin-x64": "2.0.153",
- "@memvid/sdk-linux-x64-gnu": "2.0.153",
- "@memvid/sdk-linux-arm64-gnu": "2.0.153",
- "@memvid/sdk-win32-x64-msvc": "2.0.153"
+ "@memvid/sdk-darwin-arm64": "2.0.155",
+ "@memvid/sdk-darwin-x64": "2.0.155",
+ "@memvid/sdk-linux-x64-gnu": "2.0.155",
+ "@memvid/sdk-linux-arm64-gnu": "2.0.155",
+ "@memvid/sdk-win32-x64-msvc": "2.0.155"
  },
  "peerDependencies": {
  "@langchain/core": ">=0.3.0",
@@ -77,9 +77,6 @@
  "typescript": "^5.4.0"
  },
  "dependencies": {
- "unpdf": "^1.4.0",
- "exceljs": "^4.4.0",
- "officeparser": "^6.0.2",
  "@ai-sdk/openai": "^1.0.0",
  "@google/generative-ai": "^0.24.0",
  "@langchain/langgraph": ">=0.2.0",
@@ -87,7 +84,11 @@
  "@llamaindex/core": ">=0.4.0",
  "@llamaindex/openai": ">=0.2.0",
  "ai": ">=4.0.0",
+ "exceljs": "^4.4.0",
  "langchain": ">=0.3.0",
- "llamaindex": ">=0.12.0"
+ "llamaindex": ">=0.12.0",
+ "officeparser": "^6.0.2",
+ "unpdf": "^1.4.0",
+ "xlsx": "^0.18.5"
  }
  }