@memvid/sdk 2.0.154 → 2.0.155

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -98,9 +98,13 @@ export interface OpenAIEmbeddingsConfig {
     apiKey?: string;
     /** Model to use. Default: 'text-embedding-3-small' */
     model?: string;
-    /** Max number of texts to embed in a single API call. Default: 2048 */
+    /** Max number of texts to embed in a single API call. Default: 2048 (OpenAI hard limit) */
     batchSize?: number;
-    /** Max tokens per batch (OpenAI limit is 8191). Default: 8000 (with safety margin) */
+    /** Max tokens per individual input text (OpenAI limit is 8191). Default: 8000 (with safety margin).
+     * Note: this is a per-INPUT limit, not a per-batch total. Each input in a batch
+     * must individually be under this limit, but the batch total can be much higher. */
+    maxTokensPerInput?: number;
+    /** @deprecated Use maxTokensPerInput instead */
     maxTokensPerBatch?: number;
 }
 /**
@@ -120,7 +124,7 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
     private readonly _apiKey;
     private readonly _model;
     private readonly _batchSize;
-    private readonly _maxTokensPerBatch;
+    private readonly _maxTokensPerInput;
     constructor(config?: OpenAIEmbeddingsConfig);
     get dimension(): number;
     get modelName(): string;
@@ -132,15 +136,17 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
      */
     private estimateTokens;
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     private truncateToTokenLimit;
     /**
-     * Split texts into batches respecting both document count and token limits.
-     * This prevents OpenAI API errors when total tokens exceed 8,192.
-     * Automatically truncates individual texts that exceed the token limit.
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     private createTokenAwareBatches;
     embedDocuments(texts: string[]): Promise<number[][]>;
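
For reference, a minimal usage sketch of the renamed option. The constructor call and embedDocuments signature are taken from the declarations above; the root import path is an assumption, since the package's export surface is not shown in this diff. Configs that still pass the deprecated maxTokensPerBatch keep working, because the constructor (see the hunk below) falls back to it.

import { OpenAIEmbeddings } from '@memvid/sdk';

// New name: caps tokens per individual input text, not per batch total.
const embeddings = new OpenAIEmbeddings({
    apiKey: process.env.OPENAI_API_KEY,
    model: 'text-embedding-3-small',
    batchSize: 2048,          // max inputs per request (OpenAI hard limit)
    maxTokensPerInput: 8000,  // per-input cap; the model limit is 8,191
});

// Deprecated spelling, still honored as a fallback:
const legacy = new OpenAIEmbeddings({ maxTokensPerBatch: 8000 });

const vectors = await embeddings.embedDocuments(['first text', 'second text']);
console.log(vectors.length, vectors[0].length); // 2, 1536
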
@@ -115,8 +115,9 @@ class OpenAIEmbeddings {
         }
         this._model = config.model || 'text-embedding-3-small';
         this._batchSize = config.batchSize || 2048;
-        // OpenAI's limit is 8,192 tokens. Use 8,000 as default for max throughput.
-        this._maxTokensPerBatch = config.maxTokensPerBatch || 8000;
+        // OpenAI's limit is 8,192 tokens PER INPUT (not per batch).
+        // You can send up to 2048 inputs per request regardless of total tokens.
+        this._maxTokensPerInput = config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
     }
     get dimension() {
         return exports.MODEL_DIMENSIONS[this._model] || 1536;
@@ -136,48 +137,51 @@ class OpenAIEmbeddings {
         return Math.ceil(text.length / 3.5);
     }
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative 3.0 chars/token for truncation to handle mixed content safely.
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     truncateToTokenLimit(text) {
-        // Use conservative limit for truncation: 7800 tokens max for single text
-        const maxTokensForSingleText = Math.min(this._maxTokensPerBatch, 7800);
-        // Use 3.0 chars/token for safe truncation
-        const maxChars = Math.floor(maxTokensForSingleText * 3.0);
+        const maxTokens = Math.min(this._maxTokensPerInput, 7800);
+        // Use 2.0 chars/token for safe truncation — handles spreadsheet data,
+        // numbers, and special characters which tokenize at ~2.2 chars/token
+        const maxChars = Math.floor(maxTokens * 2.0);
         if (text.length <= maxChars) {
             return text;
         }
         return text.slice(0, maxChars);
     }
     /**
-     * Split texts into batches respecting both document count and token limits.
-     * This prevents OpenAI API errors when total tokens exceed 8,192.
-     * Automatically truncates individual texts that exceed the token limit.
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     createTokenAwareBatches(texts) {
+        // OpenAI enforces a per-request total token limit (typically 300K).
+        // Use 250K as a safe default to account for token estimation inaccuracy.
+        const MAX_TOKENS_PER_REQUEST = 250000;
         const batches = [];
         let currentBatch = [];
-        let currentTokens = 0;
+        let currentBatchTokens = 0;
         for (let text of texts) {
+            // Truncate individual texts that exceed the per-input token limit
             let textTokens = this.estimateTokens(text);
-            // Truncate if single text exceeds token limit
-            if (textTokens > this._maxTokensPerBatch) {
+            if (textTokens > this._maxTokensPerInput) {
                 text = this.truncateToTokenLimit(text);
                 textTokens = this.estimateTokens(text);
             }
-            const wouldExceedTokens = (currentTokens + textTokens) > this._maxTokensPerBatch;
+            const wouldExceedRequestTokens = (currentBatchTokens + textTokens) > MAX_TOKENS_PER_REQUEST;
             const wouldExceedCount = currentBatch.length >= this._batchSize;
-            if (wouldExceedTokens || wouldExceedCount) {
-                if (currentBatch.length > 0) {
-                    batches.push(currentBatch);
-                }
+            if ((wouldExceedRequestTokens || wouldExceedCount) && currentBatch.length > 0) {
+                batches.push(currentBatch);
                 currentBatch = [text];
-                currentTokens = textTokens;
+                currentBatchTokens = textTokens;
             }
             else {
                 currentBatch.push(text);
-                currentTokens += textTokens;
+                currentBatchTokens += textTokens;
             }
         }
         if (currentBatch.length > 0) {
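
The practical effect of moving from a per-batch to a per-request token budget is far fewer embedding requests for large corpora. A rough sketch of the arithmetic with illustrative numbers only (the real batching above is greedy rather than a simple division, and 250K is the safety margin hard-coded in createTokenAwareBatches):

// Truncation under the defaults: per-input cap is min(8000, 7800) = 7800 tokens,
// i.e. floor(7800 * 2.0) = 15,600 characters kept from the start of the text.
const maxChars = Math.floor(Math.min(8000, 7800) * 2.0); // 15600

// Batching 10,000 documents of ~400 estimated tokens each:
const docs = 10_000;
const avgTokens = 400;
const byCount = Math.ceil(docs / 2048);                   // 5 requests (2,048-input cap)
const byTokens = Math.ceil((docs * avgTokens) / 250_000); // 16 requests (250K-token cap)
console.log(Math.max(byCount, byTokens));                 // ~16 requests
// Under the previous 8,000-token per-batch cap the same corpus needed
// Math.ceil((docs * avgTokens) / 8000) = 500 requests.
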
package/dist/index.js CHANGED
@@ -1924,6 +1924,27 @@ class MemvidImpl {
         const lines = text.split('\n');
         let current = '';
         for (const line of lines) {
+            // Handle lines longer than chunkSize (e.g. wide spreadsheet rows)
+            if (line.length > size) {
+                if (current.trim()) {
+                    chunks.push(current.trim());
+                    current = '';
+                }
+                // Split long line at delimiter boundaries (" | " for XLSX rows)
+                let remaining = line;
+                while (remaining.length > size) {
+                    let splitAt = remaining.lastIndexOf(' | ', size);
+                    if (splitAt <= 0)
+                        splitAt = remaining.lastIndexOf(' ', size);
+                    if (splitAt <= 0)
+                        splitAt = size;
+                    chunks.push(remaining.slice(0, splitAt).trim());
+                    remaining = remaining.slice(splitAt).replace(/^\s*\|\s*/, '');
+                }
+                if (remaining.trim())
+                    current = remaining;
+                continue;
+            }
             if (current.length + line.length + 1 > size && current.length > 0) {
                 chunks.push(current.trim());
                 current = line;
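
A standalone sketch of the same splitting behavior, pulled out into a hypothetical helper for illustration (the SDK does this inline in its chunker, and carries the final remainder into the next chunk rather than emitting it immediately):

function splitLongLine(line: string, size: number): string[] {
    const pieces: string[] = [];
    let remaining = line;
    while (remaining.length > size) {
        // Prefer the " | " cell delimiter used for XLSX rows, then any space,
        // and only hard-cut at `size` as a last resort.
        let splitAt = remaining.lastIndexOf(' | ', size);
        if (splitAt <= 0) splitAt = remaining.lastIndexOf(' ', size);
        if (splitAt <= 0) splitAt = size;
        pieces.push(remaining.slice(0, splitAt).trim());
        remaining = remaining.slice(splitAt).replace(/^\s*\|\s*/, '');
    }
    if (remaining.trim()) pieces.push(remaining.trim());
    return pieces;
}

// A wide row with size = 20 breaks at cell boundaries, not mid-cell:
// ["Region: EMEA", "Revenue: 1,204,500", "Growth: 12.4%"]
console.log(splitLongLine('Region: EMEA | Revenue: 1,204,500 | Growth: 12.4%', 20));
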
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@memvid/sdk",
-  "version": "2.0.154",
+  "version": "2.0.155",
   "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
     "node": ">=18"
   },
   "optionalDependencies": {
-    "@memvid/sdk-darwin-arm64": "2.0.154",
-    "@memvid/sdk-darwin-x64": "2.0.154",
-    "@memvid/sdk-linux-x64-gnu": "2.0.154",
-    "@memvid/sdk-linux-arm64-gnu": "2.0.154",
-    "@memvid/sdk-win32-x64-msvc": "2.0.154"
+    "@memvid/sdk-darwin-arm64": "2.0.155",
+    "@memvid/sdk-darwin-x64": "2.0.155",
+    "@memvid/sdk-linux-x64-gnu": "2.0.155",
+    "@memvid/sdk-linux-arm64-gnu": "2.0.155",
+    "@memvid/sdk-win32-x64-msvc": "2.0.155"
   },
   "peerDependencies": {
     "@langchain/core": ">=0.3.0",