@memvid/sdk 2.0.154 → 2.0.155
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embeddings.d.ts +14 -8
- package/dist/embeddings.js +25 -21
- package/dist/index.js +21 -0
- package/package.json +6 -6
package/dist/embeddings.d.ts
CHANGED
@@ -98,9 +98,13 @@ export interface OpenAIEmbeddingsConfig {
     apiKey?: string;
     /** Model to use. Default: 'text-embedding-3-small' */
     model?: string;
-    /** Max number of texts to embed in a single API call. Default: 2048 */
+    /** Max number of texts to embed in a single API call. Default: 2048 (OpenAI hard limit) */
     batchSize?: number;
-    /** Max tokens per
+    /** Max tokens per individual input text (OpenAI limit is 8191). Default: 8000 (with safety margin).
+     * Note: this is a per-INPUT limit, not a per-batch total. Each input in a batch
+     * must individually be under this limit, but the batch total can be much higher. */
+    maxTokensPerInput?: number;
+    /** @deprecated Use maxTokensPerInput instead */
     maxTokensPerBatch?: number;
 }
 /**
@@ -120,7 +124,7 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
     private readonly _apiKey;
     private readonly _model;
     private readonly _batchSize;
-    private readonly
+    private readonly _maxTokensPerInput;
     constructor(config?: OpenAIEmbeddingsConfig);
     get dimension(): number;
     get modelName(): string;
@@ -132,15 +136,17 @@ export declare class OpenAIEmbeddings implements EmbeddingProvider {
      */
     private estimateTokens;
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     private truncateToTokenLimit;
     /**
-     * Split texts into batches respecting
-     *
-     *
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     private createTokenAwareBatches;
     embedDocuments(texts: string[]): Promise<number[][]>;
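
Net effect of the typings change: callers now set a per-input cap (maxTokensPerInput) instead of treating maxTokensPerBatch as a batch-wide budget. A minimal usage sketch, assuming OpenAIEmbeddings is importable from the package root (this diff only shows dist/embeddings.d.ts, so the import path and values are illustrative):

// Sketch only; import path assumed, not shown in this diff.
import { OpenAIEmbeddings } from '@memvid/sdk';

const embeddings = new OpenAIEmbeddings({
    apiKey: process.env.OPENAI_API_KEY, // optional per the typings
    model: 'text-embedding-3-small',    // default per the typings
    batchSize: 2048,                    // max inputs per request (OpenAI hard limit)
    maxTokensPerInput: 8000,            // new in 2.0.155; maxTokensPerBatch is now deprecated
});

// Inside an async context:
const vectors = await embeddings.embedDocuments(['chunk one', 'chunk two']);
console.log(embeddings.modelName, embeddings.dimension, vectors.length);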
package/dist/embeddings.js
CHANGED
@@ -115,8 +115,9 @@ class OpenAIEmbeddings {
         }
         this._model = config.model || 'text-embedding-3-small';
         this._batchSize = config.batchSize || 2048;
-        // OpenAI's limit is 8,192 tokens
-
+        // OpenAI's limit is 8,192 tokens PER INPUT (not per batch).
+        // You can send up to 2048 inputs per request regardless of total tokens.
+        this._maxTokensPerInput = config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
     }
     get dimension() {
         return exports.MODEL_DIMENSIONS[this._model] || 1536;
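
Because the constructor falls back through config.maxTokensPerInput || config.maxTokensPerBatch || 8000, configs written against 2.0.154 keep working unchanged. A standalone illustration of that resolution order (the helper name is made up, not part of the SDK):

// New option > deprecated option > default, matching the constructor above.
function resolveMaxTokensPerInput(config: { maxTokensPerInput?: number; maxTokensPerBatch?: number }): number {
    return config.maxTokensPerInput || config.maxTokensPerBatch || 8000;
}

resolveMaxTokensPerInput({});                          // 8000 (default)
resolveMaxTokensPerInput({ maxTokensPerBatch: 6000 }); // 6000 (deprecated option still honored)
resolveMaxTokensPerInput({ maxTokensPerInput: 4000 }); // 4000 (new option wins)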
@@ -136,48 +137,51 @@
         return Math.ceil(text.length / 3.5);
     }
     /**
-     * Truncate text to fit within token limit.
+     * Truncate a single input text to fit within the per-input token limit.
      * Preserves beginning of text as it typically contains the most important context.
-     * Uses conservative
+     * Uses conservative 2.0 chars/token for truncation to handle data-heavy content
+     * (spreadsheets, numbers, cell refs) where tokenization is denser than prose.
      */
     truncateToTokenLimit(text) {
-
-
-        //
-        const maxChars = Math.floor(
+        const maxTokens = Math.min(this._maxTokensPerInput, 7800);
+        // Use 2.0 chars/token for safe truncation — handles spreadsheet data,
+        // numbers, and special characters which tokenize at ~2.2 chars/token
+        const maxChars = Math.floor(maxTokens * 2.0);
         if (text.length <= maxChars) {
             return text;
         }
         return text.slice(0, maxChars);
     }
     /**
-     * Split texts into batches respecting
-     *
-     *
+     * Split texts into batches respecting:
+     * 1. Per-input token limit (8,192 for text-embedding-3-small) — truncate oversized inputs
+     * 2. Per-request token limit (300K for most tiers) — split into multiple requests
+     * 3. Per-request input count (2,048 max inputs per request)
      */
     createTokenAwareBatches(texts) {
+        // OpenAI enforces a per-request total token limit (typically 300K).
+        // Use 250K as a safe default to account for token estimation inaccuracy.
+        const MAX_TOKENS_PER_REQUEST = 250000;
         const batches = [];
         let currentBatch = [];
-        let
+        let currentBatchTokens = 0;
         for (let text of texts) {
+            // Truncate individual texts that exceed the per-input token limit
            let textTokens = this.estimateTokens(text);
-
-            if (textTokens > this._maxTokensPerBatch) {
+            if (textTokens > this._maxTokensPerInput) {
                text = this.truncateToTokenLimit(text);
                textTokens = this.estimateTokens(text);
            }
-            const
+            const wouldExceedRequestTokens = (currentBatchTokens + textTokens) > MAX_TOKENS_PER_REQUEST;
            const wouldExceedCount = currentBatch.length >= this._batchSize;
-            if (
-
-                batches.push(currentBatch);
-            }
+            if ((wouldExceedRequestTokens || wouldExceedCount) && currentBatch.length > 0) {
+                batches.push(currentBatch);
                currentBatch = [text];
-
+                currentBatchTokens = textTokens;
            }
            else {
                currentBatch.push(text);
-
+                currentBatchTokens += textTokens;
            }
        }
        if (currentBatch.length > 0) {
package/dist/index.js
CHANGED
@@ -1924,6 +1924,27 @@ class MemvidImpl {
         const lines = text.split('\n');
         let current = '';
         for (const line of lines) {
+            // Handle lines longer than chunkSize (e.g. wide spreadsheet rows)
+            if (line.length > size) {
+                if (current.trim()) {
+                    chunks.push(current.trim());
+                    current = '';
+                }
+                // Split long line at delimiter boundaries (" | " for XLSX rows)
+                let remaining = line;
+                while (remaining.length > size) {
+                    let splitAt = remaining.lastIndexOf(' | ', size);
+                    if (splitAt <= 0)
+                        splitAt = remaining.lastIndexOf(' ', size);
+                    if (splitAt <= 0)
+                        splitAt = size;
+                    chunks.push(remaining.slice(0, splitAt).trim());
+                    remaining = remaining.slice(splitAt).replace(/^\s*\|\s*/, '');
+                }
+                if (remaining.trim())
+                    current = remaining;
+                continue;
+            }
             if (current.length + line.length + 1 > size && current.length > 0) {
                 chunks.push(current.trim());
                 current = line;
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@memvid/sdk",
-  "version": "2.0.154",
+  "version": "2.0.155",
   "description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -41,11 +41,11 @@
     "node": ">=18"
   },
   "optionalDependencies": {
-    "@memvid/sdk-darwin-arm64": "2.0.154",
-    "@memvid/sdk-darwin-x64": "2.0.154",
-    "@memvid/sdk-linux-x64-gnu": "2.0.154",
-    "@memvid/sdk-linux-arm64-gnu": "2.0.154",
-    "@memvid/sdk-win32-x64-msvc": "2.0.154"
+    "@memvid/sdk-darwin-arm64": "2.0.155",
+    "@memvid/sdk-darwin-x64": "2.0.155",
+    "@memvid/sdk-linux-x64-gnu": "2.0.155",
+    "@memvid/sdk-linux-arm64-gnu": "2.0.155",
+    "@memvid/sdk-win32-x64-msvc": "2.0.155"
   },
   "peerDependencies": {
     "@langchain/core": ">=0.3.0",