pdf-brain 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +49 -17
- package/src/services/EmbeddingQueue.test.ts +351 -0
- package/src/services/EmbeddingQueue.ts +236 -0
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -4,21 +4,21 @@
|
|
|
4
4
|
* Built with Effect for robust error handling and composability.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { Effect } from "effect";
|
|
7
|
+
import { Duration, Effect } from "effect";
|
|
8
8
|
import { createHash } from "node:crypto";
|
|
9
9
|
import { statSync } from "node:fs";
|
|
10
10
|
import { basename } from "node:path";
|
|
11
11
|
|
|
12
12
|
import {
|
|
13
|
-
Document,
|
|
14
|
-
PDFDocument,
|
|
15
|
-
SearchResult,
|
|
16
|
-
SearchOptions,
|
|
17
13
|
AddOptions,
|
|
18
|
-
|
|
14
|
+
Document,
|
|
19
15
|
DocumentExistsError,
|
|
20
16
|
DocumentNotFoundError,
|
|
17
|
+
LibraryConfig,
|
|
18
|
+
SearchOptions,
|
|
19
|
+
SearchResult,
|
|
21
20
|
} from "./types.js";
|
|
21
|
+
import { DEFAULT_QUEUE_CONFIG } from "./services/EmbeddingQueue.js";
|
|
22
22
|
|
|
23
23
|
import { Ollama, OllamaLive } from "./services/Ollama.js";
|
|
24
24
|
import { PDFExtractor, PDFExtractorLive } from "./services/PDFExtractor.js";
|
|
@@ -215,22 +215,54 @@ export class PDFLibrary extends Effect.Service<PDFLibrary>()("PDFLibrary", {
|
|
|
215
215
|
}));
|
|
216
216
|
yield* db.addChunks(chunkRecords);
|
|
217
217
|
|
|
218
|
-
// Generate embeddings with
|
|
218
|
+
// Generate embeddings with gated batching to prevent WASM OOM
|
|
219
|
+
// This processes in batches of 50, checkpointing after each batch
|
|
220
|
+
// to keep WAL size bounded and prevent daemon crashes
|
|
221
|
+
const batchSize = DEFAULT_QUEUE_CONFIG.batchSize;
|
|
219
222
|
yield* Effect.log(
|
|
220
|
-
`Generating embeddings for ${chunks.length} chunks...`
|
|
223
|
+
`Generating embeddings for ${chunks.length} chunks (batch size: ${batchSize})...`
|
|
221
224
|
);
|
|
225
|
+
|
|
222
226
|
const contents = chunks.map((c) => c.content);
|
|
223
|
-
const embeddings = yield* ollama.embedBatch(contents, 5);
|
|
224
227
|
|
|
225
|
-
//
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
228
|
+
// Process embeddings in gated batches
|
|
229
|
+
// Each batch: generate embeddings → write to DB → checkpoint
|
|
230
|
+
let batchStart = 0;
|
|
231
|
+
|
|
232
|
+
while (batchStart < contents.length) {
|
|
233
|
+
const batchEnd = Math.min(batchStart + batchSize, contents.length);
|
|
234
|
+
const batchContents = contents.slice(batchStart, batchEnd);
|
|
231
235
|
|
|
232
|
-
|
|
233
|
-
|
|
236
|
+
// Generate embeddings for this batch with bounded concurrency
|
|
237
|
+
const batchEmbeddings = yield* ollama.embedBatch(
|
|
238
|
+
batchContents,
|
|
239
|
+
DEFAULT_QUEUE_CONFIG.concurrency
|
|
240
|
+
);
|
|
241
|
+
|
|
242
|
+
// Store this batch's embeddings
|
|
243
|
+
const embeddingRecords = batchEmbeddings.map((emb, i) => ({
|
|
244
|
+
chunkId: `${id}-${batchStart + i}`,
|
|
245
|
+
embedding: emb,
|
|
246
|
+
}));
|
|
247
|
+
yield* db.addEmbeddings(embeddingRecords);
|
|
248
|
+
|
|
249
|
+
// CRITICAL: Checkpoint after each batch to flush WAL
|
|
250
|
+
// This prevents WASM OOM from unbounded WAL growth
|
|
251
|
+
yield* db.checkpoint();
|
|
252
|
+
|
|
253
|
+
yield* Effect.log(
|
|
254
|
+
` Processed ${batchEnd}/${contents.length} embeddings`
|
|
255
|
+
);
|
|
256
|
+
|
|
257
|
+
batchStart = batchEnd;
|
|
258
|
+
|
|
259
|
+
// Backpressure: small delay between batches to let GC run
|
|
260
|
+
if (batchStart < contents.length) {
|
|
261
|
+
yield* Effect.sleep(
|
|
262
|
+
Duration.millis(DEFAULT_QUEUE_CONFIG.batchDelayMs)
|
|
263
|
+
);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
234
266
|
|
|
235
267
|
return doc;
|
|
236
268
|
}),
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
 * EmbeddingQueue Tests
 *
 * Tests for gated batch processing with backpressure.
 * These tests verify the queue prevents WASM OOM under heavy load.
 */

import { describe, expect, it } from "bun:test";
import { Effect } from "effect";
import {
  processInBatches,
  createEmbeddingProcessor,
  getAdaptiveBatchSize,
  DEFAULT_QUEUE_CONFIG,
  type BatchProgress,
  type EmbeddingQueueConfig,
} from "./EmbeddingQueue.js";

describe("EmbeddingQueue", () => {
  describe("processInBatches", () => {
    it("processes all items", async () => {
      const items = [1, 2, 3, 4, 5];
      const process = (n: number) => Effect.succeed(n * 2);

      const result = await Effect.runPromise(
        processInBatches(items, process, {
          ...DEFAULT_QUEUE_CONFIG,
          batchSize: 2,
        })
      );

      expect(result).toEqual([2, 4, 6, 8, 10]);
    });

    it("respects batch size", async () => {
      const items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
      const batchesProcessed: number[] = [];
      let currentBatch = 0;

      // Record which batch each item was processed in; the afterBatch hook
      // advances the counter, so distinct counter values = distinct batches.
      const process = (n: number) =>
        Effect.sync(() => {
          if (!batchesProcessed.includes(currentBatch)) {
            batchesProcessed.push(currentBatch);
          }
          return n;
        });

      const afterBatch = () =>
        Effect.sync(() => {
          currentBatch++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 3,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: true,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      // 10 items / 3 per batch = 4 batches (3+3+3+1)
      expect(currentBatch).toBe(4);
    });

    it("calls afterBatch hook after each batch", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      let checkpointCount = 0;

      const process = (n: number) => Effect.succeed(n);
      const afterBatch = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: true,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      // 6 items / 2 per batch = 3 batches = 3 checkpoints
      expect(checkpointCount).toBe(3);
    });

    it("skips afterBatch when checkpointAfterBatch is false", async () => {
      const items = [1, 2, 3, 4];
      let checkpointCount = 0;

      const process = (n: number) => Effect.succeed(n);
      const afterBatch = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      expect(checkpointCount).toBe(0);
    });

    it("reports progress correctly", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      const progressReports: BatchProgress[] = [];

      const process = (n: number) => Effect.succeed(n);
      const onProgress = (p: BatchProgress) => progressReports.push({ ...p });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, undefined, onProgress)
      );

      expect(progressReports).toHaveLength(3);

      // First batch — percent is Math.round((processed / total) * 100)
      expect(progressReports[0]).toEqual({
        batch: 1,
        totalBatches: 3,
        processed: 2,
        total: 6,
        percent: 33,
      });

      // Second batch
      expect(progressReports[1]).toEqual({
        batch: 2,
        totalBatches: 3,
        processed: 4,
        total: 6,
        percent: 67,
      });

      // Third batch
      expect(progressReports[2]).toEqual({
        batch: 3,
        totalBatches: 3,
        processed: 6,
        total: 6,
        percent: 100,
      });
    });

    it("handles empty input", async () => {
      const items: number[] = [];
      const process = (n: number) => Effect.succeed(n);

      const result = await Effect.runPromise(
        processInBatches(items, process, DEFAULT_QUEUE_CONFIG)
      );

      expect(result).toEqual([]);
    });

    it("handles single item", async () => {
      const items = [42];
      const process = (n: number) => Effect.succeed(n * 2);

      const result = await Effect.runPromise(
        processInBatches(items, process, DEFAULT_QUEUE_CONFIG)
      );

      expect(result).toEqual([84]);
    });

    it("propagates errors from process function", async () => {
      const items = [1, 2, 3];
      const process = (n: number) =>
        n === 2 ? Effect.fail(new Error("boom")) : Effect.succeed(n);

      const result = await Effect.runPromise(
        processInBatches(items, process, {
          ...DEFAULT_QUEUE_CONFIG,
          batchSize: 10,
        }).pipe(Effect.either)
      );

      expect(result._tag).toBe("Left");
      if (result._tag === "Left") {
        expect((result.left as Error).message).toBe("boom");
      }
    });

    it("respects concurrency within batch", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      let maxConcurrent = 0;
      let currentConcurrent = 0;

      const process = (n: number) =>
        Effect.gen(function* () {
          currentConcurrent++;
          maxConcurrent = Math.max(maxConcurrent, currentConcurrent);
          // Simulate async work
          yield* Effect.sleep("1 millis");
          currentConcurrent--;
          return n;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 6, // All in one batch
        concurrency: 3, // Max 3 concurrent
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(processInBatches(items, process, config));

      // Should never exceed concurrency limit
      expect(maxConcurrent).toBeLessThanOrEqual(3);
      // Should use concurrency (not just 1)
      expect(maxConcurrent).toBeGreaterThan(1);
    });
  });

  describe("getAdaptiveBatchSize", () => {
    it("returns base size when memory is low", () => {
      // Can't easily mock process.memoryUsage, so just test the function exists
      // and returns a reasonable value
      const result = getAdaptiveBatchSize(50);
      expect(result).toBeGreaterThanOrEqual(10);
      expect(result).toBeLessThanOrEqual(50);
    });

    it("never returns less than 10", () => {
      // Even with tiny base size, should return at least 10
      const result = getAdaptiveBatchSize(5);
      // If memory is low, returns base (5), otherwise scaled
      expect(result).toBeGreaterThanOrEqual(5);
    });
  });

  describe("createEmbeddingProcessor", () => {
    it("creates a processor with embedBatch method", async () => {
      const embedFn = (_text: string) => Effect.succeed([1, 2, 3]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn);

      expect(processor.embedBatch).toBeDefined();
      expect(processor.getConfig).toBeDefined();
    });

    it("processes texts through embedBatch", async () => {
      const embedFn = (text: string) => Effect.succeed([text.length]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
      });

      const result = await Effect.runPromise(
        processor.embedBatch(["a", "bb", "ccc"])
      );

      expect(result).toEqual([[1], [2], [3]]);
    });

    it("calls checkpoint after each batch", async () => {
      let checkpointCount = 0;
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
        adaptiveBatchSize: false,
      });

      await Effect.runPromise(processor.embedBatch(["a", "b", "c", "d", "e"]));

      // 5 items / 2 per batch = 3 batches = 3 checkpoints
      expect(checkpointCount).toBe(3);
    });

    it("reports progress", async () => {
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;
      const progressReports: BatchProgress[] = [];

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
        adaptiveBatchSize: false,
      });

      await Effect.runPromise(
        processor.embedBatch(["a", "b", "c", "d"], (p) =>
          progressReports.push({ ...p })
        )
      );

      expect(progressReports).toHaveLength(2);
      expect(progressReports[0].percent).toBe(50);
      expect(progressReports[1].percent).toBe(100);
    });

    it("uses custom config", () => {
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 100,
        concurrency: 10,
      });

      const config = processor.getConfig();
      expect(config.batchSize).toBe(100);
      expect(config.concurrency).toBe(10);
    });
  });

  describe("DEFAULT_QUEUE_CONFIG", () => {
    it("has sensible defaults", () => {
      expect(DEFAULT_QUEUE_CONFIG.batchSize).toBe(50);
      expect(DEFAULT_QUEUE_CONFIG.concurrency).toBe(5);
      expect(DEFAULT_QUEUE_CONFIG.batchDelayMs).toBe(10);
      expect(DEFAULT_QUEUE_CONFIG.checkpointAfterBatch).toBe(true);
      expect(DEFAULT_QUEUE_CONFIG.adaptiveBatchSize).toBe(true);
    });
  });
});
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Queue Service - Gated batch processing with backpressure
|
|
3
|
+
*
|
|
4
|
+
* Solves PGlite daemon crashes under heavy embedding load by:
|
|
5
|
+
* 1. Processing embeddings in small batches (default: 50)
|
|
6
|
+
* 2. Checkpointing after each batch to flush WAL
|
|
7
|
+
* 3. Yielding to event loop between batches (backpressure)
|
|
8
|
+
*
|
|
9
|
+
* WASM Memory Constraints:
|
|
10
|
+
* - PGlite runs in WASM with ~2GB memory limit
|
|
11
|
+
* - Each 1024-dim embedding = 4KB
|
|
12
|
+
* - WAL accumulates until CHECKPOINT
|
|
13
|
+
* - HNSW index updates consume memory during inserts
|
|
14
|
+
*
|
|
15
|
+
* Without gating: 5000 embeddings = 20MB vectors + unbounded WAL = OOM
|
|
16
|
+
* With gating: 50 embeddings/batch + checkpoint = bounded memory
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { Chunk, Duration, Effect, Stream } from "effect";
|
|
20
|
+
|
|
21
|
+
/**
 * Configuration for embedding batch processing
 */
export interface EmbeddingQueueConfig {
  /**
   * Maximum embeddings per batch before checkpoint
   * Lower = more checkpoints, less memory pressure
   * Higher = fewer checkpoints, more throughput
   * Default: 50 (good balance for 1024-dim vectors)
   */
  batchSize: number;

  /**
   * Concurrency for Ollama embedding calls within a batch
   * Limited by Ollama's capacity and network
   * Default: 5
   */
  concurrency: number;

  /**
   * Delay between batches (milliseconds)
   * Allows event loop to breathe and GC to run
   * Default: 10ms
   */
  batchDelayMs: number;

  /**
   * Whether to run CHECKPOINT after each batch
   * Essential for preventing WAL accumulation
   * Default: true
   */
  checkpointAfterBatch: boolean;

  /**
   * Whether to use adaptive batch sizing based on memory pressure
   * Set to false for predictable behavior in tests
   * Default: true
   */
  adaptiveBatchSize: boolean;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Default configuration - tuned for stability over speed
 *
 * Values are asserted in EmbeddingQueue.test.ts; change both together.
 */
export const DEFAULT_QUEUE_CONFIG: EmbeddingQueueConfig = {
  batchSize: 50,
  concurrency: 5,
  batchDelayMs: 10,
  checkpointAfterBatch: true,
  adaptiveBatchSize: true,
};
|
|
72
|
+
|
|
73
|
+
/**
 * Progress callback for batch processing
 */
export interface BatchProgress {
  /** Current batch number (1-indexed) */
  batch: number;
  /** Total number of batches */
  totalBatches: number;
  /** Items processed so far */
  processed: number;
  /** Total items to process */
  total: number;
  /** Percentage complete (0-100), rounded to the nearest integer */
  percent: number;
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Process items in gated batches with backpressure
|
|
91
|
+
*
|
|
92
|
+
* This is the core primitive for preventing WASM OOM. It:
|
|
93
|
+
* 1. Splits input into batches
|
|
94
|
+
* 2. Processes each batch with bounded concurrency
|
|
95
|
+
* 3. Runs afterBatch hook (for checkpoint)
|
|
96
|
+
* 4. Yields between batches (backpressure)
|
|
97
|
+
*
|
|
98
|
+
* @param items - Items to process
|
|
99
|
+
* @param process - Function to process each item
|
|
100
|
+
* @param config - Queue configuration
|
|
101
|
+
* @param afterBatch - Optional hook after each batch (e.g., checkpoint)
|
|
102
|
+
* @param onProgress - Optional progress callback
|
|
103
|
+
* @returns All processed results
|
|
104
|
+
*/
|
|
105
|
+
export function processInBatches<T, R, E>(
|
|
106
|
+
items: readonly T[],
|
|
107
|
+
process: (item: T) => Effect.Effect<R, E>,
|
|
108
|
+
config: EmbeddingQueueConfig = DEFAULT_QUEUE_CONFIG,
|
|
109
|
+
afterBatch?: () => Effect.Effect<void, E>,
|
|
110
|
+
onProgress?: (progress: BatchProgress) => void
|
|
111
|
+
): Effect.Effect<R[], E> {
|
|
112
|
+
return Effect.gen(function* () {
|
|
113
|
+
const results: R[] = [];
|
|
114
|
+
const totalBatches = Math.ceil(items.length / config.batchSize);
|
|
115
|
+
|
|
116
|
+
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
|
|
117
|
+
const start = batchIdx * config.batchSize;
|
|
118
|
+
const end = Math.min(start + config.batchSize, items.length);
|
|
119
|
+
const batch = items.slice(start, end);
|
|
120
|
+
|
|
121
|
+
// Process batch with bounded concurrency
|
|
122
|
+
const batchResults = yield* Stream.fromIterable(batch).pipe(
|
|
123
|
+
Stream.mapEffect(process, { concurrency: config.concurrency }),
|
|
124
|
+
Stream.runCollect,
|
|
125
|
+
Effect.map(Chunk.toArray)
|
|
126
|
+
);
|
|
127
|
+
|
|
128
|
+
results.push(...batchResults);
|
|
129
|
+
|
|
130
|
+
// Report progress
|
|
131
|
+
if (onProgress) {
|
|
132
|
+
onProgress({
|
|
133
|
+
batch: batchIdx + 1,
|
|
134
|
+
totalBatches,
|
|
135
|
+
processed: results.length,
|
|
136
|
+
total: items.length,
|
|
137
|
+
percent: Math.round((results.length / items.length) * 100),
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Run after-batch hook (checkpoint)
|
|
142
|
+
if (afterBatch && config.checkpointAfterBatch) {
|
|
143
|
+
yield* afterBatch();
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Backpressure: yield to event loop between batches
|
|
147
|
+
if (config.batchDelayMs > 0 && batchIdx < totalBatches - 1) {
|
|
148
|
+
yield* Effect.sleep(Duration.millis(config.batchDelayMs));
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
return results;
|
|
153
|
+
});
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Adaptive batch sizing based on memory pressure
|
|
158
|
+
*
|
|
159
|
+
* Monitors process memory and reduces batch size if pressure is high.
|
|
160
|
+
* This is a defense-in-depth measure for edge cases.
|
|
161
|
+
*
|
|
162
|
+
* Memory thresholds (of heap limit):
|
|
163
|
+
* - < 50%: full batch size
|
|
164
|
+
* - 50-70%: 75% batch size
|
|
165
|
+
* - 70-85%: 50% batch size
|
|
166
|
+
* - > 85%: 25% batch size (emergency mode)
|
|
167
|
+
*/
|
|
168
|
+
export function getAdaptiveBatchSize(baseBatchSize: number): number {
|
|
169
|
+
// Only works in Node.js/Bun with v8 heap stats
|
|
170
|
+
if (typeof process !== "undefined" && process.memoryUsage) {
|
|
171
|
+
const mem = process.memoryUsage();
|
|
172
|
+
const heapUsedRatio = mem.heapUsed / mem.heapTotal;
|
|
173
|
+
|
|
174
|
+
if (heapUsedRatio > 0.85) {
|
|
175
|
+
// Emergency: 25% batch size
|
|
176
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.25));
|
|
177
|
+
} else if (heapUsedRatio > 0.7) {
|
|
178
|
+
// High pressure: 50% batch size
|
|
179
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.5));
|
|
180
|
+
} else if (heapUsedRatio > 0.5) {
|
|
181
|
+
// Medium pressure: 75% batch size
|
|
182
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.75));
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return baseBatchSize;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* Create a gated embedding processor
|
|
191
|
+
*
|
|
192
|
+
* This is the high-level API for embedding with backpressure.
|
|
193
|
+
* It wraps processInBatches with embedding-specific defaults.
|
|
194
|
+
*
|
|
195
|
+
* @param embedFn - Function to generate a single embedding
|
|
196
|
+
* @param checkpointFn - Function to run CHECKPOINT
|
|
197
|
+
* @param config - Optional configuration overrides
|
|
198
|
+
*/
|
|
199
|
+
export function createEmbeddingProcessor<E>(
|
|
200
|
+
embedFn: (text: string) => Effect.Effect<number[], E>,
|
|
201
|
+
checkpointFn: () => Effect.Effect<void, E>,
|
|
202
|
+
config: Partial<EmbeddingQueueConfig> = {}
|
|
203
|
+
) {
|
|
204
|
+
const fullConfig = { ...DEFAULT_QUEUE_CONFIG, ...config };
|
|
205
|
+
|
|
206
|
+
return {
|
|
207
|
+
/**
|
|
208
|
+
* Process texts into embeddings with gated batching
|
|
209
|
+
*/
|
|
210
|
+
embedBatch: (
|
|
211
|
+
texts: readonly string[],
|
|
212
|
+
onProgress?: (progress: BatchProgress) => void
|
|
213
|
+
): Effect.Effect<number[][], E> => {
|
|
214
|
+
// Use adaptive batch size based on memory pressure (if enabled)
|
|
215
|
+
const adaptiveConfig = {
|
|
216
|
+
...fullConfig,
|
|
217
|
+
batchSize: fullConfig.adaptiveBatchSize
|
|
218
|
+
? getAdaptiveBatchSize(fullConfig.batchSize)
|
|
219
|
+
: fullConfig.batchSize,
|
|
220
|
+
};
|
|
221
|
+
|
|
222
|
+
return processInBatches(
|
|
223
|
+
texts,
|
|
224
|
+
embedFn,
|
|
225
|
+
adaptiveConfig,
|
|
226
|
+
checkpointFn,
|
|
227
|
+
onProgress
|
|
228
|
+
);
|
|
229
|
+
},
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* Get current configuration (for debugging)
|
|
233
|
+
*/
|
|
234
|
+
getConfig: () => fullConfig,
|
|
235
|
+
};
|
|
236
|
+
}
|