pdf-brain 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +49 -17
- package/src/services/EmbeddingQueue.test.ts +351 -0
- package/src/services/EmbeddingQueue.ts +236 -0
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -4,21 +4,21 @@
|
|
|
4
4
|
* Built with Effect for robust error handling and composability.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { Effect } from "effect";
|
|
7
|
+
import { Duration, Effect } from "effect";
|
|
8
8
|
import { createHash } from "node:crypto";
|
|
9
9
|
import { statSync } from "node:fs";
|
|
10
10
|
import { basename } from "node:path";
|
|
11
11
|
|
|
12
12
|
import {
|
|
13
|
-
Document,
|
|
14
|
-
PDFDocument,
|
|
15
|
-
SearchResult,
|
|
16
|
-
SearchOptions,
|
|
17
13
|
AddOptions,
|
|
18
|
-
|
|
14
|
+
Document,
|
|
19
15
|
DocumentExistsError,
|
|
20
16
|
DocumentNotFoundError,
|
|
17
|
+
LibraryConfig,
|
|
18
|
+
SearchOptions,
|
|
19
|
+
SearchResult,
|
|
21
20
|
} from "./types.js";
|
|
21
|
+
import { DEFAULT_QUEUE_CONFIG } from "./services/EmbeddingQueue.js";
|
|
22
22
|
|
|
23
23
|
import { Ollama, OllamaLive } from "./services/Ollama.js";
|
|
24
24
|
import { PDFExtractor, PDFExtractorLive } from "./services/PDFExtractor.js";
|
|
@@ -215,22 +215,54 @@ export class PDFLibrary extends Effect.Service<PDFLibrary>()("PDFLibrary", {
|
|
|
215
215
|
}));
|
|
216
216
|
yield* db.addChunks(chunkRecords);
|
|
217
217
|
|
|
218
|
-
// Generate embeddings with
|
|
218
|
+
// Generate embeddings with gated batching to prevent WASM OOM
|
|
219
|
+
// This processes in batches of 50, checkpointing after each batch
|
|
220
|
+
// to keep WAL size bounded and prevent daemon crashes
|
|
221
|
+
const batchSize = DEFAULT_QUEUE_CONFIG.batchSize;
|
|
219
222
|
yield* Effect.log(
|
|
220
|
-
`Generating embeddings for ${chunks.length} chunks...`
|
|
223
|
+
`Generating embeddings for ${chunks.length} chunks (batch size: ${batchSize})...`
|
|
221
224
|
);
|
|
225
|
+
|
|
222
226
|
const contents = chunks.map((c) => c.content);
|
|
223
|
-
const embeddings = yield* ollama.embedBatch(contents, 5);
|
|
224
227
|
|
|
225
|
-
//
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
228
|
+
// Process embeddings in gated batches
|
|
229
|
+
// Each batch: generate embeddings → write to DB → checkpoint
|
|
230
|
+
let batchStart = 0;
|
|
231
|
+
|
|
232
|
+
while (batchStart < contents.length) {
|
|
233
|
+
const batchEnd = Math.min(batchStart + batchSize, contents.length);
|
|
234
|
+
const batchContents = contents.slice(batchStart, batchEnd);
|
|
231
235
|
|
|
232
|
-
|
|
233
|
-
|
|
236
|
+
// Generate embeddings for this batch with bounded concurrency
|
|
237
|
+
const batchEmbeddings = yield* ollama.embedBatch(
|
|
238
|
+
batchContents,
|
|
239
|
+
DEFAULT_QUEUE_CONFIG.concurrency
|
|
240
|
+
);
|
|
241
|
+
|
|
242
|
+
// Store this batch's embeddings
|
|
243
|
+
const embeddingRecords = batchEmbeddings.map((emb, i) => ({
|
|
244
|
+
chunkId: `${id}-${batchStart + i}`,
|
|
245
|
+
embedding: emb,
|
|
246
|
+
}));
|
|
247
|
+
yield* db.addEmbeddings(embeddingRecords);
|
|
248
|
+
|
|
249
|
+
// CRITICAL: Checkpoint after each batch to flush WAL
|
|
250
|
+
// This prevents WASM OOM from unbounded WAL growth
|
|
251
|
+
yield* db.checkpoint();
|
|
252
|
+
|
|
253
|
+
yield* Effect.log(
|
|
254
|
+
` Processed ${batchEnd}/${contents.length} embeddings`
|
|
255
|
+
);
|
|
256
|
+
|
|
257
|
+
batchStart = batchEnd;
|
|
258
|
+
|
|
259
|
+
// Backpressure: small delay between batches to let GC run
|
|
260
|
+
if (batchStart < contents.length) {
|
|
261
|
+
yield* Effect.sleep(
|
|
262
|
+
Duration.millis(DEFAULT_QUEUE_CONFIG.batchDelayMs)
|
|
263
|
+
);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
234
266
|
|
|
235
267
|
return doc;
|
|
236
268
|
}),
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
 * EmbeddingQueue Tests
 *
 * Tests for gated batch processing with backpressure.
 * These tests verify the queue prevents WASM OOM under heavy load.
 */

import { describe, expect, it } from "bun:test";
import { Effect } from "effect";
import {
  processInBatches,
  createEmbeddingProcessor,
  getAdaptiveBatchSize,
  DEFAULT_QUEUE_CONFIG,
  type BatchProgress,
  type EmbeddingQueueConfig,
} from "./EmbeddingQueue.js";

describe("EmbeddingQueue", () => {
  describe("processInBatches", () => {
    it("processes all items", async () => {
      const items = [1, 2, 3, 4, 5];
      const process = (n: number) => Effect.succeed(n * 2);

      const result = await Effect.runPromise(
        processInBatches(items, process, {
          ...DEFAULT_QUEUE_CONFIG,
          batchSize: 2,
        })
      );

      expect(result).toEqual([2, 4, 6, 8, 10]);
    });

    it("respects batch size", async () => {
      const items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
      const batchesProcessed: number[] = [];
      let currentBatch = 0;

      // Record which batch each item was processed in; the afterBatch hook
      // advances the counter, so distinct counter values = distinct batches.
      const process = (n: number) =>
        Effect.sync(() => {
          if (!batchesProcessed.includes(currentBatch)) {
            batchesProcessed.push(currentBatch);
          }
          return n;
        });

      const afterBatch = () =>
        Effect.sync(() => {
          currentBatch++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 3,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: true,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      // 10 items / 3 per batch = 4 batches (3+3+3+1)
      expect(currentBatch).toBe(4);
    });

    it("calls afterBatch hook after each batch", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      let checkpointCount = 0;

      const process = (n: number) => Effect.succeed(n);
      const afterBatch = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: true,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      // 6 items / 2 per batch = 3 batches = 3 checkpoints
      expect(checkpointCount).toBe(3);
    });

    it("skips afterBatch when checkpointAfterBatch is false", async () => {
      const items = [1, 2, 3, 4];
      let checkpointCount = 0;

      const process = (n: number) => Effect.succeed(n);
      const afterBatch = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, afterBatch)
      );

      expect(checkpointCount).toBe(0);
    });

    it("reports progress correctly", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      const progressReports: BatchProgress[] = [];

      const process = (n: number) => Effect.succeed(n);
      const onProgress = (p: BatchProgress) => progressReports.push({ ...p });

      const config: EmbeddingQueueConfig = {
        batchSize: 2,
        concurrency: 1,
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(
        processInBatches(items, process, config, undefined, onProgress)
      );

      expect(progressReports).toHaveLength(3);

      // First batch — percent is Math.round((processed / total) * 100)
      expect(progressReports[0]).toEqual({
        batch: 1,
        totalBatches: 3,
        processed: 2,
        total: 6,
        percent: 33,
      });

      // Second batch
      expect(progressReports[1]).toEqual({
        batch: 2,
        totalBatches: 3,
        processed: 4,
        total: 6,
        percent: 67,
      });

      // Third batch
      expect(progressReports[2]).toEqual({
        batch: 3,
        totalBatches: 3,
        processed: 6,
        total: 6,
        percent: 100,
      });
    });

    it("handles empty input", async () => {
      const items: number[] = [];
      const process = (n: number) => Effect.succeed(n);

      const result = await Effect.runPromise(
        processInBatches(items, process, DEFAULT_QUEUE_CONFIG)
      );

      expect(result).toEqual([]);
    });

    it("handles single item", async () => {
      const items = [42];
      const process = (n: number) => Effect.succeed(n * 2);

      const result = await Effect.runPromise(
        processInBatches(items, process, DEFAULT_QUEUE_CONFIG)
      );

      expect(result).toEqual([84]);
    });

    it("propagates errors from process function", async () => {
      const items = [1, 2, 3];
      const process = (n: number) =>
        n === 2 ? Effect.fail(new Error("boom")) : Effect.succeed(n);

      const result = await Effect.runPromise(
        processInBatches(items, process, {
          ...DEFAULT_QUEUE_CONFIG,
          batchSize: 10,
        }).pipe(Effect.either)
      );

      expect(result._tag).toBe("Left");
      if (result._tag === "Left") {
        expect((result.left as Error).message).toBe("boom");
      }
    });

    it("respects concurrency within batch", async () => {
      const items = [1, 2, 3, 4, 5, 6];
      let maxConcurrent = 0;
      let currentConcurrent = 0;

      const process = (n: number) =>
        Effect.gen(function* () {
          currentConcurrent++;
          maxConcurrent = Math.max(maxConcurrent, currentConcurrent);
          // Simulate async work
          yield* Effect.sleep("1 millis");
          currentConcurrent--;
          return n;
        });

      const config: EmbeddingQueueConfig = {
        batchSize: 6, // All in one batch
        concurrency: 3, // Max 3 concurrent
        batchDelayMs: 0,
        checkpointAfterBatch: false,
        adaptiveBatchSize: false,
      };

      await Effect.runPromise(processInBatches(items, process, config));

      // Should never exceed concurrency limit
      expect(maxConcurrent).toBeLessThanOrEqual(3);
      // Should use concurrency (not just 1)
      expect(maxConcurrent).toBeGreaterThan(1);
    });
  });

  describe("getAdaptiveBatchSize", () => {
    it("returns base size when memory is low", () => {
      // Can't easily mock process.memoryUsage, so just test the function exists
      // and returns a reasonable value
      const result = getAdaptiveBatchSize(50);
      expect(result).toBeGreaterThanOrEqual(10);
      expect(result).toBeLessThanOrEqual(50);
    });

    it("never returns less than 10", () => {
      // Even with tiny base size, should return at least 10
      const result = getAdaptiveBatchSize(5);
      // If memory is low, returns base (5), otherwise scaled
      expect(result).toBeGreaterThanOrEqual(5);
    });
  });

  describe("createEmbeddingProcessor", () => {
    it("creates a processor with embedBatch method", async () => {
      const embedFn = (_text: string) => Effect.succeed([1, 2, 3]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn);

      expect(processor.embedBatch).toBeDefined();
      expect(processor.getConfig).toBeDefined();
    });

    it("processes texts through embedBatch", async () => {
      const embedFn = (text: string) => Effect.succeed([text.length]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
      });

      const result = await Effect.runPromise(
        processor.embedBatch(["a", "bb", "ccc"])
      );

      expect(result).toEqual([[1], [2], [3]]);
    });

    it("calls checkpoint after each batch", async () => {
      let checkpointCount = 0;
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = () =>
        Effect.sync(() => {
          checkpointCount++;
        });

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
        adaptiveBatchSize: false,
      });

      await Effect.runPromise(processor.embedBatch(["a", "b", "c", "d", "e"]));

      // 5 items / 2 per batch = 3 batches = 3 checkpoints
      expect(checkpointCount).toBe(3);
    });

    it("reports progress", async () => {
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;
      const progressReports: BatchProgress[] = [];

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 2,
        batchDelayMs: 0,
        adaptiveBatchSize: false,
      });

      await Effect.runPromise(
        processor.embedBatch(["a", "b", "c", "d"], (p) =>
          progressReports.push({ ...p })
        )
      );

      expect(progressReports).toHaveLength(2);
      expect(progressReports[0].percent).toBe(50);
      expect(progressReports[1].percent).toBe(100);
    });

    it("uses custom config", () => {
      const embedFn = (_text: string) => Effect.succeed([1]);
      const checkpointFn = (): Effect.Effect<void> => Effect.void;

      const processor = createEmbeddingProcessor(embedFn, checkpointFn, {
        batchSize: 100,
        concurrency: 10,
      });

      const config = processor.getConfig();
      expect(config.batchSize).toBe(100);
      expect(config.concurrency).toBe(10);
    });
  });

  describe("DEFAULT_QUEUE_CONFIG", () => {
    it("has sensible defaults", () => {
      expect(DEFAULT_QUEUE_CONFIG.batchSize).toBe(50);
      expect(DEFAULT_QUEUE_CONFIG.concurrency).toBe(5);
      expect(DEFAULT_QUEUE_CONFIG.batchDelayMs).toBe(10);
      expect(DEFAULT_QUEUE_CONFIG.checkpointAfterBatch).toBe(true);
      expect(DEFAULT_QUEUE_CONFIG.adaptiveBatchSize).toBe(true);
    });
  });
});
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Queue Service - Gated batch processing with backpressure
|
|
3
|
+
*
|
|
4
|
+
* Solves PGlite daemon crashes under heavy embedding load by:
|
|
5
|
+
* 1. Processing embeddings in small batches (default: 50)
|
|
6
|
+
* 2. Checkpointing after each batch to flush WAL
|
|
7
|
+
* 3. Yielding to event loop between batches (backpressure)
|
|
8
|
+
*
|
|
9
|
+
* WASM Memory Constraints:
|
|
10
|
+
* - PGlite runs in WASM with ~2GB memory limit
|
|
11
|
+
* - Each 1024-dim embedding = 4KB
|
|
12
|
+
* - WAL accumulates until CHECKPOINT
|
|
13
|
+
* - HNSW index updates consume memory during inserts
|
|
14
|
+
*
|
|
15
|
+
* Without gating: 5000 embeddings = 20MB vectors + unbounded WAL = OOM
|
|
16
|
+
* With gating: 50 embeddings/batch + checkpoint = bounded memory
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { Chunk, Duration, Effect, Stream } from "effect";
|
|
20
|
+
|
|
21
|
+
/**
 * Configuration for embedding batch processing
 */
export interface EmbeddingQueueConfig {
  /**
   * Maximum embeddings per batch before checkpoint
   * Lower = more checkpoints, less memory pressure
   * Higher = fewer checkpoints, more throughput
   * Default: 50 (good balance for 1024-dim vectors)
   */
  batchSize: number;

  /**
   * Concurrency for Ollama embedding calls within a batch
   * Limited by Ollama's capacity and network
   * Default: 5
   */
  concurrency: number;

  /**
   * Delay between batches (milliseconds)
   * Allows event loop to breathe and GC to run
   * Default: 10ms
   */
  batchDelayMs: number;

  /**
   * Whether to run CHECKPOINT after each batch
   * Essential for preventing WAL accumulation
   * Default: true
   */
  checkpointAfterBatch: boolean;

  /**
   * Whether to use adaptive batch sizing based on memory pressure
   * Set to false for predictable behavior in tests
   * Default: true
   */
  adaptiveBatchSize: boolean;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Default configuration - tuned for stability over speed
 *
 * Values are asserted in EmbeddingQueue.test.ts; change both together.
 */
export const DEFAULT_QUEUE_CONFIG: EmbeddingQueueConfig = {
  batchSize: 50,
  concurrency: 5,
  batchDelayMs: 10,
  checkpointAfterBatch: true,
  adaptiveBatchSize: true,
};
|
|
72
|
+
|
|
73
|
+
/**
 * Progress callback for batch processing
 */
export interface BatchProgress {
  /** Current batch number (1-indexed) */
  batch: number;
  /** Total number of batches */
  totalBatches: number;
  /** Items processed so far */
  processed: number;
  /** Total items to process */
  total: number;
  /** Percentage complete (0-100), rounded to the nearest integer */
  percent: number;
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Process items in gated batches with backpressure
|
|
91
|
+
*
|
|
92
|
+
* This is the core primitive for preventing WASM OOM. It:
|
|
93
|
+
* 1. Splits input into batches
|
|
94
|
+
* 2. Processes each batch with bounded concurrency
|
|
95
|
+
* 3. Runs afterBatch hook (for checkpoint)
|
|
96
|
+
* 4. Yields between batches (backpressure)
|
|
97
|
+
*
|
|
98
|
+
* @param items - Items to process
|
|
99
|
+
* @param process - Function to process each item
|
|
100
|
+
* @param config - Queue configuration
|
|
101
|
+
* @param afterBatch - Optional hook after each batch (e.g., checkpoint)
|
|
102
|
+
* @param onProgress - Optional progress callback
|
|
103
|
+
* @returns All processed results
|
|
104
|
+
*/
|
|
105
|
+
export function processInBatches<T, R, E>(
|
|
106
|
+
items: readonly T[],
|
|
107
|
+
process: (item: T) => Effect.Effect<R, E>,
|
|
108
|
+
config: EmbeddingQueueConfig = DEFAULT_QUEUE_CONFIG,
|
|
109
|
+
afterBatch?: () => Effect.Effect<void, E>,
|
|
110
|
+
onProgress?: (progress: BatchProgress) => void
|
|
111
|
+
): Effect.Effect<R[], E> {
|
|
112
|
+
return Effect.gen(function* () {
|
|
113
|
+
const results: R[] = [];
|
|
114
|
+
const totalBatches = Math.ceil(items.length / config.batchSize);
|
|
115
|
+
|
|
116
|
+
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
|
|
117
|
+
const start = batchIdx * config.batchSize;
|
|
118
|
+
const end = Math.min(start + config.batchSize, items.length);
|
|
119
|
+
const batch = items.slice(start, end);
|
|
120
|
+
|
|
121
|
+
// Process batch with bounded concurrency
|
|
122
|
+
const batchResults = yield* Stream.fromIterable(batch).pipe(
|
|
123
|
+
Stream.mapEffect(process, { concurrency: config.concurrency }),
|
|
124
|
+
Stream.runCollect,
|
|
125
|
+
Effect.map(Chunk.toArray)
|
|
126
|
+
);
|
|
127
|
+
|
|
128
|
+
results.push(...batchResults);
|
|
129
|
+
|
|
130
|
+
// Report progress
|
|
131
|
+
if (onProgress) {
|
|
132
|
+
onProgress({
|
|
133
|
+
batch: batchIdx + 1,
|
|
134
|
+
totalBatches,
|
|
135
|
+
processed: results.length,
|
|
136
|
+
total: items.length,
|
|
137
|
+
percent: Math.round((results.length / items.length) * 100),
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Run after-batch hook (checkpoint)
|
|
142
|
+
if (afterBatch && config.checkpointAfterBatch) {
|
|
143
|
+
yield* afterBatch();
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Backpressure: yield to event loop between batches
|
|
147
|
+
if (config.batchDelayMs > 0 && batchIdx < totalBatches - 1) {
|
|
148
|
+
yield* Effect.sleep(Duration.millis(config.batchDelayMs));
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
return results;
|
|
153
|
+
});
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Adaptive batch sizing based on memory pressure
|
|
158
|
+
*
|
|
159
|
+
* Monitors process memory and reduces batch size if pressure is high.
|
|
160
|
+
* This is a defense-in-depth measure for edge cases.
|
|
161
|
+
*
|
|
162
|
+
* Memory thresholds (of heap limit):
|
|
163
|
+
* - < 50%: full batch size
|
|
164
|
+
* - 50-70%: 75% batch size
|
|
165
|
+
* - 70-85%: 50% batch size
|
|
166
|
+
* - > 85%: 25% batch size (emergency mode)
|
|
167
|
+
*/
|
|
168
|
+
export function getAdaptiveBatchSize(baseBatchSize: number): number {
|
|
169
|
+
// Only works in Node.js/Bun with v8 heap stats
|
|
170
|
+
if (typeof process !== "undefined" && process.memoryUsage) {
|
|
171
|
+
const mem = process.memoryUsage();
|
|
172
|
+
const heapUsedRatio = mem.heapUsed / mem.heapTotal;
|
|
173
|
+
|
|
174
|
+
if (heapUsedRatio > 0.85) {
|
|
175
|
+
// Emergency: 25% batch size
|
|
176
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.25));
|
|
177
|
+
} else if (heapUsedRatio > 0.7) {
|
|
178
|
+
// High pressure: 50% batch size
|
|
179
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.5));
|
|
180
|
+
} else if (heapUsedRatio > 0.5) {
|
|
181
|
+
// Medium pressure: 75% batch size
|
|
182
|
+
return Math.max(10, Math.floor(baseBatchSize * 0.75));
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return baseBatchSize;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* Create a gated embedding processor
|
|
191
|
+
*
|
|
192
|
+
* This is the high-level API for embedding with backpressure.
|
|
193
|
+
* It wraps processInBatches with embedding-specific defaults.
|
|
194
|
+
*
|
|
195
|
+
* @param embedFn - Function to generate a single embedding
|
|
196
|
+
* @param checkpointFn - Function to run CHECKPOINT
|
|
197
|
+
* @param config - Optional configuration overrides
|
|
198
|
+
*/
|
|
199
|
+
export function createEmbeddingProcessor<E>(
|
|
200
|
+
embedFn: (text: string) => Effect.Effect<number[], E>,
|
|
201
|
+
checkpointFn: () => Effect.Effect<void, E>,
|
|
202
|
+
config: Partial<EmbeddingQueueConfig> = {}
|
|
203
|
+
) {
|
|
204
|
+
const fullConfig = { ...DEFAULT_QUEUE_CONFIG, ...config };
|
|
205
|
+
|
|
206
|
+
return {
|
|
207
|
+
/**
|
|
208
|
+
* Process texts into embeddings with gated batching
|
|
209
|
+
*/
|
|
210
|
+
embedBatch: (
|
|
211
|
+
texts: readonly string[],
|
|
212
|
+
onProgress?: (progress: BatchProgress) => void
|
|
213
|
+
): Effect.Effect<number[][], E> => {
|
|
214
|
+
// Use adaptive batch size based on memory pressure (if enabled)
|
|
215
|
+
const adaptiveConfig = {
|
|
216
|
+
...fullConfig,
|
|
217
|
+
batchSize: fullConfig.adaptiveBatchSize
|
|
218
|
+
? getAdaptiveBatchSize(fullConfig.batchSize)
|
|
219
|
+
: fullConfig.batchSize,
|
|
220
|
+
};
|
|
221
|
+
|
|
222
|
+
return processInBatches(
|
|
223
|
+
texts,
|
|
224
|
+
embedFn,
|
|
225
|
+
adaptiveConfig,
|
|
226
|
+
checkpointFn,
|
|
227
|
+
onProgress
|
|
228
|
+
);
|
|
229
|
+
},
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* Get current configuration (for debugging)
|
|
233
|
+
*/
|
|
234
|
+
getConfig: () => fullConfig,
|
|
235
|
+
};
|
|
236
|
+
}
|