npm - @pravoobi/llm-cache - Versions diffs - 0.1.0 → 0.3.1 - Mend

@pravoobi/llm-cache 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md CHANGED Viewed

@@ -60,6 +60,9 @@ npm install better-sqlite3
 # Postgres / pgvector
 npm install pg
+# In-process ANN index (hnswMemoryStore — for >10k entries without a database)
+npm install hnswlib-node
 ```
 ---
@@ -134,7 +137,50 @@ const result = await cache.wrap(
 | `matchedPrompt` | `string?` | The original prompt that was matched (semantic hits only) |
 | `namespace` | `string?` | The namespace used for this call |
-> **Streaming is not supported.** If `fn()` returns a `ReadableStream` or async iterable, `wrap()` will throw. Collect the full response before passing it to `wrap()`, or use `bypass: true`.
+> **Streaming:** Use `wrapStream()` for streaming LLM calls — see below. Passing a stream directly to `wrap()` will throw.
+---
+### `cache.wrapStream(prompt, fn, options?)`
+Wraps a streaming LLM call. Chunks are yielded to the caller in real time and buffered as they pass through; once the stream ends, the buffered chunks are assembled and written to the cache. On a cache hit, the cached response is replayed as a synthetic stream, so the caller always gets an `AsyncIterable<T>` regardless of hit or miss.
+Returns `{ stream: AsyncIterable<T>, result: Promise<StreamCacheResult> }`.
+```ts
+const { stream, result } = cache.wrapStream(
+  prompt,
+  () => openai.chat.completions.create({ stream: true, ... }),
+  {
+    // Collapse provider-specific chunk shape into the cached value
+    assemble: (chunks) =>
+      chunks.map(c => c.choices[0]?.delta.content ?? '').join(''),
+    // Replay the cached string as a single chunk on a hit
+    reconstruct: async function* (text) {
+      yield { choices: [{ delta: { content: text } }] }
+    },
+    // All CacheOptions (threshold, ttl, namespace, context, bypass) work here too
+  }
+)
+for await (const chunk of stream) {
+  process.stdout.write(chunk.choices[0]?.delta.content ?? '')
+}
+const { hit, layer, similarity } = await result  // settles only once `stream` is consumed: when replay starts on a hit, after the last chunk on a miss
+```
+**`StreamCacheResult`**:
+| Field | Type | Description |
+|---|---|---|
+| `hit` | `boolean` | Whether it was served from cache |
+| `layer` | `"exact" \| "semantic" \| "miss"` | Which cache layer matched |
+| `similarity` | `number?` | Cosine similarity score (semantic hits only) |
+| `matchedPrompt` | `string?` | The original prompt matched (semantic hits only) |
+| `namespace` | `string?` | The namespace used for this call |
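+If `assemble` / `reconstruct` are omitted, string chunks are joined into a single string, which is replayed as one chunk on a hit. Non-string chunks are cached as the raw array of chunks and replayed as one array-valued chunk, so pass both options when your provider streams objects.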
 ---
@@ -250,7 +296,23 @@ createCache({ embedder: ..., store: memoryStore() })
 // or just omit `store` — memory is the default
 ```
-Not persistent across restarts. Suitable for single-process, development, or short-lived workloads.
+Not persistent across restarts. Suitable for single-process, development, or short-lived workloads. Uses an O(n) linear scan for similarity search — switch to `hnswMemoryStore()` once the entry count exceeds ~10k.
+### In-memory with ANN index (hnswMemoryStore)
+Drop-in replacement for `memoryStore()` that uses an [HNSW](https://github.com/nmslib/hnswlib) index for O(log n) similarity search. No database required.
+```ts
+// Requires: npm install hnswlib-node
+import { createCache, hnswMemoryStore } from '@pravoobi/llm-cache'
+createCache({ embedder: ..., store: hnswMemoryStore() })
+```
+- Index is created lazily on first `set()` — dimension detected automatically
+- One index per namespace, so namespace isolation has no search overhead
+- Automatically resizes when capacity is exceeded
+- Not persistent across restarts
 ### Redis
@@ -281,11 +343,21 @@ import { Pool } from 'pg'
 import { createCache, pgvectorStore } from '@pravoobi/llm-cache'
 const pool = new Pool({ connectionString: process.env.DATABASE_URL })
+// Default dimension (1536) — OpenAI text-embedding-3-small, text-embedding-ada-002 (text-embedding-3-large defaults to 3072)
 createCache({ embedder: ..., store: pgvectorStore(pool) })
+// Cohere embed-english-v3.0
+createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 1024 }) })
+// Local model (Xenova/all-MiniLM-L6-v2)
+createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 384 }) })
 ```
 Requires the [`pgvector`](https://github.com/pgvector/pgvector) Postgres extension. Best for multi-process, high-traffic production use. Uses native ANN similarity search via `ivfflat`.
+> **Changing dimensions on an existing table:** `CREATE TABLE IF NOT EXISTS` will not alter an existing column type. If you switch embedding models, clear the old rows first (their embeddings come from a different model and cannot be cast to the new size), then run a migration (`ALTER TABLE llm_cache ALTER COLUMN embedding TYPE vector(1024)`) and rebuild the index before updating `dimensions`.
 ---
 ## Namespace and context scoping
@@ -329,7 +401,7 @@ Embedding costs (e.g., `text-embedding-3-small` at $0.02/million tokens) are neg
 - **Highly personalized responses** — If the correct answer genuinely depends on who is asking, use per-user namespaces carefully or disable caching.
 - **Creative or stochastic tasks** — Caching "Write me a poem about autumn" means every user gets the same poem.
 - **Short TTLs with fast-changing data** — If your data changes faster than your TTL, stale hits cause more harm than cost savings justify.
-- **Streaming responses** — Not supported in v0.1. Collect the full response first.
+- **Streaming calls with one-off prompts** — `wrapStream()` assembles and caches the response after the stream ends. If every prompt is unique and never repeated, you pay the embedding and assembly overhead with no cache benefit; consider `bypass: true` for those calls.
 ---

package/dist/index.d.mts CHANGED Viewed

@@ -10,6 +10,10 @@ interface StoreAdapter {
     delete(key: string): Promise<void>;
     listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
     close?(): Promise<void>;
+    searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
+        record: EmbeddingRecord;
+        similarity: number;
+    } | null>;
 }
 interface EmbeddingRecord {
     key: string;
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
     onMiss?: (prompt: string) => void;
     onError?: (err: Error) => void;
 }
+interface CacheStreamOptions<T> extends CacheOptions {
+    assemble?: (chunks: T[]) => unknown;
+    reconstruct?: (cached: unknown) => AsyncIterable<T>;
+}
+interface StreamCacheResult {
+    hit: boolean;
+    layer: 'exact' | 'semantic' | 'miss';
+    similarity?: number;
+    matchedPrompt?: string;
+    namespace?: string;
+}
 declare function createCache(config: LLMCacheConfig): {
     wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
+    wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
+        stream: AsyncIterable<T>;
+        result: Promise<StreamCacheResult>;
+    };
     invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
     flush: (namespace?: string) => Promise<void>;
     stats: () => {
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
 declare function memoryStore(): StoreAdapter;
+interface HnswIndex {
+    initIndex(maxElements: number, efConstruction?: number, m?: number): void;
+    addPoint(point: number[], label: number): void;
+    markDelete(label: number): void;
+    searchKnn(query: number[], k: number): {
+        neighbors: number[];
+        distances: number[];
+    };
+    getCurrentCount(): number;
+    getMaxElements(): number;
+    resizeIndex(newSize: number): void;
+}
+interface HnswLib {
+    HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
+}
+declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
 declare function redisStore(client: unknown): StoreAdapter;
 declare function sqliteStore(db: unknown): StoreAdapter;
-declare function pgvectorStore(pool: unknown): StoreAdapter;
+interface PgVectorStoreOptions {
+    dimensions?: number;
+}
+declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
-export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
+export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };

package/dist/index.d.ts CHANGED Viewed

@@ -10,6 +10,10 @@ interface StoreAdapter {
     delete(key: string): Promise<void>;
     listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
     close?(): Promise<void>;
+    searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
+        record: EmbeddingRecord;
+        similarity: number;
+    } | null>;
 }
 interface EmbeddingRecord {
     key: string;
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
     onMiss?: (prompt: string) => void;
     onError?: (err: Error) => void;
 }
+interface CacheStreamOptions<T> extends CacheOptions {
+    assemble?: (chunks: T[]) => unknown;
+    reconstruct?: (cached: unknown) => AsyncIterable<T>;
+}
+interface StreamCacheResult {
+    hit: boolean;
+    layer: 'exact' | 'semantic' | 'miss';
+    similarity?: number;
+    matchedPrompt?: string;
+    namespace?: string;
+}
 declare function createCache(config: LLMCacheConfig): {
     wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
+    wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
+        stream: AsyncIterable<T>;
+        result: Promise<StreamCacheResult>;
+    };
     invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
     flush: (namespace?: string) => Promise<void>;
     stats: () => {
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
 declare function memoryStore(): StoreAdapter;
+interface HnswIndex {
+    initIndex(maxElements: number, efConstruction?: number, m?: number): void;
+    addPoint(point: number[], label: number): void;
+    markDelete(label: number): void;
+    searchKnn(query: number[], k: number): {
+        neighbors: number[];
+        distances: number[];
+    };
+    getCurrentCount(): number;
+    getMaxElements(): number;
+    resizeIndex(newSize: number): void;
+}
+interface HnswLib {
+    HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
+}
+declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
 declare function redisStore(client: unknown): StoreAdapter;
 declare function sqliteStore(db: unknown): StoreAdapter;
-declare function pgvectorStore(pool: unknown): StoreAdapter;
+interface PgVectorStoreOptions {
+    dimensions?: number;
+}
+declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
-export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
+export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };

package/dist/index.js CHANGED Viewed

@@ -22,6 +22,7 @@ var index_exports = {};
 __export(index_exports, {
   createCache: () => createCache,
   createEmbedder: () => createEmbedder,
+  hnswMemoryStore: () => hnswMemoryStore,
   memoryStore: () => memoryStore,
   pgvectorStore: () => pgvectorStore,
   redisStore: () => redisStore,
@@ -174,7 +175,7 @@ function isExpired(entry) {
   return Date.now() > entry.expiresAt;
 }
 function computeExpiresAt(ttlSeconds) {
-  if (ttlSeconds === 0) return void 0;
+  if (ttlSeconds <= 0) return void 0;
   return Date.now() + ttlSeconds * 1e3;
 }
@@ -250,7 +251,7 @@ function cosineSimilarity(a, b) {
 function findBestMatch(query, records, threshold) {
   if (records.length > 1e4) {
     console.warn(
-      `[llm-cache] Scanning ${records.length} embeddings in memory. Consider switching to pgvector or a dedicated vector store for better performance.`
+      `[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
     );
   }
   let bestSimilarity = -Infinity;
@@ -301,7 +302,7 @@ function createCache(config) {
     if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
     const normalized = normalizePrompt(prompt);
     const key = hashPrompt(namespace, context, normalized);
-    const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
+    const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
     try {
       const cached = await store.get(key);
       if (cached !== null) {
@@ -323,11 +324,9 @@ function createCache(config) {
       return { value: value2, hit: false, layer: "miss" };
     }
     let embedding;
-    let records;
     try {
       const raw = await embed(normalized);
       embedding = Array.from(raw);
-      records = await store.listEmbeddings(embeddingNamespace);
     } catch (err) {
       config.onError?.(err instanceof Error ? err : new Error(String(err)));
       lifetime.misses++;
@@ -336,7 +335,7 @@ function createCache(config) {
       return { value: value2, hit: false, layer: "miss" };
     }
     try {
-      const match = findBestMatch(embedding, records, threshold);
+      const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
       if (match !== null) {
         const matchedEntry = await store.get(match.record.key);
         if (matchedEntry !== null) {
@@ -360,9 +359,9 @@ function createCache(config) {
     lifetime.misses++;
     config.onMiss?.(prompt);
     const value = await fn();
-    if (value instanceof ReadableStream || typeof value === "object" && value !== null && (Symbol.asyncIterator in value || Symbol.iterator in value) && typeof value.text !== "string") {
+    if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
       throw new Error(
-        "[llm-cache] Streaming responses cannot be cached. Collect the full response before passing fn() to wrap(), or use bypass: true to skip the cache for streaming calls."
+        "[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
       );
     }
     const now = Date.now();
@@ -389,6 +388,120 @@ function createCache(config) {
       ...namespace !== void 0 ? { namespace } : {}
     };
   }
+  function defaultAssemble(chunks) {
+    if (chunks.length > 0 && chunks.every((c) => typeof c === "string")) {
+      return chunks.join("");
+    }
+    return chunks;
+  }
+  async function* defaultReconstruct(cached) {
+    yield cached;
+  }
+  function wrapStream(prompt, fn, options) {
+    const assemble = options?.assemble ?? defaultAssemble;
+    const reconstruct = options?.reconstruct ?? defaultReconstruct;
+    let resolveResult;
+    const result = new Promise((res) => {
+      resolveResult = res;
+    });
+    async function* generate() {
+      if (options?.bypass === true) {
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      const namespace = options?.namespace;
+      const context = options?.context;
+      const threshold = options?.threshold ?? globalThreshold;
+      const ttl = options?.ttl ?? globalTtl;
+      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
+      const normalized = normalizePrompt(prompt);
+      const key = hashPrompt(namespace, context, normalized);
+      const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
+      try {
+        const cached = await store.get(key);
+        if (cached !== null) {
+          lifetime.hits++;
+          const streamResult = {
+            hit: true,
+            layer: "exact",
+            ...namespace !== void 0 ? { namespace } : {}
+          };
+          config.onHit?.({ ...streamResult, value: cached.response });
+          resolveResult(streamResult);
+          yield* reconstruct(cached.response);
+          return;
+        }
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+        lifetime.misses++;
+        config.onMiss?.(prompt);
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      let embedding;
+      try {
+        const raw = await embed(normalized);
+        embedding = Array.from(raw);
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+        lifetime.misses++;
+        config.onMiss?.(prompt);
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      try {
+        const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
+        if (match !== null) {
+          const matchedEntry = await store.get(match.record.key);
+          if (matchedEntry !== null) {
+            lifetime.hits++;
+            lifetime.similarities.push(match.similarity);
+            const streamResult = {
+              hit: true,
+              layer: "semantic",
+              similarity: match.similarity,
+              matchedPrompt: matchedEntry.prompt,
+              ...namespace !== void 0 ? { namespace } : {}
+            };
+            config.onHit?.({ ...streamResult, value: matchedEntry.response });
+            resolveResult(streamResult);
+            yield* reconstruct(matchedEntry.response);
+            return;
+          }
+        }
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+      }
+      lifetime.misses++;
+      config.onMiss?.(prompt);
+      const chunks = [];
+      for await (const chunk of fn()) {
+        chunks.push(chunk);
+        yield chunk;
+      }
+      const assembled = assemble(chunks);
+      const now = Date.now();
+      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
+      const entry = {
+        prompt: normalized,
+        response: assembled,
+        embedding,
+        createdAt: now,
+        ...embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {},
+        ...expiresAt !== void 0 ? { expiresAt } : {}
+      };
+      try {
+        await store.set(key, entry, ttl);
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+      }
+      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
+    }
+    return { stream: generate(), result };
+  }
   async function invalidate(prompt, options) {
     const normalized = normalizePrompt(prompt);
     const key = hashPrompt(options?.namespace, options?.context, normalized);
@@ -414,7 +527,128 @@ function createCache(config) {
       avgSimilarity
     };
   }
-  return { wrap, invalidate, flush, stats: getStats };
+  return { wrap, wrapStream, invalidate, flush, stats: getStats };
+}
+// src/stores/hnsw-memory.ts
+var INITIAL_CAPACITY = 1024;
+async function loadHnswLib() {
+  try {
+    return await new Function("m", "return import(m)")("hnswlib-node");
+  } catch {
+    throw new Error(
+      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node"
+    );
+  }
+}
+function hnswMemoryStore(injectedLib) {
+  const entries = /* @__PURE__ */ new Map();
+  const embeddingRecords = /* @__PURE__ */ new Map();
+  const nsIndices = /* @__PURE__ */ new Map();
+  let dimension = null;
+  let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;
+  function getLib() {
+    if (!libPromise) libPromise = loadHnswLib();
+    return libPromise;
+  }
+  function getOrCreateNsIndex(lib, ns, dim) {
+    let nsIdx = nsIndices.get(ns);
+    if (nsIdx === void 0) {
+      const index = new lib.HierarchicalNSW("cosine", dim);
+      index.initIndex(INITIAL_CAPACITY);
+      nsIdx = { index, keyToLabel: /* @__PURE__ */ new Map(), labelToKey: /* @__PURE__ */ new Map(), nextLabel: 0, maxElements: INITIAL_CAPACITY };
+      nsIndices.set(ns, nsIdx);
+    }
+    return nsIdx;
+  }
+  function nsKey(namespace) {
+    return namespace ?? "__default__";
+  }
+  const self = {
+    async get(key) {
+      const entry = entries.get(key);
+      if (!entry) return null;
+      if (isExpired(entry)) {
+        await self.delete(key);
+        return null;
+      }
+      return entry;
+    },
+    async set(key, entry, _ttlSeconds) {
+      const lib = await getLib();
+      if (dimension === null) dimension = entry.embedding.length;
+      const ns = nsKey(entry.namespace);
+      const nsIdx = getOrCreateNsIndex(lib, ns, dimension);
+      const existingLabel = nsIdx.keyToLabel.get(key);
+      if (existingLabel !== void 0) {
+        try {
+          nsIdx.index.markDelete(existingLabel);
+        } catch {
+        }
+        nsIdx.labelToKey.delete(existingLabel);
+      }
+      if (nsIdx.nextLabel >= nsIdx.maxElements) {
+        nsIdx.maxElements *= 2;
+        nsIdx.index.resizeIndex(nsIdx.maxElements);
+      }
+      const label = nsIdx.nextLabel++;
+      nsIdx.index.addPoint(entry.embedding, label);
+      nsIdx.keyToLabel.set(key, label);
+      nsIdx.labelToKey.set(label, key);
+      entries.set(key, entry);
+      embeddingRecords.set(key, {
+        key,
+        embedding: entry.embedding,
+        createdAt: entry.createdAt,
+        ...entry.namespace !== void 0 ? { namespace: entry.namespace } : {}
+      });
+    },
+    async delete(key) {
+      const entry = entries.get(key);
+      if (entry) {
+        const nsIdx = nsIndices.get(nsKey(entry.namespace));
+        if (nsIdx) {
+          const label = nsIdx.keyToLabel.get(key);
+          if (label !== void 0) {
+            try {
+              nsIdx.index.markDelete(label);
+            } catch {
+            }
+            nsIdx.keyToLabel.delete(key);
+            nsIdx.labelToKey.delete(label);
+          }
+        }
+      }
+      entries.delete(key);
+      embeddingRecords.delete(key);
+    },
+    async listEmbeddings(namespace) {
+      const all = Array.from(embeddingRecords.values());
+      return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
+    },
+    async searchSimilar(query, threshold, namespace) {
+      const nsIdx = nsIndices.get(nsKey(namespace));
+      if (!nsIdx || nsIdx.index.getCurrentCount() === 0) return null;
+      const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
+      const label = neighbors[0];
+      const distance = distances[0];
+      if (label === void 0 || distance === void 0) return null;
+      const similarity = 1 - distance;
+      if (similarity < threshold) return null;
+      const key = nsIdx.labelToKey.get(label);
+      if (!key) return null;
+      const entry = entries.get(key);
+      if (!entry) return null;
+      if (isExpired(entry)) {
+        await self.delete(key);
+        return null;
+      }
+      const record = embeddingRecords.get(key);
+      if (!record) return null;
+      return { record, similarity };
+    }
+  };
+  return self;
 }
 // src/utils/validate.ts
@@ -424,6 +658,12 @@ function assertCacheEntry(val, source) {
   }
   return val;
 }
+function assertEmbeddingRecord(val, source) {
+  if (typeof val !== "object" || val === null || typeof val["key"] !== "string" || !Array.isArray(val["embedding"]) || typeof val["createdAt"] !== "number") {
+    throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
+  }
+  return val;
+}
 // src/stores/redis.ts
 var ENTRY_PREFIX = "llm-cache:entry:";
@@ -471,7 +711,7 @@ function redisStore(client) {
     async listEmbeddings(namespace) {
       const hash = await redis.hgetall(nsHashKey(namespace));
       if (!hash) return [];
-      return Object.values(hash).map((v) => JSON.parse(v));
+      return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
     },
     async close() {
       await redis.quit();
@@ -545,26 +785,32 @@ function sqliteStore(db) {
     },
     async listEmbeddings(namespace) {
       const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
-      return rows.map((row) => ({
-        key: row.key,
-        embedding: JSON.parse(row.embedding),
-        createdAt: row.created_at,
-        ...row.namespace !== null ? { namespace: row.namespace } : {}
-      }));
+      return rows.map((row) => {
+        const parsed = assertEmbeddingRecord(
+          {
+            key: row.key,
+            embedding: JSON.parse(row.embedding),
+            createdAt: row.created_at,
+            ...row.namespace !== null ? { namespace: row.namespace } : {}
+          },
+          "sqlite"
+        );
+        return parsed;
+      });
     }
   };
 }
 // src/stores/pgvector.ts
-var VECTOR_DIM = 1536;
-async function initSchema2(pool) {
+var DEFAULT_DIMENSIONS = 1536;
+async function initSchema2(pool, dimensions) {
   await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
   await pool.query(`
     CREATE TABLE IF NOT EXISTS llm_cache (
       key TEXT PRIMARY KEY,
       prompt TEXT NOT NULL,
       response JSONB,
-      embedding vector(${VECTOR_DIM}),
+      embedding vector(${dimensions}),
       namespace TEXT,
       created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
       expires_at TIMESTAMPTZ
@@ -580,9 +826,16 @@ async function initSchema2(pool) {
 function parseEmbedding(raw) {
   return raw.replace(/^\[/, "").replace(/\]$/, "").split(",").map(Number);
 }
-function pgvectorStore(pool) {
+function pgvectorStore(pool, options) {
   const pg = pool;
-  const ready = initSchema2(pg);
+  const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
+  if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
+    throw new RangeError(
+      `[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
+    );
+  }
+  const dimensions = rawDimensions;
+  const ready = initSchema2(pg, dimensions);
   return {
     async get(key) {
       await ready;
@@ -656,6 +909,7 @@ function pgvectorStore(pool) {
 0 && (module.exports = {
   createCache,
   createEmbedder,
+  hnswMemoryStore,
   memoryStore,
   pgvectorStore,
   redisStore,

package/dist/index.mjs CHANGED Viewed

@@ -143,7 +143,7 @@ function isExpired(entry) {
   return Date.now() > entry.expiresAt;
 }
 function computeExpiresAt(ttlSeconds) {
-  if (ttlSeconds === 0) return void 0;
+  if (ttlSeconds <= 0) return void 0;
   return Date.now() + ttlSeconds * 1e3;
 }
@@ -219,7 +219,7 @@ function cosineSimilarity(a, b) {
 function findBestMatch(query, records, threshold) {
   if (records.length > 1e4) {
     console.warn(
-      `[llm-cache] Scanning ${records.length} embeddings in memory. Consider switching to pgvector or a dedicated vector store for better performance.`
+      `[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
     );
   }
   let bestSimilarity = -Infinity;
@@ -270,7 +270,7 @@ function createCache(config) {
     if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
     const normalized = normalizePrompt(prompt);
     const key = hashPrompt(namespace, context, normalized);
-    const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
+    const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
     try {
       const cached = await store.get(key);
       if (cached !== null) {
@@ -292,11 +292,9 @@ function createCache(config) {
       return { value: value2, hit: false, layer: "miss" };
     }
     let embedding;
-    let records;
     try {
       const raw = await embed(normalized);
       embedding = Array.from(raw);
-      records = await store.listEmbeddings(embeddingNamespace);
     } catch (err) {
       config.onError?.(err instanceof Error ? err : new Error(String(err)));
       lifetime.misses++;
@@ -305,7 +303,7 @@ function createCache(config) {
       return { value: value2, hit: false, layer: "miss" };
     }
     try {
-      const match = findBestMatch(embedding, records, threshold);
+      const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
       if (match !== null) {
         const matchedEntry = await store.get(match.record.key);
         if (matchedEntry !== null) {
@@ -329,9 +327,9 @@ function createCache(config) {
     lifetime.misses++;
     config.onMiss?.(prompt);
     const value = await fn();
-    if (value instanceof ReadableStream || typeof value === "object" && value !== null && (Symbol.asyncIterator in value || Symbol.iterator in value) && typeof value.text !== "string") {
+    if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
       throw new Error(
-        "[llm-cache] Streaming responses cannot be cached. Collect the full response before passing fn() to wrap(), or use bypass: true to skip the cache for streaming calls."
+        "[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
       );
     }
     const now = Date.now();
@@ -358,6 +356,120 @@ function createCache(config) {
       ...namespace !== void 0 ? { namespace } : {}
     };
   }
+  function defaultAssemble(chunks) {
+    if (chunks.length > 0 && chunks.every((c) => typeof c === "string")) {
+      return chunks.join("");
+    }
+    return chunks;
+  }
+  /**
+   * Default `reconstruct` step for wrapStream: replays a cached value as an
+   * async stream that yields the whole value exactly once.
+   *
+   * @param {*} cached - The cached response to replay.
+   * @yields {*} `cached`, as a single chunk.
+   */
+  async function* defaultReconstruct(cached) {
+    yield cached;
+  }
+  function wrapStream(prompt, fn, options) {
+    const assemble = options?.assemble ?? defaultAssemble;
+    const reconstruct = options?.reconstruct ?? defaultReconstruct;
+    let resolveResult;
+    const result = new Promise((res) => {
+      resolveResult = res;
+    });
+    async function* generate() {
+      if (options?.bypass === true) {
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      const namespace = options?.namespace;
+      const context = options?.context;
+      const threshold = options?.threshold ?? globalThreshold;
+      const ttl = options?.ttl ?? globalTtl;
+      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
+      const normalized = normalizePrompt(prompt);
+      const key = hashPrompt(namespace, context, normalized);
+      const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
+      try {
+        const cached = await store.get(key);
+        if (cached !== null) {
+          lifetime.hits++;
+          const streamResult = {
+            hit: true,
+            layer: "exact",
+            ...namespace !== void 0 ? { namespace } : {}
+          };
+          config.onHit?.({ ...streamResult, value: cached.response });
+          resolveResult(streamResult);
+          yield* reconstruct(cached.response);
+          return;
+        }
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+        lifetime.misses++;
+        config.onMiss?.(prompt);
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      let embedding;
+      try {
+        const raw = await embed(normalized);
+        embedding = Array.from(raw);
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+        lifetime.misses++;
+        config.onMiss?.(prompt);
+        yield* fn();
+        resolveResult({ hit: false, layer: "miss" });
+        return;
+      }
+      try {
+        const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
+        if (match !== null) {
+          const matchedEntry = await store.get(match.record.key);
+          if (matchedEntry !== null) {
+            lifetime.hits++;
+            lifetime.similarities.push(match.similarity);
+            const streamResult = {
+              hit: true,
+              layer: "semantic",
+              similarity: match.similarity,
+              matchedPrompt: matchedEntry.prompt,
+              ...namespace !== void 0 ? { namespace } : {}
+            };
+            config.onHit?.({ ...streamResult, value: matchedEntry.response });
+            resolveResult(streamResult);
+            yield* reconstruct(matchedEntry.response);
+            return;
+          }
+        }
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+      }
+      lifetime.misses++;
+      config.onMiss?.(prompt);
+      const chunks = [];
+      for await (const chunk of fn()) {
+        chunks.push(chunk);
+        yield chunk;
+      }
+      const assembled = assemble(chunks);
+      const now = Date.now();
+      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
+      const entry = {
+        prompt: normalized,
+        response: assembled,
+        embedding,
+        createdAt: now,
+        ...embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {},
+        ...expiresAt !== void 0 ? { expiresAt } : {}
+      };
+      try {
+        await store.set(key, entry, ttl);
+      } catch (err) {
+        config.onError?.(err instanceof Error ? err : new Error(String(err)));
+      }
+      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
+    }
+    return { stream: generate(), result };
+  }
   async function invalidate(prompt, options) {
     const normalized = normalizePrompt(prompt);
     const key = hashPrompt(options?.namespace, options?.context, normalized);
@@ -383,7 +495,128 @@ function createCache(config) {
       avgSimilarity
     };
   }
-  return { wrap, invalidate, flush, stats: getStats };
+  return { wrap, wrapStream, invalidate, flush, stats: getStats };
+}
+// src/stores/hnsw-memory.ts
+var INITIAL_CAPACITY = 1024;
+async function loadHnswLib() {
+  try {
+    return await new Function("m", "return import(m)")("hnswlib-node");
+  } catch {
+    throw new Error(
+      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node"
+    );
+  }
+}
+function hnswMemoryStore(injectedLib) {
+  const entries = /* @__PURE__ */ new Map();
+  const embeddingRecords = /* @__PURE__ */ new Map();
+  const nsIndices = /* @__PURE__ */ new Map();
+  let dimension = null;
+  let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;
+  function getLib() {
+    if (!libPromise) libPromise = loadHnswLib();
+    return libPromise;
+  }
+  function getOrCreateNsIndex(lib, ns, dim) {
+    let nsIdx = nsIndices.get(ns);
+    if (nsIdx === void 0) {
+      const index = new lib.HierarchicalNSW("cosine", dim);
+      index.initIndex(INITIAL_CAPACITY);
+      nsIdx = { index, keyToLabel: /* @__PURE__ */ new Map(), labelToKey: /* @__PURE__ */ new Map(), nextLabel: 0, maxElements: INITIAL_CAPACITY };
+      nsIndices.set(ns, nsIdx);
+    }
+    return nsIdx;
+  }
+  function nsKey(namespace) {
+    return namespace ?? "__default__";
+  }
+  const self = {
+    async get(key) {
+      const entry = entries.get(key);
+      if (!entry) return null;
+      if (isExpired(entry)) {
+        await self.delete(key);
+        return null;
+      }
+      return entry;
+    },
+    async set(key, entry, _ttlSeconds) {
+      const lib = await getLib();
+      if (dimension === null) dimension = entry.embedding.length;
+      const ns = nsKey(entry.namespace);
+      const nsIdx = getOrCreateNsIndex(lib, ns, dimension);
+      const existingLabel = nsIdx.keyToLabel.get(key);
+      if (existingLabel !== void 0) {
+        try {
+          nsIdx.index.markDelete(existingLabel);
+        } catch {
+        }
+        nsIdx.labelToKey.delete(existingLabel);
+      }
+      if (nsIdx.nextLabel >= nsIdx.maxElements) {
+        nsIdx.maxElements *= 2;
+        nsIdx.index.resizeIndex(nsIdx.maxElements);
+      }
+      const label = nsIdx.nextLabel++;
+      nsIdx.index.addPoint(entry.embedding, label);
+      nsIdx.keyToLabel.set(key, label);
+      nsIdx.labelToKey.set(label, key);
+      entries.set(key, entry);
+      embeddingRecords.set(key, {
+        key,
+        embedding: entry.embedding,
+        createdAt: entry.createdAt,
+        ...entry.namespace !== void 0 ? { namespace: entry.namespace } : {}
+      });
+    },
+    async delete(key) {
+      const entry = entries.get(key);
+      if (entry) {
+        const nsIdx = nsIndices.get(nsKey(entry.namespace));
+        if (nsIdx) {
+          const label = nsIdx.keyToLabel.get(key);
+          if (label !== void 0) {
+            try {
+              nsIdx.index.markDelete(label);
+            } catch {
+            }
+            nsIdx.keyToLabel.delete(key);
+            nsIdx.labelToKey.delete(label);
+          }
+        }
+      }
+      entries.delete(key);
+      embeddingRecords.delete(key);
+    },
+    async listEmbeddings(namespace) {
+      const all = Array.from(embeddingRecords.values());
+      return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
+    },
+    async searchSimilar(query, threshold, namespace) {
+      const nsIdx = nsIndices.get(nsKey(namespace));
+      if (!nsIdx || nsIdx.index.getCurrentCount() === 0) return null;
+      const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
+      const label = neighbors[0];
+      const distance = distances[0];
+      if (label === void 0 || distance === void 0) return null;
+      const similarity = 1 - distance;
+      if (similarity < threshold) return null;
+      const key = nsIdx.labelToKey.get(label);
+      if (!key) return null;
+      const entry = entries.get(key);
+      if (!entry) return null;
+      if (isExpired(entry)) {
+        await self.delete(key);
+        return null;
+      }
+      const record = embeddingRecords.get(key);
+      if (!record) return null;
+      return { record, similarity };
+    }
+  };
+  return self;
 }
 // src/utils/validate.ts
@@ -393,6 +626,12 @@ function assertCacheEntry(val, source) {
   }
   return val;
 }
+function assertEmbeddingRecord(val, source) {
+  if (typeof val !== "object" || val === null || typeof val["key"] !== "string" || !Array.isArray(val["embedding"]) || typeof val["createdAt"] !== "number") {
+    throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
+  }
+  return val;
+}
 // src/stores/redis.ts
 var ENTRY_PREFIX = "llm-cache:entry:";
@@ -440,7 +679,7 @@ function redisStore(client) {
     async listEmbeddings(namespace) {
       const hash = await redis.hgetall(nsHashKey(namespace));
       if (!hash) return [];
-      return Object.values(hash).map((v) => JSON.parse(v));
+      return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
     },
     async close() {
       await redis.quit();
@@ -514,26 +753,32 @@ function sqliteStore(db) {
     },
     async listEmbeddings(namespace) {
       const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
-      return rows.map((row) => ({
-        key: row.key,
-        embedding: JSON.parse(row.embedding),
-        createdAt: row.created_at,
-        ...row.namespace !== null ? { namespace: row.namespace } : {}
-      }));
+      return rows.map((row) => {
+        const parsed = assertEmbeddingRecord(
+          {
+            key: row.key,
+            embedding: JSON.parse(row.embedding),
+            createdAt: row.created_at,
+            ...row.namespace !== null ? { namespace: row.namespace } : {}
+          },
+          "sqlite"
+        );
+        return parsed;
+      });
     }
   };
 }
 // src/stores/pgvector.ts
-var VECTOR_DIM = 1536;
-async function initSchema2(pool) {
+var DEFAULT_DIMENSIONS = 1536;
+async function initSchema2(pool, dimensions) {
   await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
   await pool.query(`
     CREATE TABLE IF NOT EXISTS llm_cache (
       key TEXT PRIMARY KEY,
       prompt TEXT NOT NULL,
       response JSONB,
-      embedding vector(${VECTOR_DIM}),
+      embedding vector(${dimensions}),
       namespace TEXT,
       created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
       expires_at TIMESTAMPTZ
@@ -549,9 +794,16 @@ async function initSchema2(pool) {
 /**
  * Converts a bracketed, comma-separated vector string into an array of
  * numbers: one leading "[" and one trailing "]" are stripped, the rest is
  * split on "," and each piece is passed through Number().
  */
 function parseEmbedding(raw) {
   const withoutOpen = raw.replace(/^\[/, "");
   const inner = withoutOpen.replace(/\]$/, "");
   return inner.split(",").map((piece) => Number(piece));
 }
-function pgvectorStore(pool) {
+function pgvectorStore(pool, options) {
   const pg = pool;
-  const ready = initSchema2(pg);
+  const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
+  if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
+    throw new RangeError(
+      `[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
+    );
+  }
+  const dimensions = rawDimensions;
+  const ready = initSchema2(pg, dimensions);
   return {
     async get(key) {
       await ready;
@@ -624,6 +876,7 @@ function pgvectorStore(pool) {
 export {
   createCache,
   createEmbedder,
+  hnswMemoryStore,
   memoryStore,
   pgvectorStore,
   redisStore,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pravoobi/llm-cache",
-  "version": "0.1.0",
+  "version": "0.3.1",
   "description": "Semantic caching layer for LLM calls. Deduplicates near-identical prompts using embeddings.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",
@@ -26,7 +26,8 @@
     "ioredis": ">=5.0.0",
     "better-sqlite3": ">=9.0.0",
     "pg": ">=8.0.0",
-    "@xenova/transformers": ">=2.0.0"
+    "@xenova/transformers": ">=2.0.0",
+    "hnswlib-node": ">=3.0.0"
   },
   "peerDependenciesMeta": {
     "openai": { "optional": true },
@@ -34,7 +35,8 @@
     "ioredis": { "optional": true },
     "better-sqlite3": { "optional": true },
     "pg": { "optional": true },
-    "@xenova/transformers": { "optional": true }
+    "@xenova/transformers": { "optional": true },
+    "hnswlib-node": { "optional": true }
   },
   "author": "Venkata Praveen Kumar Velisetty",
   "repository": {
@@ -58,5 +60,9 @@
   },
   "keywords": ["llm", "cache", "semantic", "embeddings", "openai", "anthropic", "ai"],
   "license": "MIT",
-  "engines": { "node": ">=18.0.0" }
+  "engines": { "node": ">=20.0.0" },
+  "publishConfig": {
+    "access": "public",
+    "provenance": true
+  }
 }