@pravoobi/llm-cache 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,6 +60,9 @@ npm install better-sqlite3
60
60
 
61
61
  # Postgres / pgvector
62
62
  npm install pg
63
+
64
+ # In-process ANN index (hnswMemoryStore — for >10k entries without a database)
65
+ npm install hnswlib-node
63
66
  ```
64
67
 
65
68
  ---
@@ -134,7 +137,50 @@ const result = await cache.wrap(
134
137
  | `matchedPrompt` | `string?` | The original prompt that was matched (semantic hits only) |
135
138
  | `namespace` | `string?` | The namespace used for this call |
136
139
 
137
- > **Streaming is not supported.** If `fn()` returns a `ReadableStream` or async iterable, `wrap()` will throw. Collect the full response before passing it to `wrap()`, or use `bypass: true`.
140
+ > **Streaming:** Use `wrapStream()` for streaming LLM calls — see below. Passing a stream directly to `wrap()` will throw.
141
+
142
+ ---
143
+
144
+ ### `cache.wrapStream(prompt, fn, options?)`
145
+
146
+ For streaming LLM responses. Yields chunks to the caller in real-time while assembling the full response for the cache in the background. On a cache hit, replays the cached response as a synthetic stream so the caller always gets an `AsyncIterable<T>` regardless of hit or miss.
147
+
148
+ Returns `{ stream: AsyncIterable<T>, result: Promise<StreamCacheResult> }`.
149
+
150
+ ```ts
151
+ const { stream, result } = cache.wrapStream(
152
+ prompt,
153
+ () => openai.chat.completions.create({ stream: true, ... }),
154
+ {
155
+ // Collapse provider-specific chunk shape into the cached value
156
+ assemble: (chunks) =>
157
+ chunks.map(c => c.choices[0]?.delta.content ?? '').join(''),
158
+ // Replay the cached string as a single chunk on a hit
159
+ reconstruct: async function* (text) {
160
+ yield { choices: [{ delta: { content: text } }] }
161
+ },
162
+ // All CacheOptions (threshold, ttl, namespace, context, bypass) work here too
163
+ }
164
+ )
165
+
166
+ for await (const chunk of stream) {
167
+ process.stdout.write(chunk.choices[0]?.delta.content ?? '')
168
+ }
169
+
170
+ const { hit, layer, similarity } = await result // resolves after stream ends
171
+ ```
172
+
173
+ **`StreamCacheResult`**:
174
+
175
+ | Field | Type | Description |
176
+ |---|---|---|
177
+ | `hit` | `boolean` | Whether it was served from cache |
178
+ | `layer` | `"exact" \| "semantic" \| "miss"` | Which cache layer matched |
179
+ | `similarity` | `number?` | Cosine similarity score (semantic hits only) |
180
+ | `matchedPrompt` | `string?` | The original prompt matched (semantic hits only) |
181
+ | `namespace` | `string?` | The namespace used for this call |
182
+
183
+ If `assemble` / `reconstruct` are omitted, string chunks are joined by default and the assembled string is replayed as a single chunk on a hit.
138
184
 
139
185
  ---
140
186
 
@@ -250,7 +296,23 @@ createCache({ embedder: ..., store: memoryStore() })
250
296
  // or just omit `store` — memory is the default
251
297
  ```
252
298
 
253
- Not persistent across restarts. Suitable for single-process, development, or short-lived workloads.
299
+ Not persistent across restarts. Suitable for single-process, development, or short-lived workloads. Uses O(n) linear scan for similarity search — switch to `hnswMemoryStore` when entry count exceeds ~10k.
300
+
301
+ ### In-memory with ANN index (hnswMemoryStore)
302
+
303
+ Drop-in replacement for `memoryStore()` that uses an [HNSW](https://github.com/nmslib/hnswlib) index for O(log n) similarity search. No database required.
304
+
305
+ ```ts
306
+ // Requires: npm install hnswlib-node
307
+ import { createCache, hnswMemoryStore } from '@pravoobi/llm-cache'
308
+
309
+ createCache({ embedder: ..., store: hnswMemoryStore() })
310
+ ```
311
+
312
+ - Index is created lazily on first `set()` — dimension detected automatically
313
+ - One index per namespace, so namespace isolation has no search overhead
314
+ - Automatically resizes when capacity is exceeded
315
+ - Not persistent across restarts
254
316
 
255
317
  ### Redis
256
318
 
@@ -281,11 +343,21 @@ import { Pool } from 'pg'
281
343
  import { createCache, pgvectorStore } from '@pravoobi/llm-cache'
282
344
 
283
345
  const pool = new Pool({ connectionString: process.env.DATABASE_URL })
346
+
347
+ // Default dimension (1536) — OpenAI text-embedding-3-small and ada-002 (text-embedding-3-large defaults to 3072)
284
348
  createCache({ embedder: ..., store: pgvectorStore(pool) })
349
+
350
+ // Cohere embed-english-v3.0
351
+ createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 1024 }) })
352
+
353
+ // Local model (Xenova/all-MiniLM-L6-v2)
354
+ createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 384 }) })
285
355
  ```
286
356
 
287
357
  Requires the [`pgvector`](https://github.com/pgvector/pgvector) Postgres extension. Best for multi-process, high-traffic production use. Uses native ANN similarity search via `ivfflat`.
288
358
 
359
+ > **Changing dimensions on an existing table:** `CREATE TABLE IF NOT EXISTS` will not alter an existing column type. If you switch embedding models, run a migration (`ALTER TABLE llm_cache ALTER COLUMN embedding TYPE vector(1024)`) and rebuild the index before updating `dimensions`.
360
+
289
361
  ---
290
362
 
291
363
  ## Namespace and context scoping
@@ -329,7 +401,7 @@ Embedding costs (e.g., `text-embedding-3-small` at $0.02/million tokens) are neg
329
401
  - **Highly personalized responses** — If the correct answer genuinely depends on who is asking, use per-user namespaces carefully or disable caching.
330
402
  - **Creative or stochastic tasks** — Caching "Write me a poem about autumn" means every user gets the same poem.
331
403
  - **Short TTLs with fast-changing data** — If your data changes faster than your TTL, stale hits cause more harm than cost savings justify.
332
- - **Streaming responses** — Not supported in v0.1. Collect the full response first.
404
+ - **Truly unique streaming responses** — `wrapStream()` assembles and caches the response after the stream ends. If every prompt is unique and never repeated, you pay assembly overhead with no cache benefit; consider `bypass: true` for those calls.
333
405
 
334
406
  ---
335
407
 
package/dist/index.d.mts CHANGED
@@ -10,6 +10,10 @@ interface StoreAdapter {
10
10
  delete(key: string): Promise<void>;
11
11
  listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
12
12
  close?(): Promise<void>;
13
+ searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
14
+ record: EmbeddingRecord;
15
+ similarity: number;
16
+ } | null>;
13
17
  }
14
18
  interface EmbeddingRecord {
15
19
  key: string;
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
49
53
  onMiss?: (prompt: string) => void;
50
54
  onError?: (err: Error) => void;
51
55
  }
56
+ interface CacheStreamOptions<T> extends CacheOptions {
57
+ assemble?: (chunks: T[]) => unknown;
58
+ reconstruct?: (cached: unknown) => AsyncIterable<T>;
59
+ }
60
+ interface StreamCacheResult {
61
+ hit: boolean;
62
+ layer: 'exact' | 'semantic' | 'miss';
63
+ similarity?: number;
64
+ matchedPrompt?: string;
65
+ namespace?: string;
66
+ }
52
67
 
53
68
  declare function createCache(config: LLMCacheConfig): {
54
69
  wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
70
+ wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
71
+ stream: AsyncIterable<T>;
72
+ result: Promise<StreamCacheResult>;
73
+ };
55
74
  invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
56
75
  flush: (namespace?: string) => Promise<void>;
57
76
  stats: () => {
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
66
85
 
67
86
  declare function memoryStore(): StoreAdapter;
68
87
 
88
+ interface HnswIndex {
89
+ initIndex(maxElements: number, efConstruction?: number, m?: number): void;
90
+ addPoint(point: number[], label: number): void;
91
+ markDelete(label: number): void;
92
+ searchKnn(query: number[], k: number): {
93
+ neighbors: number[];
94
+ distances: number[];
95
+ };
96
+ getCurrentCount(): number;
97
+ getMaxElements(): number;
98
+ resizeIndex(newSize: number): void;
99
+ }
100
+ interface HnswLib {
101
+ HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
102
+ }
103
+ declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
104
+
69
105
  declare function redisStore(client: unknown): StoreAdapter;
70
106
 
71
107
  declare function sqliteStore(db: unknown): StoreAdapter;
72
108
 
73
- declare function pgvectorStore(pool: unknown): StoreAdapter;
109
+ interface PgVectorStoreOptions {
110
+ dimensions?: number;
111
+ }
112
+ declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
74
113
 
75
- export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
114
+ export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };
package/dist/index.d.ts CHANGED
@@ -10,6 +10,10 @@ interface StoreAdapter {
10
10
  delete(key: string): Promise<void>;
11
11
  listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
12
12
  close?(): Promise<void>;
13
+ searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
14
+ record: EmbeddingRecord;
15
+ similarity: number;
16
+ } | null>;
13
17
  }
14
18
  interface EmbeddingRecord {
15
19
  key: string;
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
49
53
  onMiss?: (prompt: string) => void;
50
54
  onError?: (err: Error) => void;
51
55
  }
56
+ interface CacheStreamOptions<T> extends CacheOptions {
57
+ assemble?: (chunks: T[]) => unknown;
58
+ reconstruct?: (cached: unknown) => AsyncIterable<T>;
59
+ }
60
+ interface StreamCacheResult {
61
+ hit: boolean;
62
+ layer: 'exact' | 'semantic' | 'miss';
63
+ similarity?: number;
64
+ matchedPrompt?: string;
65
+ namespace?: string;
66
+ }
52
67
 
53
68
  declare function createCache(config: LLMCacheConfig): {
54
69
  wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
70
+ wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
71
+ stream: AsyncIterable<T>;
72
+ result: Promise<StreamCacheResult>;
73
+ };
55
74
  invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
56
75
  flush: (namespace?: string) => Promise<void>;
57
76
  stats: () => {
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
66
85
 
67
86
  declare function memoryStore(): StoreAdapter;
68
87
 
88
+ interface HnswIndex {
89
+ initIndex(maxElements: number, efConstruction?: number, m?: number): void;
90
+ addPoint(point: number[], label: number): void;
91
+ markDelete(label: number): void;
92
+ searchKnn(query: number[], k: number): {
93
+ neighbors: number[];
94
+ distances: number[];
95
+ };
96
+ getCurrentCount(): number;
97
+ getMaxElements(): number;
98
+ resizeIndex(newSize: number): void;
99
+ }
100
+ interface HnswLib {
101
+ HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
102
+ }
103
+ declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
104
+
69
105
  declare function redisStore(client: unknown): StoreAdapter;
70
106
 
71
107
  declare function sqliteStore(db: unknown): StoreAdapter;
72
108
 
73
- declare function pgvectorStore(pool: unknown): StoreAdapter;
109
+ interface PgVectorStoreOptions {
110
+ dimensions?: number;
111
+ }
112
+ declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
74
113
 
75
- export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
114
+ export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };
package/dist/index.js CHANGED
@@ -22,6 +22,7 @@ var index_exports = {};
22
22
  __export(index_exports, {
23
23
  createCache: () => createCache,
24
24
  createEmbedder: () => createEmbedder,
25
+ hnswMemoryStore: () => hnswMemoryStore,
25
26
  memoryStore: () => memoryStore,
26
27
  pgvectorStore: () => pgvectorStore,
27
28
  redisStore: () => redisStore,
@@ -174,7 +175,7 @@ function isExpired(entry) {
174
175
  return Date.now() > entry.expiresAt;
175
176
  }
176
177
  function computeExpiresAt(ttlSeconds) {
177
- if (ttlSeconds === 0) return void 0;
178
+ if (ttlSeconds <= 0) return void 0;
178
179
  return Date.now() + ttlSeconds * 1e3;
179
180
  }
180
181
 
@@ -250,7 +251,7 @@ function cosineSimilarity(a, b) {
250
251
  function findBestMatch(query, records, threshold) {
251
252
  if (records.length > 1e4) {
252
253
  console.warn(
253
- `[llm-cache] Scanning ${records.length} embeddings in memory. Consider switching to pgvector or a dedicated vector store for better performance.`
254
+ `[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
254
255
  );
255
256
  }
256
257
  let bestSimilarity = -Infinity;
@@ -301,7 +302,7 @@ function createCache(config) {
301
302
  if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
302
303
  const normalized = normalizePrompt(prompt);
303
304
  const key = hashPrompt(namespace, context, normalized);
304
- const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
305
+ const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
305
306
  try {
306
307
  const cached = await store.get(key);
307
308
  if (cached !== null) {
@@ -323,11 +324,9 @@ function createCache(config) {
323
324
  return { value: value2, hit: false, layer: "miss" };
324
325
  }
325
326
  let embedding;
326
- let records;
327
327
  try {
328
328
  const raw = await embed(normalized);
329
329
  embedding = Array.from(raw);
330
- records = await store.listEmbeddings(embeddingNamespace);
331
330
  } catch (err) {
332
331
  config.onError?.(err instanceof Error ? err : new Error(String(err)));
333
332
  lifetime.misses++;
@@ -336,7 +335,7 @@ function createCache(config) {
336
335
  return { value: value2, hit: false, layer: "miss" };
337
336
  }
338
337
  try {
339
- const match = findBestMatch(embedding, records, threshold);
338
+ const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
340
339
  if (match !== null) {
341
340
  const matchedEntry = await store.get(match.record.key);
342
341
  if (matchedEntry !== null) {
@@ -360,9 +359,9 @@ function createCache(config) {
360
359
  lifetime.misses++;
361
360
  config.onMiss?.(prompt);
362
361
  const value = await fn();
363
- if (value instanceof ReadableStream || typeof value === "object" && value !== null && (Symbol.asyncIterator in value || Symbol.iterator in value) && typeof value.text !== "string") {
362
+ if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
364
363
  throw new Error(
365
- "[llm-cache] Streaming responses cannot be cached. Collect the full response before passing fn() to wrap(), or use bypass: true to skip the cache for streaming calls."
364
+ "[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
366
365
  );
367
366
  }
368
367
  const now = Date.now();
@@ -389,6 +388,120 @@ function createCache(config) {
389
388
  ...namespace !== void 0 ? { namespace } : {}
390
389
  };
391
390
  }
391
+ function defaultAssemble(chunks) {
392
+ if (chunks.length > 0 && chunks.every((c) => typeof c === "string")) {
393
+ return chunks.join("");
394
+ }
395
+ return chunks;
396
+ }
397
/**
 * Default replay strategy used by `wrapStream()` when no `reconstruct`
 * option is supplied.
 *
 * `defaultAssemble` stores either a joined string (all-string chunks) or the
 * raw chunk array (any other chunk type, or an empty stream). An array is
 * therefore replayed element by element so the consumer sees the original
 * chunk sequence; any other cached value is replayed as one chunk.
 *
 * @param {unknown} cached - The value previously stored for this prompt.
 * @returns {AsyncGenerator<unknown>} Stream of replayed chunks.
 */
async function* defaultReconstruct(cached) {
  if (Array.isArray(cached)) {
    // Yielding the array itself would hand the consumer a T[] where it expects T.
    yield* cached;
    return;
  }
  yield cached;
}
400
/**
 * Streaming counterpart of `wrap()`.
 *
 * On a cache hit (exact key or semantic match) the cached response is replayed
 * through `reconstruct`. On a miss the live stream from `fn()` is forwarded
 * chunk by chunk while the chunks are collected; once the stream ends they are
 * collapsed with `assemble` and written to the store.
 *
 * @param {string} prompt - Prompt used for the exact-key and embedding lookups.
 * @param {() => AsyncIterable<unknown>} fn - Produces the live stream. Called on a
 *   miss, when `bypass` is set, or when the key lookup / embedding step fails.
 * @param {object} [options] - `threshold`, `ttl`, `namespace`, `context`, `bypass`,
 *   plus `assemble(chunks)` and `reconstruct(cached)`.
 * @returns {{ stream: AsyncIterable<unknown>, result: Promise<object> }}
 *   `stream` yields the chunks; `result` resolves with `{ hit, layer, ... }`.
 *   It resolves as soon as a hit is identified, otherwise when the stream
 *   finishes, fails, or is abandoned by the consumer.
 */
function wrapStream(prompt, fn, options) {
  const assemble = options?.assemble ?? defaultAssemble;
  const reconstruct = options?.reconstruct ?? defaultReconstruct;
  const namespace = options?.namespace;
  let resolveResult;
  const result = new Promise((res) => {
    resolveResult = res;
  });
  async function* generate() {
    try {
      if (options?.bypass === true) {
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }
      const context = options?.context;
      const threshold = options?.threshold ?? globalThreshold;
      const ttl = options?.ttl ?? globalTtl;
      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
      const normalized = normalizePrompt(prompt);
      const key = hashPrompt(namespace, context, normalized);
      // Must use the same encoding as wrap(): otherwise context-scoped entries
      // written by one API are never found by the other's semantic search.
      const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
      try {
        const cached = await store.get(key);
        if (cached !== null) {
          lifetime.hits++;
          const streamResult = {
            hit: true,
            layer: "exact",
            ...namespace !== void 0 ? { namespace } : {}
          };
          config.onHit?.({ ...streamResult, value: cached.response });
          resolveResult(streamResult);
          yield* reconstruct(cached.response);
          return;
        }
      } catch (err) {
        // Lookup failed: report it and fall back to the live stream uncached.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }
      let embedding;
      try {
        const raw = await embed(normalized);
        embedding = Array.from(raw);
      } catch (err) {
        // Without an embedding the response cannot be indexed, so stream uncached.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }
      try {
        // Prefer the store's own similarity search; otherwise scan its embeddings.
        const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
        if (match !== null) {
          const matchedEntry = await store.get(match.record.key);
          if (matchedEntry !== null) {
            lifetime.hits++;
            lifetime.similarities.push(match.similarity);
            const streamResult = {
              hit: true,
              layer: "semantic",
              similarity: match.similarity,
              matchedPrompt: matchedEntry.prompt,
              ...namespace !== void 0 ? { namespace } : {}
            };
            config.onHit?.({ ...streamResult, value: matchedEntry.response });
            resolveResult(streamResult);
            yield* reconstruct(matchedEntry.response);
            return;
          }
        }
      } catch (err) {
        // Semantic lookup is best-effort: report and continue as a miss.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
      lifetime.misses++;
      config.onMiss?.(prompt);
      const chunks = [];
      for await (const chunk of fn()) {
        chunks.push(chunk);
        yield chunk;
      }
      // Only reached when the live stream ran to completion, so a partial
      // response is never cached.
      const assembled = assemble(chunks);
      const now = Date.now();
      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
      const entry = {
        prompt: normalized,
        response: assembled,
        embedding,
        createdAt: now,
        ...embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {},
        ...expiresAt !== void 0 ? { expiresAt } : {}
      };
      try {
        await store.set(key, entry, ttl);
      } catch (err) {
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
    } finally {
      // Safety net: if fn() threw or the consumer stopped iterating early, no
      // path above reported a result and `await result` would hang forever.
      // Resolving an already-resolved promise is a no-op, so this only takes
      // effect in those cases.
      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
    }
  }
  return { stream: generate(), result };
}
392
505
  async function invalidate(prompt, options) {
393
506
  const normalized = normalizePrompt(prompt);
394
507
  const key = hashPrompt(options?.namespace, options?.context, normalized);
@@ -414,7 +527,128 @@ function createCache(config) {
414
527
  avgSimilarity
415
528
  };
416
529
  }
417
- return { wrap, invalidate, flush, stats: getStats };
530
+ return { wrap, wrapStream, invalidate, flush, stats: getStats };
531
+ }
532
+
533
+ // src/stores/hnsw-memory.ts
534
+ var INITIAL_CAPACITY = 1024;
535
/**
 * Loads the optional `hnswlib-node` dependency on demand.
 *
 * @returns {Promise<object>} The module namespace returned by `import("hnswlib-node")`.
 * @throws {Error} When the import fails; the underlying failure is attached as `cause`.
 */
async function loadHnswLib() {
  try {
    // NOTE(review): the import goes through `new Function`, presumably so the
    // bundler does not rewrite or statically resolve the optional dependency.
    // The specifier is a fixed literal, never user input. Confirm before changing.
    return await new Function("m", "return import(m)")("hnswlib-node");
  } catch (err) {
    // Keep the original failure: "not installed" and "native binding failed
    // to load" need different remedies.
    throw new Error(
      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node",
      { cause: err }
    );
  }
}
544
+ function hnswMemoryStore(injectedLib) {
545
+ const entries = /* @__PURE__ */ new Map();
546
+ const embeddingRecords = /* @__PURE__ */ new Map();
547
+ const nsIndices = /* @__PURE__ */ new Map();
548
+ let dimension = null;
549
+ let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;
550
+ function getLib() {
551
+ if (!libPromise) libPromise = loadHnswLib();
552
+ return libPromise;
553
+ }
554
+ function getOrCreateNsIndex(lib, ns, dim) {
555
+ let nsIdx = nsIndices.get(ns);
556
+ if (nsIdx === void 0) {
557
+ const index = new lib.HierarchicalNSW("cosine", dim);
558
+ index.initIndex(INITIAL_CAPACITY);
559
+ nsIdx = { index, keyToLabel: /* @__PURE__ */ new Map(), labelToKey: /* @__PURE__ */ new Map(), nextLabel: 0, maxElements: INITIAL_CAPACITY };
560
+ nsIndices.set(ns, nsIdx);
561
+ }
562
+ return nsIdx;
563
+ }
564
+ function nsKey(namespace) {
565
+ return namespace ?? "__default__";
566
+ }
567
+ const self = {
568
+ async get(key) {
569
+ const entry = entries.get(key);
570
+ if (!entry) return null;
571
+ if (isExpired(entry)) {
572
+ await self.delete(key);
573
+ return null;
574
+ }
575
+ return entry;
576
+ },
577
+ async set(key, entry, _ttlSeconds) {
578
+ const lib = await getLib();
579
+ if (dimension === null) dimension = entry.embedding.length;
580
+ const ns = nsKey(entry.namespace);
581
+ const nsIdx = getOrCreateNsIndex(lib, ns, dimension);
582
+ const existingLabel = nsIdx.keyToLabel.get(key);
583
+ if (existingLabel !== void 0) {
584
+ try {
585
+ nsIdx.index.markDelete(existingLabel);
586
+ } catch {
587
+ }
588
+ nsIdx.labelToKey.delete(existingLabel);
589
+ }
590
+ if (nsIdx.nextLabel >= nsIdx.maxElements) {
591
+ nsIdx.maxElements *= 2;
592
+ nsIdx.index.resizeIndex(nsIdx.maxElements);
593
+ }
594
+ const label = nsIdx.nextLabel++;
595
+ nsIdx.index.addPoint(entry.embedding, label);
596
+ nsIdx.keyToLabel.set(key, label);
597
+ nsIdx.labelToKey.set(label, key);
598
+ entries.set(key, entry);
599
+ embeddingRecords.set(key, {
600
+ key,
601
+ embedding: entry.embedding,
602
+ createdAt: entry.createdAt,
603
+ ...entry.namespace !== void 0 ? { namespace: entry.namespace } : {}
604
+ });
605
+ },
606
+ async delete(key) {
607
+ const entry = entries.get(key);
608
+ if (entry) {
609
+ const nsIdx = nsIndices.get(nsKey(entry.namespace));
610
+ if (nsIdx) {
611
+ const label = nsIdx.keyToLabel.get(key);
612
+ if (label !== void 0) {
613
+ try {
614
+ nsIdx.index.markDelete(label);
615
+ } catch {
616
+ }
617
+ nsIdx.keyToLabel.delete(key);
618
+ nsIdx.labelToKey.delete(label);
619
+ }
620
+ }
621
+ }
622
+ entries.delete(key);
623
+ embeddingRecords.delete(key);
624
+ },
625
+ async listEmbeddings(namespace) {
626
+ const all = Array.from(embeddingRecords.values());
627
+ return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
628
+ },
629
+ async searchSimilar(query, threshold, namespace) {
630
+ const nsIdx = nsIndices.get(nsKey(namespace));
631
+ if (!nsIdx || nsIdx.index.getCurrentCount() === 0) return null;
632
+ const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
633
+ const label = neighbors[0];
634
+ const distance = distances[0];
635
+ if (label === void 0 || distance === void 0) return null;
636
+ const similarity = 1 - distance;
637
+ if (similarity < threshold) return null;
638
+ const key = nsIdx.labelToKey.get(label);
639
+ if (!key) return null;
640
+ const entry = entries.get(key);
641
+ if (!entry) return null;
642
+ if (isExpired(entry)) {
643
+ await self.delete(key);
644
+ return null;
645
+ }
646
+ const record = embeddingRecords.get(key);
647
+ if (!record) return null;
648
+ return { record, similarity };
649
+ }
650
+ };
651
+ return self;
418
652
  }
419
653
 
420
654
  // src/utils/validate.ts
@@ -424,6 +658,12 @@ function assertCacheEntry(val, source) {
424
658
  }
425
659
  return val;
426
660
  }
661
+ function assertEmbeddingRecord(val, source) {
662
+ if (typeof val !== "object" || val === null || typeof val["key"] !== "string" || !Array.isArray(val["embedding"]) || typeof val["createdAt"] !== "number") {
663
+ throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
664
+ }
665
+ return val;
666
+ }
427
667
 
428
668
  // src/stores/redis.ts
429
669
  var ENTRY_PREFIX = "llm-cache:entry:";
@@ -471,7 +711,7 @@ function redisStore(client) {
471
711
  async listEmbeddings(namespace) {
472
712
  const hash = await redis.hgetall(nsHashKey(namespace));
473
713
  if (!hash) return [];
474
- return Object.values(hash).map((v) => JSON.parse(v));
714
+ return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
475
715
  },
476
716
  async close() {
477
717
  await redis.quit();
@@ -545,26 +785,32 @@ function sqliteStore(db) {
545
785
  },
546
786
  async listEmbeddings(namespace) {
547
787
  const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
548
- return rows.map((row) => ({
549
- key: row.key,
550
- embedding: JSON.parse(row.embedding),
551
- createdAt: row.created_at,
552
- ...row.namespace !== null ? { namespace: row.namespace } : {}
553
- }));
788
+ return rows.map((row) => {
789
+ const parsed = assertEmbeddingRecord(
790
+ {
791
+ key: row.key,
792
+ embedding: JSON.parse(row.embedding),
793
+ createdAt: row.created_at,
794
+ ...row.namespace !== null ? { namespace: row.namespace } : {}
795
+ },
796
+ "sqlite"
797
+ );
798
+ return parsed;
799
+ });
554
800
  }
555
801
  };
556
802
  }
557
803
 
558
804
  // src/stores/pgvector.ts
559
- var VECTOR_DIM = 1536;
560
- async function initSchema2(pool) {
805
+ var DEFAULT_DIMENSIONS = 1536;
806
+ async function initSchema2(pool, dimensions) {
561
807
  await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
562
808
  await pool.query(`
563
809
  CREATE TABLE IF NOT EXISTS llm_cache (
564
810
  key TEXT PRIMARY KEY,
565
811
  prompt TEXT NOT NULL,
566
812
  response JSONB,
567
- embedding vector(${VECTOR_DIM}),
813
+ embedding vector(${dimensions}),
568
814
  namespace TEXT,
569
815
  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
570
816
  expires_at TIMESTAMPTZ
@@ -580,9 +826,16 @@ async function initSchema2(pool) {
580
826
  function parseEmbedding(raw) {
581
827
  return raw.replace(/^\[/, "").replace(/\]$/, "").split(",").map(Number);
582
828
  }
583
- function pgvectorStore(pool) {
829
+ function pgvectorStore(pool, options) {
584
830
  const pg = pool;
585
- const ready = initSchema2(pg);
831
+ const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
832
+ if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
833
+ throw new RangeError(
834
+ `[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
835
+ );
836
+ }
837
+ const dimensions = rawDimensions;
838
+ const ready = initSchema2(pg, dimensions);
586
839
  return {
587
840
  async get(key) {
588
841
  await ready;
@@ -656,6 +909,7 @@ function pgvectorStore(pool) {
656
909
  0 && (module.exports = {
657
910
  createCache,
658
911
  createEmbedder,
912
+ hnswMemoryStore,
659
913
  memoryStore,
660
914
  pgvectorStore,
661
915
  redisStore,
package/dist/index.mjs CHANGED
@@ -143,7 +143,7 @@ function isExpired(entry) {
143
143
  return Date.now() > entry.expiresAt;
144
144
  }
145
145
  function computeExpiresAt(ttlSeconds) {
146
- if (ttlSeconds === 0) return void 0;
146
+ if (ttlSeconds <= 0) return void 0;
147
147
  return Date.now() + ttlSeconds * 1e3;
148
148
  }
149
149
 
@@ -219,7 +219,7 @@ function cosineSimilarity(a, b) {
219
219
  function findBestMatch(query, records, threshold) {
220
220
  if (records.length > 1e4) {
221
221
  console.warn(
222
- `[llm-cache] Scanning ${records.length} embeddings in memory. Consider switching to pgvector or a dedicated vector store for better performance.`
222
+ `[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
223
223
  );
224
224
  }
225
225
  let bestSimilarity = -Infinity;
@@ -270,7 +270,7 @@ function createCache(config) {
270
270
  if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
271
271
  const normalized = normalizePrompt(prompt);
272
272
  const key = hashPrompt(namespace, context, normalized);
273
- const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;
273
+ const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
274
274
  try {
275
275
  const cached = await store.get(key);
276
276
  if (cached !== null) {
@@ -292,11 +292,9 @@ function createCache(config) {
292
292
  return { value: value2, hit: false, layer: "miss" };
293
293
  }
294
294
  let embedding;
295
- let records;
296
295
  try {
297
296
  const raw = await embed(normalized);
298
297
  embedding = Array.from(raw);
299
- records = await store.listEmbeddings(embeddingNamespace);
300
298
  } catch (err) {
301
299
  config.onError?.(err instanceof Error ? err : new Error(String(err)));
302
300
  lifetime.misses++;
@@ -305,7 +303,7 @@ function createCache(config) {
305
303
  return { value: value2, hit: false, layer: "miss" };
306
304
  }
307
305
  try {
308
- const match = findBestMatch(embedding, records, threshold);
306
+ const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
309
307
  if (match !== null) {
310
308
  const matchedEntry = await store.get(match.record.key);
311
309
  if (matchedEntry !== null) {
@@ -329,9 +327,9 @@ function createCache(config) {
329
327
  lifetime.misses++;
330
328
  config.onMiss?.(prompt);
331
329
  const value = await fn();
332
- if (value instanceof ReadableStream || typeof value === "object" && value !== null && (Symbol.asyncIterator in value || Symbol.iterator in value) && typeof value.text !== "string") {
330
+ if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
333
331
  throw new Error(
334
- "[llm-cache] Streaming responses cannot be cached. Collect the full response before passing fn() to wrap(), or use bypass: true to skip the cache for streaming calls."
332
+ "[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
335
333
  );
336
334
  }
337
335
  const now = Date.now();
@@ -358,6 +356,120 @@ function createCache(config) {
358
356
  ...namespace !== void 0 ? { namespace } : {}
359
357
  };
360
358
  }
359
/**
 * Default `assemble` strategy used by wrapStream() when the caller supplies none.
 *
 * @param {Array<*>} chunks - Every chunk collected from the stream, in order.
 * @returns {string|Array<*>} The chunks joined into one string when the list is
 *   non-empty and made up solely of strings; otherwise the same `chunks` array,
 *   returned untouched (this includes the empty list).
 */
function defaultAssemble(chunks) {
  const hasChunks = chunks.length > 0;
  const allText = hasChunks && chunks.every((piece) => typeof piece === "string");
  return allText ? chunks.join("") : chunks;
}
365
/**
 * Default `reconstruct` strategy used by wrapStream() when the caller supplies none:
 * replays a cached value as a stream that emits exactly one chunk.
 *
 * @param {*} cached - The value previously stored in the cache.
 * @returns {AsyncGenerator<*>} Async generator yielding `cached` once, then finishing.
 */
async function* defaultReconstruct(cached) {
  const onlyChunk = cached;
  yield onlyChunk;
}
368
/**
 * Streaming counterpart of wrap(): yields chunks to the caller as they arrive
 * and stores the assembled response once the upstream stream completes. On a
 * cache hit the cached response is replayed through `reconstruct`.
 *
 * The returned `stream` is lazy: no lookup or call to `fn` happens until it is
 * iterated, and `result` settles only as a consequence of that iteration.
 *
 * `result` always settles once iteration ends:
 *  - resolved with hit/miss metadata on normal completion,
 *  - resolved as a miss if the consumer stops iterating early (nothing is cached),
 *  - rejected with the same error the stream throws if the upstream fails.
 *
 * @param {string} prompt - Prompt used for the exact and semantic lookups.
 * @param {Function} fn - Factory returning the upstream stream (sync or async
 *   iterable), or a Promise resolving to one.
 * @param {object} [options] - Per-call options read here: `assemble`,
 *   `reconstruct`, `bypass`, `namespace`, `context`, `threshold`, `ttl`.
 * @returns {{stream: AsyncGenerator<*>, result: Promise<object>}}
 */
function wrapStream(prompt, fn, options) {
  const assemble = options?.assemble ?? defaultAssemble;
  const reconstruct = options?.reconstruct ?? defaultReconstruct;
  const namespace = options?.namespace;
  const namespaceField = namespace !== void 0 ? { namespace } : {};
  let resolveResult;
  let rejectResult;
  const result = new Promise((res, rej) => {
    resolveResult = res;
    rejectResult = rej;
  });
  // Callers that only consume `stream` never observe `result`; mark it handled
  // so an upstream failure does not also surface as an unhandled rejection.
  result.catch(() => {});

  const reportError = (err) => {
    config.onError?.(err instanceof Error ? err : new Error(String(err)));
  };

  // Streams fn() straight through without touching the cache. Awaiting first
  // accepts factories that return a Promise of a stream.
  async function* passThrough() {
    yield* await fn();
    resolveResult({ hit: false, layer: "miss" });
  }

  // Reports a lookup/embedding failure, counts a miss, then streams uncached.
  async function* missAfterFailure(err) {
    reportError(err);
    lifetime.misses++;
    config.onMiss?.(prompt);
    yield* passThrough();
  }

  async function* generate() {
    try {
      if (options?.bypass === true) {
        yield* passThrough();
        return;
      }
      const context = options?.context;
      const threshold = options?.threshold ?? globalThreshold;
      const ttl = options?.ttl ?? globalTtl;
      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
      const normalized = normalizePrompt(prompt);
      const key = hashPrompt(namespace, context, normalized);
      const embeddingNamespace = context !== void 0 ? `${namespace ?? ""}__ctx__${context}` : namespace;

      // Layer 1: exact match on the hashed key. As before, a failure while
      // replaying the hit is also handled by this catch.
      try {
        const cached = await store.get(key);
        if (cached !== null) {
          lifetime.hits++;
          const streamResult = { hit: true, layer: "exact", ...namespaceField };
          config.onHit?.({ ...streamResult, value: cached.response });
          resolveResult(streamResult);
          yield* reconstruct(cached.response);
          return;
        }
      } catch (err) {
        yield* missAfterFailure(err);
        return;
      }

      // The embedding is needed both for the semantic lookup and for the entry
      // written on a miss, so without it the call is served uncached.
      let embedding;
      try {
        const raw = await embed(normalized);
        embedding = Array.from(raw);
      } catch (err) {
        yield* missAfterFailure(err);
        return;
      }

      // Layer 2: semantic match. Uses the store's own search when it has one,
      // otherwise scans the listed embeddings. Failures fall through to a miss.
      try {
        const match = typeof store.searchSimilar === "function"
          ? await store.searchSimilar(embedding, threshold, embeddingNamespace)
          : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
        if (match !== null) {
          const matchedEntry = await store.get(match.record.key);
          if (matchedEntry !== null) {
            lifetime.hits++;
            lifetime.similarities.push(match.similarity);
            const streamResult = {
              hit: true,
              layer: "semantic",
              similarity: match.similarity,
              matchedPrompt: matchedEntry.prompt,
              ...namespaceField
            };
            config.onHit?.({ ...streamResult, value: matchedEntry.response });
            resolveResult(streamResult);
            yield* reconstruct(matchedEntry.response);
            return;
          }
        }
      } catch (err) {
        reportError(err);
      }

      // Miss: forward chunks live while collecting them for the cache entry.
      lifetime.misses++;
      config.onMiss?.(prompt);
      const chunks = [];
      for await (const chunk of await fn()) {
        chunks.push(chunk);
        yield chunk;
      }
      const assembled = assemble(chunks);
      const now = Date.now();
      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
      const entry = {
        prompt: normalized,
        response: assembled,
        embedding,
        createdAt: now,
        ...embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {},
        ...expiresAt !== void 0 ? { expiresAt } : {}
      };
      try {
        await store.set(key, entry, ttl);
      } catch (err) {
        reportError(err);
      }
      resolveResult({ hit: false, layer: "miss", ...namespaceField });
    } catch (err) {
      // Settle `result` instead of leaving it pending forever, then let the
      // consumer of the stream see the original error.
      rejectResult(err);
      throw err;
    } finally {
      // No-op when already settled; covers a consumer that stopped early.
      resolveResult({ hit: false, layer: "miss", ...namespaceField });
    }
  }
  return { stream: generate(), result };
}
361
473
  async function invalidate(prompt, options) {
362
474
  const normalized = normalizePrompt(prompt);
363
475
  const key = hashPrompt(options?.namespace, options?.context, normalized);
@@ -383,7 +495,128 @@ function createCache(config) {
383
495
  avgSimilarity
384
496
  };
385
497
  }
386
- return { wrap, invalidate, flush, stats: getStats };
498
+ return { wrap, wrapStream, invalidate, flush, stats: getStats };
499
+ }
500
+
501
+ // src/stores/hnsw-memory.ts
502
+ var INITIAL_CAPACITY = 1024;
503
/**
 * Loads the optional `hnswlib-node` peer dependency on demand.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} With an install hint when the import fails; the underlying
 *   failure is attached as `cause` so a broken install can be told apart from
 *   a missing package.
 */
async function loadHnswLib() {
  try {
    // NOTE(review): the import() is issued through an indirect function,
    // presumably so bundlers/transpilers leave the optional dependency
    // unresolved at build time. The specifier is a constant, not user input.
    return await new Function("m", "return import(m)")("hnswlib-node");
  } catch (err) {
    throw new Error(
      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node",
      { cause: err }
    );
  }
}
512
/**
 * In-process cache store that keeps entries in Maps and one HNSW index per
 * namespace for nearest-neighbour lookups.
 *
 * @param {object} [injectedLib] - Object exposing a `HierarchicalNSW`
 *   constructor. When omitted, the library is loaded via loadHnswLib() on the
 *   first `set()`.
 * @returns {object} Store with `get`, `set`, `delete`, `listEmbeddings` and
 *   `searchSimilar`.
 */
function hnswMemoryStore(injectedLib) {
  const entries = /* @__PURE__ */ new Map();
  const embeddingRecords = /* @__PURE__ */ new Map();
  const nsIndices = /* @__PURE__ */ new Map();
  // Taken from the first embedding stored; every namespace index uses it.
  let dimension = null;
  let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;

  // Memoises the library promise so the import is attempted once.
  function getLib() {
    if (!libPromise) libPromise = loadHnswLib();
    return libPromise;
  }

  // Returns the index bookkeeping for `ns`, creating it on first use.
  function getOrCreateNsIndex(lib, ns, dim) {
    let nsIdx = nsIndices.get(ns);
    if (nsIdx === void 0) {
      const index = new lib.HierarchicalNSW("cosine", dim);
      index.initIndex(INITIAL_CAPACITY);
      nsIdx = {
        index,
        keyToLabel: /* @__PURE__ */ new Map(),
        labelToKey: /* @__PURE__ */ new Map(),
        nextLabel: 0,
        maxElements: INITIAL_CAPACITY
      };
      nsIndices.set(ns, nsIdx);
    }
    return nsIdx;
  }

  // Entries without a namespace share one index under a fixed name.
  function nsKey(namespace) {
    return namespace ?? "__default__";
  }

  const self = {
    /** Returns the entry for `key`, or null when absent or expired (expired entries are removed). */
    async get(key) {
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      return entry;
    },

    /** Stores `entry` under `key` and adds its embedding to the namespace index. */
    async set(key, entry, _ttlSeconds) {
      const lib = await getLib();
      if (dimension === null) dimension = entry.embedding.length;
      const ns = nsKey(entry.namespace);
      const nsIdx = getOrCreateNsIndex(lib, ns, dimension);
      const existingLabel = nsIdx.keyToLabel.get(key);
      if (existingLabel !== void 0) {
        // Overwrite: retire the previous point. Best-effort on purpose — a
        // failure here must not block the write, and the stale label is
        // unmapped below either way.
        try {
          nsIdx.index.markDelete(existingLabel);
        } catch {
        }
        nsIdx.labelToKey.delete(existingLabel);
      }
      // Labels are never reused, so capacity is driven by nextLabel.
      if (nsIdx.nextLabel >= nsIdx.maxElements) {
        nsIdx.maxElements *= 2;
        nsIdx.index.resizeIndex(nsIdx.maxElements);
      }
      const label = nsIdx.nextLabel++;
      nsIdx.index.addPoint(entry.embedding, label);
      nsIdx.keyToLabel.set(key, label);
      nsIdx.labelToKey.set(label, key);
      entries.set(key, entry);
      embeddingRecords.set(key, {
        key,
        embedding: entry.embedding,
        createdAt: entry.createdAt,
        ...entry.namespace !== void 0 ? { namespace: entry.namespace } : {}
      });
    },

    /** Removes `key` from the Maps and soft-deletes its point in the index. */
    async delete(key) {
      const entry = entries.get(key);
      if (entry) {
        const nsIdx = nsIndices.get(nsKey(entry.namespace));
        if (nsIdx) {
          const label = nsIdx.keyToLabel.get(key);
          if (label !== void 0) {
            // Best-effort, same reasoning as in set().
            try {
              nsIdx.index.markDelete(label);
            } catch {
            }
            nsIdx.keyToLabel.delete(key);
            nsIdx.labelToKey.delete(label);
          }
        }
      }
      entries.delete(key);
      embeddingRecords.delete(key);
    },

    /** Lists stored embedding records, optionally restricted to one namespace. */
    async listEmbeddings(namespace) {
      const all = Array.from(embeddingRecords.values());
      return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
    },

    /**
     * Returns `{ record, similarity }` for the nearest stored embedding in the
     * namespace when its similarity (1 - distance) reaches `threshold`,
     * otherwise null.
     */
    async searchSimilar(query, threshold, namespace) {
      const nsIdx = nsIndices.get(nsKey(namespace));
      // Guard on live keys rather than the index's own element count:
      // NOTE(review): hnswlib's count presumably still includes points that
      // were only marked deleted, which would let a fully-emptied namespace
      // reach searchKnn with nothing to return.
      if (!nsIdx || nsIdx.keyToLabel.size === 0) return null;
      const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
      const label = neighbors[0];
      const distance = distances[0];
      if (label === void 0 || distance === void 0) return null;
      const similarity = 1 - distance;
      if (similarity < threshold) return null;
      const key = nsIdx.labelToKey.get(label);
      if (!key) return null;
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      const record = embeddingRecords.get(key);
      if (!record) return null;
      return { record, similarity };
    }
  };
  return self;
}
388
621
 
389
622
  // src/utils/validate.ts
@@ -393,6 +626,12 @@ function assertCacheEntry(val, source) {
393
626
  }
394
627
  return val;
395
628
  }
629
/**
 * Shape check for an embedding record read back from a store.
 *
 * @param {*} val - Candidate record.
 * @param {string} source - Store name, interpolated into the error message.
 * @returns {*} `val` itself when it is a non-null object with a string `key`,
 *   an array `embedding` and a numeric `createdAt`.
 * @throws {Error} When any of those conditions does not hold.
 */
function assertEmbeddingRecord(val, source) {
  const isObject = typeof val === "object" && val !== null;
  const wellFormed =
    isObject &&
    typeof val.key === "string" &&
    Array.isArray(val.embedding) &&
    typeof val.createdAt === "number";
  if (wellFormed) {
    return val;
  }
  throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
}
396
635
 
397
636
  // src/stores/redis.ts
398
637
  var ENTRY_PREFIX = "llm-cache:entry:";
@@ -440,7 +679,7 @@ function redisStore(client) {
440
679
  async listEmbeddings(namespace) {
441
680
  const hash = await redis.hgetall(nsHashKey(namespace));
442
681
  if (!hash) return [];
443
- return Object.values(hash).map((v) => JSON.parse(v));
682
+ return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
444
683
  },
445
684
  async close() {
446
685
  await redis.quit();
@@ -514,26 +753,32 @@ function sqliteStore(db) {
514
753
  },
515
754
  async listEmbeddings(namespace) {
516
755
  const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
517
- return rows.map((row) => ({
518
- key: row.key,
519
- embedding: JSON.parse(row.embedding),
520
- createdAt: row.created_at,
521
- ...row.namespace !== null ? { namespace: row.namespace } : {}
522
- }));
756
+ return rows.map((row) => {
757
+ const parsed = assertEmbeddingRecord(
758
+ {
759
+ key: row.key,
760
+ embedding: JSON.parse(row.embedding),
761
+ createdAt: row.created_at,
762
+ ...row.namespace !== null ? { namespace: row.namespace } : {}
763
+ },
764
+ "sqlite"
765
+ );
766
+ return parsed;
767
+ });
523
768
  }
524
769
  };
525
770
  }
526
771
 
527
772
  // src/stores/pgvector.ts
528
- var VECTOR_DIM = 1536;
529
- async function initSchema2(pool) {
773
+ var DEFAULT_DIMENSIONS = 1536;
774
+ async function initSchema2(pool, dimensions) {
530
775
  await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
531
776
  await pool.query(`
532
777
  CREATE TABLE IF NOT EXISTS llm_cache (
533
778
  key TEXT PRIMARY KEY,
534
779
  prompt TEXT NOT NULL,
535
780
  response JSONB,
536
- embedding vector(${VECTOR_DIM}),
781
+ embedding vector(${dimensions}),
537
782
  namespace TEXT,
538
783
  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
539
784
  expires_at TIMESTAMPTZ
@@ -549,9 +794,16 @@ async function initSchema2(pool) {
549
794
  function parseEmbedding(raw) {
550
795
  return raw.replace(/^\[/, "").replace(/\]$/, "").split(",").map(Number);
551
796
  }
552
- function pgvectorStore(pool) {
797
+ function pgvectorStore(pool, options) {
553
798
  const pg = pool;
554
- const ready = initSchema2(pg);
799
+ const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
800
+ if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
801
+ throw new RangeError(
802
+ `[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
803
+ );
804
+ }
805
+ const dimensions = rawDimensions;
806
+ const ready = initSchema2(pg, dimensions);
555
807
  return {
556
808
  async get(key) {
557
809
  await ready;
@@ -624,6 +876,7 @@ function pgvectorStore(pool) {
624
876
  export {
625
877
  createCache,
626
878
  createEmbedder,
879
+ hnswMemoryStore,
627
880
  memoryStore,
628
881
  pgvectorStore,
629
882
  redisStore,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pravoobi/llm-cache",
3
- "version": "0.1.0",
3
+ "version": "0.3.1",
4
4
  "description": "Semantic caching layer for LLM calls. Deduplicates near-identical prompts using embeddings.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -26,7 +26,8 @@
26
26
  "ioredis": ">=5.0.0",
27
27
  "better-sqlite3": ">=9.0.0",
28
28
  "pg": ">=8.0.0",
29
- "@xenova/transformers": ">=2.0.0"
29
+ "@xenova/transformers": ">=2.0.0",
30
+ "hnswlib-node": ">=3.0.0"
30
31
  },
31
32
  "peerDependenciesMeta": {
32
33
  "openai": { "optional": true },
@@ -34,7 +35,8 @@
34
35
  "ioredis": { "optional": true },
35
36
  "better-sqlite3": { "optional": true },
36
37
  "pg": { "optional": true },
37
- "@xenova/transformers": { "optional": true }
38
+ "@xenova/transformers": { "optional": true },
39
+ "hnswlib-node": { "optional": true }
38
40
  },
39
41
  "author": "Venkata Praveen Kumar Velisetty",
40
42
  "repository": {
@@ -58,5 +60,9 @@
58
60
  },
59
61
  "keywords": ["llm", "cache", "semantic", "embeddings", "openai", "anthropic", "ai"],
60
62
  "license": "MIT",
61
- "engines": { "node": ">=18.0.0" }
63
+ "engines": { "node": ">=20.0.0" },
64
+ "publishConfig": {
65
+ "access": "public",
66
+ "provenance": true
67
+ }
62
68
  }