@pravoobi/llm-cache 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -3
- package/dist/index.d.mts +41 -2
- package/dist/index.d.ts +41 -2
- package/dist/index.js +275 -21
- package/dist/index.mjs +274 -21
- package/package.json +10 -4
package/README.md
CHANGED
|
@@ -60,6 +60,9 @@ npm install better-sqlite3
|
|
|
60
60
|
|
|
61
61
|
# Postgres / pgvector
|
|
62
62
|
npm install pg
|
|
63
|
+
|
|
64
|
+
# In-process ANN index (hnswMemoryStore — for >10k entries without a database)
|
|
65
|
+
npm install hnswlib-node
|
|
63
66
|
```
|
|
64
67
|
|
|
65
68
|
---
|
|
@@ -134,7 +137,50 @@ const result = await cache.wrap(
|
|
|
134
137
|
| `matchedPrompt` | `string?` | The original prompt that was matched (semantic hits only) |
|
|
135
138
|
| `namespace` | `string?` | The namespace used for this call |
|
|
136
139
|
|
|
137
|
-
> **Streaming
|
|
140
|
+
> **Streaming:** Use `wrapStream()` for streaming LLM calls — see below. Passing a stream directly to `wrap()` will throw.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
### `cache.wrapStream(prompt, fn, options?)`
|
|
145
|
+
|
|
146
|
+
For streaming LLM responses. Yields chunks to the caller in real-time while assembling the full response for the cache in the background. On a cache hit, replays the cached response as a synthetic stream so the caller always gets an `AsyncIterable<T>` regardless of hit or miss.
|
|
147
|
+
|
|
148
|
+
Returns `{ stream: AsyncIterable<T>, result: Promise<StreamCacheResult> }`.
|
|
149
|
+
|
|
150
|
+
```ts
|
|
151
|
+
const { stream, result } = cache.wrapStream(
|
|
152
|
+
prompt,
|
|
153
|
+
() => openai.chat.completions.create({ stream: true, ... }),
|
|
154
|
+
{
|
|
155
|
+
// Collapse provider-specific chunk shape into the cached value
|
|
156
|
+
assemble: (chunks) =>
|
|
157
|
+
chunks.map(c => c.choices[0]?.delta.content ?? '').join(''),
|
|
158
|
+
// Replay the cached string as a single chunk on a hit
|
|
159
|
+
reconstruct: async function* (text) {
|
|
160
|
+
yield { choices: [{ delta: { content: text } }] }
|
|
161
|
+
},
|
|
162
|
+
// All CacheOptions (threshold, ttl, namespace, context, bypass) work here too
|
|
163
|
+
}
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
for await (const chunk of stream) {
|
|
167
|
+
process.stdout.write(chunk.choices[0]?.delta.content ?? '')
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const { hit, layer, similarity } = await result // resolves after stream ends
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**`StreamCacheResult`**:
|
|
174
|
+
|
|
175
|
+
| Field | Type | Description |
|
|
176
|
+
|---|---|---|
|
|
177
|
+
| `hit` | `boolean` | Whether it was served from cache |
|
|
178
|
+
| `layer` | `"exact" \| "semantic" \| "miss"` | Which cache layer matched |
|
|
179
|
+
| `similarity` | `number?` | Cosine similarity score (semantic hits only) |
|
|
180
|
+
| `matchedPrompt` | `string?` | The original prompt matched (semantic hits only) |
|
|
181
|
+
| `namespace` | `string?` | The namespace used for this call |
|
|
182
|
+
|
|
183
|
+
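If `assemble` / `reconstruct` are omitted, string chunks are joined by default and the assembled string is replayed as a single chunk on a hit. For non-string chunks (for example provider delta objects), supply both `assemble` and `reconstruct` so the cached value can be turned back into chunks of the original shape.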
|
|
138
184
|
|
|
139
185
|
---
|
|
140
186
|
|
|
@@ -250,7 +296,23 @@ createCache({ embedder: ..., store: memoryStore() })
|
|
|
250
296
|
// or just omit `store` — memory is the default
|
|
251
297
|
```
|
|
252
298
|
|
|
253
|
-
Not persistent across restarts. Suitable for single-process, development, or short-lived workloads.
|
|
299
|
+
Not persistent across restarts. Suitable for single-process, development, or short-lived workloads. Uses O(n) linear scan for similarity search — switch to `hnswMemoryStore` when entry count exceeds ~10k.
|
|
300
|
+
|
|
301
|
+
### In-memory with ANN index (hnswMemoryStore)
|
|
302
|
+
|
|
303
|
+
Drop-in replacement for `memoryStore()` that uses an [HNSW](https://github.com/nmslib/hnswlib) index for O(log n) similarity search. No database required.
|
|
304
|
+
|
|
305
|
+
```ts
|
|
306
|
+
// Requires: npm install hnswlib-node
|
|
307
|
+
import { createCache, hnswMemoryStore } from '@pravoobi/llm-cache'
|
|
308
|
+
|
|
309
|
+
createCache({ embedder: ..., store: hnswMemoryStore() })
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
- Index is created lazily on first `set()` — dimension detected automatically
|
|
313
|
+
- One index per namespace, so namespace isolation has no search overhead
|
|
314
|
+
- Automatically resizes when capacity is exceeded
|
|
315
|
+
- Not persistent across restarts
|
|
254
316
|
|
|
255
317
|
### Redis
|
|
256
318
|
|
|
@@ -281,11 +343,21 @@ import { Pool } from 'pg'
|
|
|
281
343
|
import { createCache, pgvectorStore } from '@pravoobi/llm-cache'
|
|
282
344
|
|
|
283
345
|
const pool = new Pool({ connectionString: process.env.DATABASE_URL })
|
|
346
|
+
|
|
347
|
+
// Default dimension (1536) — OpenAI text-embedding-3-small, text-embedding-ada-002 (text-embedding-3-large defaults to 3072)
|
|
284
348
|
createCache({ embedder: ..., store: pgvectorStore(pool) })
|
|
349
|
+
|
|
350
|
+
// Cohere embed-english-v3.0
|
|
351
|
+
createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 1024 }) })
|
|
352
|
+
|
|
353
|
+
// Local model (Xenova/all-MiniLM-L6-v2)
|
|
354
|
+
createCache({ embedder: ..., store: pgvectorStore(pool, { dimensions: 384 }) })
|
|
285
355
|
```
|
|
286
356
|
|
|
287
357
|
Requires the [`pgvector`](https://github.com/pgvector/pgvector) Postgres extension. Best for multi-process, high-traffic production use. Uses native ANN similarity search via `ivfflat`.
|
|
288
358
|
|
|
359
|
+
> **Changing dimensions on an existing table:** `CREATE TABLE IF NOT EXISTS` will not alter an existing column type. If you switch embedding models, run a migration (`ALTER TABLE llm_cache ALTER COLUMN embedding TYPE vector(1024)`) and rebuild the index before updating `dimensions`.
|
|
360
|
+
|
|
289
361
|
---
|
|
290
362
|
|
|
291
363
|
## Namespace and context scoping
|
|
@@ -329,7 +401,7 @@ Embedding costs (e.g., `text-embedding-3-small` at $0.02/million tokens) are neg
|
|
|
329
401
|
- **Highly personalized responses** — If the correct answer genuinely depends on who is asking, use per-user namespaces carefully or disable caching.
|
|
330
402
|
- **Creative or stochastic tasks** — Caching "Write me a poem about autumn" means every user gets the same poem.
|
|
331
403
|
- **Short TTLs with fast-changing data** — If your data changes faster than your TTL, stale hits cause more harm than cost savings justify.
|
|
332
|
-
- **
|
|
404
|
+
- **Truly unique streaming responses** — `wrapStream()` assembles and caches the response after the stream ends. If every prompt is unique and never repeated, you pay assembly overhead with no cache benefit; consider `bypass: true` for those calls.
|
|
333
405
|
|
|
334
406
|
---
|
|
335
407
|
|
package/dist/index.d.mts
CHANGED
|
@@ -10,6 +10,10 @@ interface StoreAdapter {
|
|
|
10
10
|
delete(key: string): Promise<void>;
|
|
11
11
|
listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
|
|
12
12
|
close?(): Promise<void>;
|
|
13
|
+
searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
|
|
14
|
+
record: EmbeddingRecord;
|
|
15
|
+
similarity: number;
|
|
16
|
+
} | null>;
|
|
13
17
|
}
|
|
14
18
|
interface EmbeddingRecord {
|
|
15
19
|
key: string;
|
|
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
|
|
|
49
53
|
onMiss?: (prompt: string) => void;
|
|
50
54
|
onError?: (err: Error) => void;
|
|
51
55
|
}
|
|
56
|
+
interface CacheStreamOptions<T> extends CacheOptions {
|
|
57
|
+
assemble?: (chunks: T[]) => unknown;
|
|
58
|
+
reconstruct?: (cached: unknown) => AsyncIterable<T>;
|
|
59
|
+
}
|
|
60
|
+
interface StreamCacheResult {
|
|
61
|
+
hit: boolean;
|
|
62
|
+
layer: 'exact' | 'semantic' | 'miss';
|
|
63
|
+
similarity?: number;
|
|
64
|
+
matchedPrompt?: string;
|
|
65
|
+
namespace?: string;
|
|
66
|
+
}
|
|
52
67
|
|
|
53
68
|
declare function createCache(config: LLMCacheConfig): {
|
|
54
69
|
wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
|
|
70
|
+
wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
|
|
71
|
+
stream: AsyncIterable<T>;
|
|
72
|
+
result: Promise<StreamCacheResult>;
|
|
73
|
+
};
|
|
55
74
|
invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
|
|
56
75
|
flush: (namespace?: string) => Promise<void>;
|
|
57
76
|
stats: () => {
|
|
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
|
|
|
66
85
|
|
|
67
86
|
declare function memoryStore(): StoreAdapter;
|
|
68
87
|
|
|
88
|
+
interface HnswIndex {
|
|
89
|
+
initIndex(maxElements: number, efConstruction?: number, m?: number): void;
|
|
90
|
+
addPoint(point: number[], label: number): void;
|
|
91
|
+
markDelete(label: number): void;
|
|
92
|
+
searchKnn(query: number[], k: number): {
|
|
93
|
+
neighbors: number[];
|
|
94
|
+
distances: number[];
|
|
95
|
+
};
|
|
96
|
+
getCurrentCount(): number;
|
|
97
|
+
getMaxElements(): number;
|
|
98
|
+
resizeIndex(newSize: number): void;
|
|
99
|
+
}
|
|
100
|
+
interface HnswLib {
|
|
101
|
+
HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
|
|
102
|
+
}
|
|
103
|
+
declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
|
|
104
|
+
|
|
69
105
|
declare function redisStore(client: unknown): StoreAdapter;
|
|
70
106
|
|
|
71
107
|
declare function sqliteStore(db: unknown): StoreAdapter;
|
|
72
108
|
|
|
73
|
-
|
|
109
|
+
interface PgVectorStoreOptions {
|
|
110
|
+
dimensions?: number;
|
|
111
|
+
}
|
|
112
|
+
declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
|
|
74
113
|
|
|
75
|
-
export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
|
|
114
|
+
export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };
|
package/dist/index.d.ts
CHANGED
|
@@ -10,6 +10,10 @@ interface StoreAdapter {
|
|
|
10
10
|
delete(key: string): Promise<void>;
|
|
11
11
|
listEmbeddings(namespace?: string): Promise<EmbeddingRecord[]>;
|
|
12
12
|
close?(): Promise<void>;
|
|
13
|
+
searchSimilar?(query: number[], threshold: number, namespace?: string): Promise<{
|
|
14
|
+
record: EmbeddingRecord;
|
|
15
|
+
similarity: number;
|
|
16
|
+
} | null>;
|
|
13
17
|
}
|
|
14
18
|
interface EmbeddingRecord {
|
|
15
19
|
key: string;
|
|
@@ -49,9 +53,24 @@ interface LLMCacheConfig {
|
|
|
49
53
|
onMiss?: (prompt: string) => void;
|
|
50
54
|
onError?: (err: Error) => void;
|
|
51
55
|
}
|
|
56
|
+
interface CacheStreamOptions<T> extends CacheOptions {
|
|
57
|
+
assemble?: (chunks: T[]) => unknown;
|
|
58
|
+
reconstruct?: (cached: unknown) => AsyncIterable<T>;
|
|
59
|
+
}
|
|
60
|
+
interface StreamCacheResult {
|
|
61
|
+
hit: boolean;
|
|
62
|
+
layer: 'exact' | 'semantic' | 'miss';
|
|
63
|
+
similarity?: number;
|
|
64
|
+
matchedPrompt?: string;
|
|
65
|
+
namespace?: string;
|
|
66
|
+
}
|
|
52
67
|
|
|
53
68
|
declare function createCache(config: LLMCacheConfig): {
|
|
54
69
|
wrap: <T>(prompt: string, fn: () => Promise<T>, options?: CacheOptions) => Promise<CacheResult<T>>;
|
|
70
|
+
wrapStream: <T>(prompt: string, fn: () => AsyncIterable<T>, options?: CacheStreamOptions<T>) => {
|
|
71
|
+
stream: AsyncIterable<T>;
|
|
72
|
+
result: Promise<StreamCacheResult>;
|
|
73
|
+
};
|
|
55
74
|
invalidate: (prompt: string, options?: Pick<CacheOptions, "namespace" | "context">) => Promise<void>;
|
|
56
75
|
flush: (namespace?: string) => Promise<void>;
|
|
57
76
|
stats: () => {
|
|
@@ -66,10 +85,30 @@ declare function createEmbedder(config: EmbedderConfig): EmbedFn;
|
|
|
66
85
|
|
|
67
86
|
declare function memoryStore(): StoreAdapter;
|
|
68
87
|
|
|
88
|
+
interface HnswIndex {
|
|
89
|
+
initIndex(maxElements: number, efConstruction?: number, m?: number): void;
|
|
90
|
+
addPoint(point: number[], label: number): void;
|
|
91
|
+
markDelete(label: number): void;
|
|
92
|
+
searchKnn(query: number[], k: number): {
|
|
93
|
+
neighbors: number[];
|
|
94
|
+
distances: number[];
|
|
95
|
+
};
|
|
96
|
+
getCurrentCount(): number;
|
|
97
|
+
getMaxElements(): number;
|
|
98
|
+
resizeIndex(newSize: number): void;
|
|
99
|
+
}
|
|
100
|
+
interface HnswLib {
|
|
101
|
+
HierarchicalNSW: new (space: string, dim: number) => HnswIndex;
|
|
102
|
+
}
|
|
103
|
+
declare function hnswMemoryStore(injectedLib?: HnswLib): StoreAdapter;
|
|
104
|
+
|
|
69
105
|
declare function redisStore(client: unknown): StoreAdapter;
|
|
70
106
|
|
|
71
107
|
declare function sqliteStore(db: unknown): StoreAdapter;
|
|
72
108
|
|
|
73
|
-
|
|
109
|
+
interface PgVectorStoreOptions {
|
|
110
|
+
dimensions?: number;
|
|
111
|
+
}
|
|
112
|
+
declare function pgvectorStore(pool: unknown, options?: PgVectorStoreOptions): StoreAdapter;
|
|
74
113
|
|
|
75
|
-
export { type CacheEntry, type CacheOptions, type CacheResult, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type StoreAdapter, createCache, createEmbedder, memoryStore, pgvectorStore, redisStore, sqliteStore };
|
|
114
|
+
export { type CacheEntry, type CacheOptions, type CacheResult, type CacheStreamOptions, type EmbedFn, type EmbedderConfig, type EmbeddingRecord, type LLMCacheConfig, type PgVectorStoreOptions, type StoreAdapter, type StreamCacheResult, createCache, createEmbedder, hnswMemoryStore, memoryStore, pgvectorStore, redisStore, sqliteStore };
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,7 @@ var index_exports = {};
|
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
createCache: () => createCache,
|
|
24
24
|
createEmbedder: () => createEmbedder,
|
|
25
|
+
hnswMemoryStore: () => hnswMemoryStore,
|
|
25
26
|
memoryStore: () => memoryStore,
|
|
26
27
|
pgvectorStore: () => pgvectorStore,
|
|
27
28
|
redisStore: () => redisStore,
|
|
@@ -174,7 +175,7 @@ function isExpired(entry) {
|
|
|
174
175
|
return Date.now() > entry.expiresAt;
|
|
175
176
|
}
|
|
176
177
|
/**
 * Converts a TTL in seconds into an absolute expiry timestamp.
 *
 * @param {number} ttlSeconds - Time to live in seconds.
 * @returns {number|undefined} Epoch milliseconds at which the entry expires,
 *   or `undefined` when the TTL is zero or negative.
 */
function computeExpiresAt(ttlSeconds) {
  return ttlSeconds <= 0 ? void 0 : Date.now() + ttlSeconds * 1e3;
}
|
|
180
181
|
|
|
@@ -250,7 +251,7 @@ function cosineSimilarity(a, b) {
|
|
|
250
251
|
function findBestMatch(query, records, threshold) {
|
|
251
252
|
if (records.length > 1e4) {
|
|
252
253
|
console.warn(
|
|
253
|
-
`[llm-cache] Scanning ${records.length} embeddings
|
|
254
|
+
`[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
|
|
254
255
|
);
|
|
255
256
|
}
|
|
256
257
|
let bestSimilarity = -Infinity;
|
|
@@ -301,7 +302,7 @@ function createCache(config) {
|
|
|
301
302
|
if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
|
|
302
303
|
const normalized = normalizePrompt(prompt);
|
|
303
304
|
const key = hashPrompt(namespace, context, normalized);
|
|
304
|
-
const embeddingNamespace = context !== void 0 ?
|
|
305
|
+
const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
|
|
305
306
|
try {
|
|
306
307
|
const cached = await store.get(key);
|
|
307
308
|
if (cached !== null) {
|
|
@@ -323,11 +324,9 @@ function createCache(config) {
|
|
|
323
324
|
return { value: value2, hit: false, layer: "miss" };
|
|
324
325
|
}
|
|
325
326
|
let embedding;
|
|
326
|
-
let records;
|
|
327
327
|
try {
|
|
328
328
|
const raw = await embed(normalized);
|
|
329
329
|
embedding = Array.from(raw);
|
|
330
|
-
records = await store.listEmbeddings(embeddingNamespace);
|
|
331
330
|
} catch (err) {
|
|
332
331
|
config.onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
333
332
|
lifetime.misses++;
|
|
@@ -336,7 +335,7 @@ function createCache(config) {
|
|
|
336
335
|
return { value: value2, hit: false, layer: "miss" };
|
|
337
336
|
}
|
|
338
337
|
try {
|
|
339
|
-
const match = findBestMatch(embedding,
|
|
338
|
+
const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
|
|
340
339
|
if (match !== null) {
|
|
341
340
|
const matchedEntry = await store.get(match.record.key);
|
|
342
341
|
if (matchedEntry !== null) {
|
|
@@ -360,9 +359,9 @@ function createCache(config) {
|
|
|
360
359
|
lifetime.misses++;
|
|
361
360
|
config.onMiss?.(prompt);
|
|
362
361
|
const value = await fn();
|
|
363
|
-
if (value instanceof ReadableStream || typeof value === "object" && value !== null &&
|
|
362
|
+
if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
|
|
364
363
|
throw new Error(
|
|
365
|
-
"[llm-cache] Streaming responses cannot be cached.
|
|
364
|
+
"[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
|
|
366
365
|
);
|
|
367
366
|
}
|
|
368
367
|
const now = Date.now();
|
|
@@ -389,6 +388,120 @@ function createCache(config) {
|
|
|
389
388
|
...namespace !== void 0 ? { namespace } : {}
|
|
390
389
|
};
|
|
391
390
|
}
|
|
391
|
+
/**
 * Default strategy for collapsing streamed chunks into the value to cache.
 *
 * @param {Array} chunks - Chunks collected from the stream, in order.
 * @returns {string|Array} The concatenated text when there is at least one
 *   chunk and every chunk is a string; otherwise the same `chunks` array.
 */
function defaultAssemble(chunks) {
  const hasNonString = chunks.some((piece) => typeof piece !== "string");
  if (chunks.length === 0 || hasNonString) {
    return chunks;
  }
  return chunks.join("");
}
|
|
397
|
+
/**
 * Default strategy for replaying a cached value as a stream.
 *
 * The default assembler stores non-string chunks as an array of the original
 * chunks, so an array is replayed element by element; emitting the array
 * itself would hand the consumer a chunk of the wrong shape. Any other value
 * (typically the joined string) is replayed as a single chunk.
 *
 * @param {unknown} cached - The value previously stored for this prompt.
 * @yields {unknown} The chunks to hand to the stream consumer.
 */
async function* defaultReconstruct(cached) {
  if (Array.isArray(cached)) {
    for (const chunk of cached) {
      yield chunk;
    }
    return;
  }
  yield cached;
}
|
|
400
|
+
/**
 * Streaming counterpart of wrap().
 *
 * On a miss, chunks produced by `fn()` are yielded to the caller as they
 * arrive and collected; once the source stream ends they are assembled and
 * stored. On an exact or semantic hit the cached response is replayed through
 * `reconstruct`.
 *
 * @param {string} prompt - Prompt used to derive the cache key and embedding.
 * @param {() => AsyncIterable} fn - Factory that starts the underlying stream.
 * @param {object} [options] - Cache options (`namespace`, `context`,
 *   `threshold`, `ttl`, `bypass`) plus optional `assemble` / `reconstruct`.
 * @returns {{ stream: AsyncIterable, result: Promise<object> }} `stream` is
 *   the iterable to consume; `result` resolves with hit/miss metadata. It
 *   always settles, including when the stream fails or the consumer stops
 *   iterating early (in which case it resolves as a miss).
 */
function wrapStream(prompt, fn, options) {
  const assemble = options?.assemble ?? defaultAssemble;
  const reconstruct = options?.reconstruct ?? defaultReconstruct;
  let resolveResult;
  const result = new Promise((res) => {
    resolveResult = res;
  });
  async function* generate() {
    const namespace = options?.namespace;
    try {
      if (options?.bypass === true) {
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }
      const context = options?.context;
      const threshold = options?.threshold ?? globalThreshold;
      const ttl = options?.ttl ?? globalTtl;
      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
      const normalized = normalizePrompt(prompt);
      const key = hashPrompt(namespace, context, normalized);
      // Must match the format wrap() uses, otherwise entries written by one
      // entry point are invisible to the other's semantic lookup.
      const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;

      // Exact-match lookup. Replay happens after the try block so that an
      // error thrown while replaying is not mistaken for a store failure
      // (which would stream fn() on top of already-emitted cached chunks).
      let exactReplay = null;
      try {
        const cached = await store.get(key);
        if (cached !== null) {
          lifetime.hits++;
          const streamResult = {
            hit: true,
            layer: "exact",
            ...namespace !== void 0 ? { namespace } : {}
          };
          config.onHit?.({ ...streamResult, value: cached.response });
          resolveResult(streamResult);
          exactReplay = { response: cached.response };
        }
      } catch (err) {
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }
      if (exactReplay !== null) {
        yield* reconstruct(exactReplay.response);
        return;
      }

      let embedding;
      try {
        const raw = await embed(normalized);
        embedding = Array.from(raw);
      } catch (err) {
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        resolveResult({ hit: false, layer: "miss" });
        return;
      }

      // Semantic lookup: prefer the store's own similarity search, otherwise
      // scan the namespace's embeddings. Failures here degrade to a miss.
      let semanticReplay = null;
      try {
        const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
        if (match !== null) {
          const matchedEntry = await store.get(match.record.key);
          if (matchedEntry !== null) {
            lifetime.hits++;
            lifetime.similarities.push(match.similarity);
            const streamResult = {
              hit: true,
              layer: "semantic",
              similarity: match.similarity,
              matchedPrompt: matchedEntry.prompt,
              ...namespace !== void 0 ? { namespace } : {}
            };
            config.onHit?.({ ...streamResult, value: matchedEntry.response });
            resolveResult(streamResult);
            semanticReplay = { response: matchedEntry.response };
          }
        }
      } catch (err) {
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
      if (semanticReplay !== null) {
        yield* reconstruct(semanticReplay.response);
        return;
      }

      // Miss: pass chunks through while collecting them for the cache.
      lifetime.misses++;
      config.onMiss?.(prompt);
      const chunks = [];
      for await (const chunk of fn()) {
        chunks.push(chunk);
        yield chunk;
      }
      const assembled = assemble(chunks);
      const now = Date.now();
      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
      const entry = {
        prompt: normalized,
        response: assembled,
        embedding,
        createdAt: now,
        ...embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {},
        ...expiresAt !== void 0 ? { expiresAt } : {}
      };
      try {
        await store.set(key, entry, ttl);
      } catch (err) {
        // A failed write must not fail a stream the caller already consumed.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
    } finally {
      // Guarantees `result` settles when fn() throws mid-stream or the
      // consumer abandons the iterator. A promise resolves only once, so this
      // is a no-op on every path that already resolved above.
      resolveResult({ hit: false, layer: "miss", ...namespace !== void 0 ? { namespace } : {} });
    }
  }
  return { stream: generate(), result };
}
|
|
392
505
|
async function invalidate(prompt, options) {
|
|
393
506
|
const normalized = normalizePrompt(prompt);
|
|
394
507
|
const key = hashPrompt(options?.namespace, options?.context, normalized);
|
|
@@ -414,7 +527,128 @@ function createCache(config) {
|
|
|
414
527
|
avgSimilarity
|
|
415
528
|
};
|
|
416
529
|
}
|
|
417
|
-
return { wrap, invalidate, flush, stats: getStats };
|
|
530
|
+
return { wrap, wrapStream, invalidate, flush, stats: getStats };
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
// src/stores/hnsw-memory.ts
|
|
534
|
+
var INITIAL_CAPACITY = 1024;
|
|
535
|
+
/**
 * Loads the optional `hnswlib-node` dependency on demand.
 *
 * @returns {Promise<object>} An object exposing the `HierarchicalNSW`
 *   constructor.
 * @throws {Error} When the module cannot be loaded; the underlying failure is
 *   attached as `cause` so a broken native build is distinguishable from a
 *   missing install.
 */
async function loadHnswLib() {
  try {
    // NOTE(review): the import goes through Function, presumably so bundlers
    // and transpilers leave the optional dependency's import untouched —
    // confirm against the build configuration. The specifier is a constant,
    // so no external input reaches the evaluated code.
    const mod = await new Function("m", "return import(m)")("hnswlib-node");
    // A CommonJS package imported from ESM may expose its API only on
    // `default`; unwrap it so callers can always use `lib.HierarchicalNSW`.
    return typeof mod.HierarchicalNSW === "function" ? mod : mod.default ?? mod;
  } catch (err) {
    throw new Error(
      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node",
      { cause: err }
    );
  }
}
|
|
544
|
+
/**
 * In-process store that keeps entries in Maps and answers similarity queries
 * from one HNSW index per namespace.
 *
 * @param {object} [injectedLib] - Object exposing a `HierarchicalNSW`
 *   constructor. When omitted, `hnswlib-node` is loaded on first `set()`.
 * @returns {object} Store with `get`, `set`, `delete`, `listEmbeddings` and
 *   `searchSimilar`.
 */
function hnswMemoryStore(injectedLib) {
  const entries = /* @__PURE__ */ new Map();
  const embeddingRecords = /* @__PURE__ */ new Map();
  const nsIndices = /* @__PURE__ */ new Map();
  // Embedding length shared by every index; fixed by the first set().
  let dimension = null;
  let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;

  // Loads the library at most once and reuses the same promise afterwards.
  function getLib() {
    if (!libPromise) libPromise = loadHnswLib();
    return libPromise;
  }

  // Returns the index bookkeeping for a namespace, creating it on first use.
  function getOrCreateNsIndex(lib, ns, dim) {
    let nsIdx = nsIndices.get(ns);
    if (nsIdx === void 0) {
      const index = new lib.HierarchicalNSW("cosine", dim);
      index.initIndex(INITIAL_CAPACITY);
      nsIdx = { index, keyToLabel: /* @__PURE__ */ new Map(), labelToKey: /* @__PURE__ */ new Map(), nextLabel: 0, maxElements: INITIAL_CAPACITY };
      nsIndices.set(ns, nsIdx);
    }
    return nsIdx;
  }

  // Map key under which entries without a namespace are indexed.
  function nsKey(namespace) {
    return namespace ?? "__default__";
  }

  const self = {
    /** Returns the entry for `key`, or null when absent or expired. */
    async get(key) {
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      return entry;
    },

    /**
     * Stores `entry` under `key` and indexes its embedding. An existing point
     * for the same key is tombstoned and replaced with a new label.
     *
     * @throws {Error} When the embedding length differs from the dimension
     *   established by the first stored entry.
     */
    async set(key, entry, _ttlSeconds) {
      const lib = await getLib();
      const embedding = entry.embedding;
      // Validate before touching the index: otherwise a rejected insert would
      // leave the previous point tombstoned and the key unsearchable.
      if (dimension !== null && embedding.length !== dimension) {
        throw new Error(
          `[llm-cache] hnswMemoryStore: embedding has ${embedding.length} dimensions, expected ${dimension}`
        );
      }
      if (dimension === null) dimension = embedding.length;
      const ns = nsKey(entry.namespace);
      const nsIdx = getOrCreateNsIndex(lib, ns, dimension);
      const existingLabel = nsIdx.keyToLabel.get(key);
      if (existingLabel !== void 0) {
        try {
          nsIdx.index.markDelete(existingLabel);
        } catch {
          // Best effort: a failure to tombstone the stale point is ignored;
          // the label mapping is dropped below either way.
        }
        nsIdx.labelToKey.delete(existingLabel);
      }
      // Labels are never reused, so capacity is tracked against nextLabel.
      if (nsIdx.nextLabel >= nsIdx.maxElements) {
        nsIdx.maxElements *= 2;
        nsIdx.index.resizeIndex(nsIdx.maxElements);
      }
      const label = nsIdx.nextLabel++;
      nsIdx.index.addPoint(embedding, label);
      nsIdx.keyToLabel.set(key, label);
      nsIdx.labelToKey.set(label, key);
      entries.set(key, entry);
      embeddingRecords.set(key, {
        key,
        embedding,
        createdAt: entry.createdAt,
        ...entry.namespace !== void 0 ? { namespace: entry.namespace } : {}
      });
    },

    /** Removes `key` from the Maps and tombstones its point in the index. */
    async delete(key) {
      const entry = entries.get(key);
      if (entry) {
        const nsIdx = nsIndices.get(nsKey(entry.namespace));
        if (nsIdx) {
          const label = nsIdx.keyToLabel.get(key);
          if (label !== void 0) {
            try {
              nsIdx.index.markDelete(label);
            } catch {
              // Best effort: the mappings are removed regardless.
            }
            nsIdx.keyToLabel.delete(key);
            nsIdx.labelToKey.delete(label);
          }
        }
      }
      entries.delete(key);
      embeddingRecords.delete(key);
    },

    /** Lists embedding records, optionally restricted to one namespace. */
    async listEmbeddings(namespace) {
      const all = Array.from(embeddingRecords.values());
      return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
    },

    /**
     * Returns the nearest stored record when its similarity reaches
     * `threshold`, otherwise null. Similarity is computed as 1 - distance.
     */
    async searchSimilar(query, threshold, namespace) {
      const nsIdx = nsIndices.get(nsKey(namespace));
      // keyToLabel holds only live keys, so this also avoids querying an
      // index that contains nothing but tombstoned points.
      if (!nsIdx || nsIdx.keyToLabel.size === 0) return null;
      const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
      const label = neighbors[0];
      const distance = distances[0];
      if (label === void 0 || distance === void 0) return null;
      const similarity = 1 - distance;
      if (similarity < threshold) return null;
      const key = nsIdx.labelToKey.get(label);
      if (!key) return null;
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      const record = embeddingRecords.get(key);
      if (!record) return null;
      return { record, similarity };
    }
  };
  return self;
}
|
|
419
653
|
|
|
420
654
|
// src/utils/validate.ts
|
|
@@ -424,6 +658,12 @@ function assertCacheEntry(val, source) {
|
|
|
424
658
|
}
|
|
425
659
|
return val;
|
|
426
660
|
}
|
|
661
|
+
/**
 * Checks that a value read from a store has the shape of an embedding record.
 *
 * @param {unknown} val - Candidate record.
 * @param {string} source - Store name used in the error message.
 * @returns {object} The same `val` when it is a non-null object with a string
 *   `key`, an array `embedding` and a numeric `createdAt`.
 * @throws {Error} When any of those checks fails.
 */
function assertEmbeddingRecord(val, source) {
  const looksValid =
    typeof val === "object" &&
    val !== null &&
    typeof val.key === "string" &&
    Array.isArray(val.embedding) &&
    typeof val.createdAt === "number";
  if (looksValid) {
    return val;
  }
  throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
}
|
|
427
667
|
|
|
428
668
|
// src/stores/redis.ts
|
|
429
669
|
var ENTRY_PREFIX = "llm-cache:entry:";
|
|
@@ -471,7 +711,7 @@ function redisStore(client) {
|
|
|
471
711
|
async listEmbeddings(namespace) {
|
|
472
712
|
const hash = await redis.hgetall(nsHashKey(namespace));
|
|
473
713
|
if (!hash) return [];
|
|
474
|
-
return Object.values(hash).map((v) => JSON.parse(v));
|
|
714
|
+
return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
|
|
475
715
|
},
|
|
476
716
|
async close() {
|
|
477
717
|
await redis.quit();
|
|
@@ -545,26 +785,32 @@ function sqliteStore(db) {
|
|
|
545
785
|
},
|
|
546
786
|
async listEmbeddings(namespace) {
|
|
547
787
|
const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
|
|
548
|
-
return rows.map((row) =>
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
788
|
+
return rows.map((row) => {
|
|
789
|
+
const parsed = assertEmbeddingRecord(
|
|
790
|
+
{
|
|
791
|
+
key: row.key,
|
|
792
|
+
embedding: JSON.parse(row.embedding),
|
|
793
|
+
createdAt: row.created_at,
|
|
794
|
+
...row.namespace !== null ? { namespace: row.namespace } : {}
|
|
795
|
+
},
|
|
796
|
+
"sqlite"
|
|
797
|
+
);
|
|
798
|
+
return parsed;
|
|
799
|
+
});
|
|
554
800
|
}
|
|
555
801
|
};
|
|
556
802
|
}
|
|
557
803
|
|
|
558
804
|
// src/stores/pgvector.ts
|
|
559
|
-
var
|
|
560
|
-
async function initSchema2(pool) {
|
|
805
|
+
var DEFAULT_DIMENSIONS = 1536;
|
|
806
|
+
async function initSchema2(pool, dimensions) {
|
|
561
807
|
await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
|
|
562
808
|
await pool.query(`
|
|
563
809
|
CREATE TABLE IF NOT EXISTS llm_cache (
|
|
564
810
|
key TEXT PRIMARY KEY,
|
|
565
811
|
prompt TEXT NOT NULL,
|
|
566
812
|
response JSONB,
|
|
567
|
-
embedding vector(${
|
|
813
|
+
embedding vector(${dimensions}),
|
|
568
814
|
namespace TEXT,
|
|
569
815
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
570
816
|
expires_at TIMESTAMPTZ
|
|
@@ -580,9 +826,16 @@ async function initSchema2(pool) {
|
|
|
580
826
|
/**
 * Parses a bracketed, comma-separated vector string into numbers.
 *
 * @param {string} raw - Text such as "[0.1,0.2,0.3]".
 * @returns {number[]} One number per comma-separated component.
 */
function parseEmbedding(raw) {
  const withoutOpen = raw.replace(/^\[/, "");
  const inner = withoutOpen.replace(/\]$/, "");
  const parts = inner.split(",");
  return parts.map((part) => Number(part));
}
|
|
583
|
-
function pgvectorStore(pool) {
|
|
829
|
+
function pgvectorStore(pool, options) {
|
|
584
830
|
const pg = pool;
|
|
585
|
-
const
|
|
831
|
+
const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
|
|
832
|
+
if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
|
|
833
|
+
throw new RangeError(
|
|
834
|
+
`[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
|
|
835
|
+
);
|
|
836
|
+
}
|
|
837
|
+
const dimensions = rawDimensions;
|
|
838
|
+
const ready = initSchema2(pg, dimensions);
|
|
586
839
|
return {
|
|
587
840
|
async get(key) {
|
|
588
841
|
await ready;
|
|
@@ -656,6 +909,7 @@ function pgvectorStore(pool) {
|
|
|
656
909
|
0 && (module.exports = {
|
|
657
910
|
createCache,
|
|
658
911
|
createEmbedder,
|
|
912
|
+
hnswMemoryStore,
|
|
659
913
|
memoryStore,
|
|
660
914
|
pgvectorStore,
|
|
661
915
|
redisStore,
|
package/dist/index.mjs
CHANGED
|
@@ -143,7 +143,7 @@ function isExpired(entry) {
|
|
|
143
143
|
return Date.now() > entry.expiresAt;
|
|
144
144
|
}
|
|
145
145
|
/**
 * Converts a TTL in seconds into an absolute expiry timestamp.
 *
 * @param {number} ttlSeconds - Time to live in seconds.
 * @returns {number|undefined} Epoch milliseconds at which the entry expires,
 *   or `undefined` when the TTL is zero or negative.
 */
function computeExpiresAt(ttlSeconds) {
  return ttlSeconds <= 0 ? void 0 : Date.now() + ttlSeconds * 1e3;
}
|
|
149
149
|
|
|
@@ -219,7 +219,7 @@ function cosineSimilarity(a, b) {
|
|
|
219
219
|
function findBestMatch(query, records, threshold) {
|
|
220
220
|
if (records.length > 1e4) {
|
|
221
221
|
console.warn(
|
|
222
|
-
`[llm-cache] Scanning ${records.length} embeddings
|
|
222
|
+
`[llm-cache] Scanning ${records.length} embeddings with O(n) linear search. Use hnswMemoryStore() for fast in-process ANN, or pgvector for multi-process deployments.`
|
|
223
223
|
);
|
|
224
224
|
}
|
|
225
225
|
let bestSimilarity = -Infinity;
|
|
@@ -270,7 +270,7 @@ function createCache(config) {
|
|
|
270
270
|
if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
|
|
271
271
|
const normalized = normalizePrompt(prompt);
|
|
272
272
|
const key = hashPrompt(namespace, context, normalized);
|
|
273
|
-
const embeddingNamespace = context !== void 0 ?
|
|
273
|
+
const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
|
|
274
274
|
try {
|
|
275
275
|
const cached = await store.get(key);
|
|
276
276
|
if (cached !== null) {
|
|
@@ -292,11 +292,9 @@ function createCache(config) {
|
|
|
292
292
|
return { value: value2, hit: false, layer: "miss" };
|
|
293
293
|
}
|
|
294
294
|
let embedding;
|
|
295
|
-
let records;
|
|
296
295
|
try {
|
|
297
296
|
const raw = await embed(normalized);
|
|
298
297
|
embedding = Array.from(raw);
|
|
299
|
-
records = await store.listEmbeddings(embeddingNamespace);
|
|
300
298
|
} catch (err) {
|
|
301
299
|
config.onError?.(err instanceof Error ? err : new Error(String(err)));
|
|
302
300
|
lifetime.misses++;
|
|
@@ -305,7 +303,7 @@ function createCache(config) {
|
|
|
305
303
|
return { value: value2, hit: false, layer: "miss" };
|
|
306
304
|
}
|
|
307
305
|
try {
|
|
308
|
-
const match = findBestMatch(embedding,
|
|
306
|
+
const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
|
|
309
307
|
if (match !== null) {
|
|
310
308
|
const matchedEntry = await store.get(match.record.key);
|
|
311
309
|
if (matchedEntry !== null) {
|
|
@@ -329,9 +327,9 @@ function createCache(config) {
|
|
|
329
327
|
lifetime.misses++;
|
|
330
328
|
config.onMiss?.(prompt);
|
|
331
329
|
const value = await fn();
|
|
332
|
-
if (value instanceof ReadableStream || typeof value === "object" && value !== null &&
|
|
330
|
+
if (value instanceof ReadableStream || typeof value === "object" && value !== null && Symbol.asyncIterator in value) {
|
|
333
331
|
throw new Error(
|
|
334
|
-
"[llm-cache] Streaming responses cannot be cached.
|
|
332
|
+
"[llm-cache] Streaming responses cannot be cached via wrap(). Use wrapStream() for streaming LLM calls, or collect the full response before passing fn() to wrap()."
|
|
335
333
|
);
|
|
336
334
|
}
|
|
337
335
|
const now = Date.now();
|
|
@@ -358,6 +356,120 @@ function createCache(config) {
|
|
|
358
356
|
...namespace !== void 0 ? { namespace } : {}
|
|
359
357
|
};
|
|
360
358
|
}
|
|
359
|
+
/**
 * Default strategy for collapsing streamed chunks into the value that is
 * cached.
 *
 * @param {Array} chunks - Chunks collected from the stream, in arrival order.
 * @returns {string|Array} The concatenation of the chunks when there is at
 *   least one chunk and all of them are strings; otherwise the `chunks`
 *   array itself, unchanged.
 */
function defaultAssemble(chunks) {
  const isEmpty = chunks.length === 0;
  const hasNonString = chunks.some((piece) => typeof piece !== "string");
  if (isEmpty || hasNonString) {
    return chunks;
  }
  return chunks.join("");
}
|
|
365
|
+
/**
 * Default replay strategy for a cache hit: emits the cached value as a
 * single chunk and then completes.
 *
 * @param {*} cached - The value previously stored for the prompt.
 * @yields {*} The cached value, exactly once.
 */
async function* defaultReconstruct(cached) {
  const onlyChunk = cached;
  yield onlyChunk;
}
|
|
368
|
+
/**
 * Streaming counterpart of wrap().
 *
 * Returns `{ stream, result }`. `stream` is a lazy async generator: no cache
 * lookup or call to `fn` happens until it is iterated. `result` resolves with
 * the hit/miss metadata once the outcome is known (before replay on a hit,
 * after the stream ends on a miss).
 *
 * Flow when iterated:
 *  1. `options.bypass === true`: stream `fn()` untouched, nothing is cached.
 *  2. Exact lookup by hashed key; on a hit the cached response is replayed
 *     through `options.reconstruct` (default: one chunk).
 *  3. Semantic lookup: `store.searchSimilar` when the store provides it,
 *     otherwise a linear `findBestMatch` over `store.listEmbeddings`.
 *  4. Miss: chunks from `fn()` are yielded as they arrive, collected, passed
 *     through `options.assemble`, and written to the store.
 *
 * Store and embedder failures are reported via `config.onError` and degrade
 * to an uncached stream rather than failing the call.
 *
 * @param {string} prompt - Prompt used for the cache key and embedding.
 * @param {Function} fn - Produces the async iterable of chunks on a miss.
 * @param {object} [options] - namespace, context, threshold, ttl, bypass,
 *   assemble, reconstruct.
 * @returns {{stream: AsyncIterable, result: Promise<object>}}
 */
function wrapStream(prompt, fn, options) {
  const assemble = options?.assemble ?? defaultAssemble;
  const reconstruct = options?.reconstruct ?? defaultReconstruct;
  let resolveResult;
  const result = new Promise((res) => {
    resolveResult = res;
  });
  async function* generate() {
    // Value `result` settles with unless a hit resolved it first.
    let missResult = { hit: false, layer: "miss" };
    try {
      if (options?.bypass === true) {
        yield* fn();
        return;
      }
      const namespace = options?.namespace;
      const context = options?.context;
      const threshold = options?.threshold ?? globalThreshold;
      const ttl = options?.ttl ?? globalTtl;
      if (namespace !== void 0) lifetime.seenNamespaces.add(namespace);
      const normalized = normalizePrompt(prompt);
      const key = hashPrompt(namespace, context, normalized);
      // Must match the encoding used by wrap(); otherwise entries written by
      // one API are invisible to the other's semantic lookup. The JSON tuple
      // is also unambiguous, unlike a "<ns>__ctx__<context>" concatenation.
      const embeddingNamespace = context !== void 0 ? JSON.stringify([namespace ?? "", context]) : namespace;
      try {
        const cached = await store.get(key);
        if (cached !== null) {
          lifetime.hits++;
          const streamResult = {
            hit: true,
            layer: "exact",
            ...(namespace !== void 0 ? { namespace } : {})
          };
          config.onHit?.({ ...streamResult, value: cached.response });
          resolveResult(streamResult);
          // NOTE(review): an error thrown while replaying lands in the catch
          // below and falls back to fn() — confirm that is intended.
          yield* reconstruct(cached.response);
          return;
        }
      } catch (err) {
        // Store unavailable: serve the live stream without caching.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        return;
      }
      let embedding;
      try {
        const raw = await embed(normalized);
        embedding = Array.from(raw);
      } catch (err) {
        // No embedding means no semantic lookup and nothing to index.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
        lifetime.misses++;
        config.onMiss?.(prompt);
        yield* fn();
        return;
      }
      try {
        const match = typeof store.searchSimilar === "function" ? await store.searchSimilar(embedding, threshold, embeddingNamespace) : findBestMatch(embedding, await store.listEmbeddings(embeddingNamespace), threshold);
        if (match !== null) {
          const matchedEntry = await store.get(match.record.key);
          if (matchedEntry !== null) {
            lifetime.hits++;
            lifetime.similarities.push(match.similarity);
            const streamResult = {
              hit: true,
              layer: "semantic",
              similarity: match.similarity,
              matchedPrompt: matchedEntry.prompt,
              ...(namespace !== void 0 ? { namespace } : {})
            };
            config.onHit?.({ ...streamResult, value: matchedEntry.response });
            resolveResult(streamResult);
            yield* reconstruct(matchedEntry.response);
            return;
          }
        }
      } catch (err) {
        // Semantic lookup is best-effort; fall through to the miss path.
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
      lifetime.misses++;
      config.onMiss?.(prompt);
      missResult = { hit: false, layer: "miss", ...(namespace !== void 0 ? { namespace } : {}) };
      const chunks = [];
      for await (const chunk of fn()) {
        chunks.push(chunk);
        yield chunk;
      }
      const assembled = assemble(chunks);
      const now = Date.now();
      const expiresAt = ttl !== void 0 ? computeExpiresAt(ttl) : void 0;
      const entry = {
        prompt: normalized,
        response: assembled,
        embedding,
        createdAt: now,
        ...(embeddingNamespace !== void 0 ? { namespace: embeddingNamespace } : {}),
        ...(expiresAt !== void 0 ? { expiresAt } : {})
      };
      try {
        await store.set(key, entry, ttl);
      } catch (err) {
        config.onError?.(err instanceof Error ? err : new Error(String(err)));
      }
    } finally {
      // Always settle `result`, including when fn() throws or the consumer
      // stops iterating early; previously it stayed pending forever in those
      // cases. This is a no-op when a hit already resolved it.
      resolveResult(missResult);
    }
  }
  return { stream: generate(), result };
}
|
|
361
473
|
async function invalidate(prompt, options) {
|
|
362
474
|
const normalized = normalizePrompt(prompt);
|
|
363
475
|
const key = hashPrompt(options?.namespace, options?.context, normalized);
|
|
@@ -383,7 +495,128 @@ function createCache(config) {
|
|
|
383
495
|
avgSimilarity
|
|
384
496
|
};
|
|
385
497
|
}
|
|
386
|
-
return { wrap, invalidate, flush, stats: getStats };
|
|
498
|
+
return { wrap, wrapStream, invalidate, flush, stats: getStats };
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// src/stores/hnsw-memory.ts
|
|
502
|
+
var INITIAL_CAPACITY = 1024;
|
|
503
|
+
/**
 * Lazily loads the optional `hnswlib-node` dependency.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} With an install hint when the import fails; the underlying
 *   failure is attached as `cause` so problems other than "not installed"
 *   remain diagnosable.
 */
async function loadHnswLib() {
  try {
    // NOTE(review): the import goes through the Function constructor with a
    // fixed module name, presumably so bundlers do not try to resolve this
    // optional dependency at build time — confirm before simplifying.
    return await new Function("m", "return import(m)")("hnswlib-node");
  } catch (err) {
    throw new Error(
      "[llm-cache] hnswMemoryStore requires hnswlib-node: npm install hnswlib-node",
      { cause: err }
    );
  }
}
|
|
512
|
+
/**
 * In-process store that keeps entries in Maps and maintains one HNSW index
 * per namespace for nearest-neighbour lookup.
 *
 * The index library is loaded on the first `set()` unless one is injected.
 * The embedding dimension is fixed by the first entry stored and shared by
 * every namespace index.
 *
 * @param {object} [injectedLib] - Object exposing a `HierarchicalNSW`
 *   constructor; when omitted, `hnswlib-node` is imported on demand.
 * @returns {object} Store with get / set / delete / listEmbeddings /
 *   searchSimilar.
 */
function hnswMemoryStore(injectedLib) {
  const entries = /* @__PURE__ */ new Map();
  const embeddingRecords = /* @__PURE__ */ new Map();
  // namespace key -> { index, keyToLabel, labelToKey, nextLabel, maxElements }
  const nsIndices = /* @__PURE__ */ new Map();
  let dimension = null;
  let libPromise = injectedLib ? Promise.resolve(injectedLib) : null;
  function getLib() {
    if (!libPromise) libPromise = loadHnswLib();
    return libPromise;
  }
  function getOrCreateNsIndex(lib, ns, dim) {
    let nsIdx = nsIndices.get(ns);
    if (nsIdx === void 0) {
      const index = new lib.HierarchicalNSW("cosine", dim);
      index.initIndex(INITIAL_CAPACITY);
      nsIdx = { index, keyToLabel: /* @__PURE__ */ new Map(), labelToKey: /* @__PURE__ */ new Map(), nextLabel: 0, maxElements: INITIAL_CAPACITY };
      nsIndices.set(ns, nsIdx);
    }
    return nsIdx;
  }
  function nsKey(namespace) {
    return namespace ?? "__default__";
  }
  // Removes `key` from the index of the namespace `entry` was stored under.
  function unindex(key, entry) {
    const nsIdx = nsIndices.get(nsKey(entry.namespace));
    if (!nsIdx) return;
    const label = nsIdx.keyToLabel.get(key);
    if (label === void 0) return;
    try {
      nsIdx.index.markDelete(label);
    } catch {
      // Best effort: the bookkeeping maps below are the source of truth for
      // which labels are live, so a failed markDelete is tolerated.
    }
    nsIdx.keyToLabel.delete(key);
    nsIdx.labelToKey.delete(label);
  }
  const self = {
    /** Returns the entry for `key`, or null when absent or expired (expired entries are evicted). */
    async get(key) {
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      return entry;
    },
    /**
     * Stores `entry` under `key` and indexes its embedding, replacing any
     * previous entry for the same key.
     * @throws {Error} When the embedding length differs from the store's dimension.
     */
    async set(key, entry, _ttlSeconds) {
      const lib = await getLib();
      if (dimension === null) dimension = entry.embedding.length;
      // Validate before touching any state so a bad vector cannot leave the
      // previous entry half-removed.
      if (entry.embedding.length !== dimension) {
        throw new Error(
          `[llm-cache] hnswMemoryStore: embedding dimension mismatch (expected ${dimension}, got ${entry.embedding.length})`
        );
      }
      // Un-index the previous entry from the namespace it was stored under,
      // which may differ from the new entry's namespace.
      const previous = entries.get(key);
      if (previous) unindex(key, previous);
      const nsIdx = getOrCreateNsIndex(lib, nsKey(entry.namespace), dimension);
      // Labels are never reused, so capacity is tracked against nextLabel.
      if (nsIdx.nextLabel >= nsIdx.maxElements) {
        nsIdx.maxElements *= 2;
        nsIdx.index.resizeIndex(nsIdx.maxElements);
      }
      const label = nsIdx.nextLabel++;
      nsIdx.index.addPoint(entry.embedding, label);
      nsIdx.keyToLabel.set(key, label);
      nsIdx.labelToKey.set(label, key);
      entries.set(key, entry);
      embeddingRecords.set(key, {
        key,
        embedding: entry.embedding,
        createdAt: entry.createdAt,
        ...(entry.namespace !== void 0 ? { namespace: entry.namespace } : {})
      });
    },
    /** Removes the entry and its index point; a no-op for unknown keys. */
    async delete(key) {
      const entry = entries.get(key);
      if (entry) unindex(key, entry);
      entries.delete(key);
      embeddingRecords.delete(key);
    },
    /** Lists embedding records: all of them when `namespace` is undefined, otherwise only that namespace's. */
    async listEmbeddings(namespace) {
      const all = Array.from(embeddingRecords.values());
      return namespace === void 0 ? all : all.filter((r) => r.namespace === namespace);
    },
    /**
     * Returns `{ record, similarity }` for the nearest live neighbour in the
     * namespace's index (the "__default__" index when `namespace` is
     * undefined) if its similarity (1 - cosine distance) reaches
     * `threshold`; otherwise null.
     */
    async searchSimilar(query, threshold, namespace) {
      const nsIdx = nsIndices.get(nsKey(namespace));
      // Guard on live keys rather than the index's own count, so an index
      // whose points have all been deleted is never queried.
      if (!nsIdx || nsIdx.keyToLabel.size === 0) return null;
      const { neighbors, distances } = nsIdx.index.searchKnn(query, 1);
      const label = neighbors[0];
      const distance = distances[0];
      if (label === void 0 || distance === void 0) return null;
      const similarity = 1 - distance;
      if (similarity < threshold) return null;
      const key = nsIdx.labelToKey.get(label);
      if (!key) return null;
      const entry = entries.get(key);
      if (!entry) return null;
      if (isExpired(entry)) {
        await self.delete(key);
        return null;
      }
      const record = embeddingRecords.get(key);
      if (!record) return null;
      return { record, similarity };
    }
  };
  return self;
}
|
|
388
621
|
|
|
389
622
|
// src/utils/validate.ts
|
|
@@ -393,6 +626,12 @@ function assertCacheEntry(val, source) {
|
|
|
393
626
|
}
|
|
394
627
|
return val;
|
|
395
628
|
}
|
|
629
|
+
/**
 * Runtime shape check for an embedding record read from a backing store.
 *
 * A record passes when it is a non-null object whose `key` is a string,
 * whose `embedding` is an array and whose `createdAt` is a number.
 *
 * @param {*} val - Value to validate.
 * @param {string} source - Store name, included in the error message.
 * @returns {*} `val` itself when the shape is valid.
 * @throws {Error} When any of the checks fails.
 */
function assertEmbeddingRecord(val, source) {
  const looksValid =
    typeof val === "object" &&
    val !== null &&
    typeof val["key"] === "string" &&
    Array.isArray(val["embedding"]) &&
    typeof val["createdAt"] === "number";
  if (looksValid) {
    return val;
  }
  throw new Error(`[llm-cache] Invalid embedding record shape from ${source}`);
}
|
|
396
635
|
|
|
397
636
|
// src/stores/redis.ts
|
|
398
637
|
var ENTRY_PREFIX = "llm-cache:entry:";
|
|
@@ -440,7 +679,7 @@ function redisStore(client) {
|
|
|
440
679
|
async listEmbeddings(namespace) {
|
|
441
680
|
const hash = await redis.hgetall(nsHashKey(namespace));
|
|
442
681
|
if (!hash) return [];
|
|
443
|
-
return Object.values(hash).map((v) => JSON.parse(v));
|
|
682
|
+
return Object.values(hash).map((v) => assertEmbeddingRecord(JSON.parse(v), "redis"));
|
|
444
683
|
},
|
|
445
684
|
async close() {
|
|
446
685
|
await redis.quit();
|
|
@@ -514,26 +753,32 @@ function sqliteStore(db) {
|
|
|
514
753
|
},
|
|
515
754
|
async listEmbeddings(namespace) {
|
|
516
755
|
const rows = namespace !== void 0 ? stmtListByNs.all(namespace) : stmtListAll.all();
|
|
517
|
-
return rows.map((row) =>
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
756
|
+
return rows.map((row) => {
|
|
757
|
+
const parsed = assertEmbeddingRecord(
|
|
758
|
+
{
|
|
759
|
+
key: row.key,
|
|
760
|
+
embedding: JSON.parse(row.embedding),
|
|
761
|
+
createdAt: row.created_at,
|
|
762
|
+
...row.namespace !== null ? { namespace: row.namespace } : {}
|
|
763
|
+
},
|
|
764
|
+
"sqlite"
|
|
765
|
+
);
|
|
766
|
+
return parsed;
|
|
767
|
+
});
|
|
523
768
|
}
|
|
524
769
|
};
|
|
525
770
|
}
|
|
526
771
|
|
|
527
772
|
// src/stores/pgvector.ts
|
|
528
|
-
var
|
|
529
|
-
async function initSchema2(pool) {
|
|
773
|
+
var DEFAULT_DIMENSIONS = 1536;
|
|
774
|
+
async function initSchema2(pool, dimensions) {
|
|
530
775
|
await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
|
|
531
776
|
await pool.query(`
|
|
532
777
|
CREATE TABLE IF NOT EXISTS llm_cache (
|
|
533
778
|
key TEXT PRIMARY KEY,
|
|
534
779
|
prompt TEXT NOT NULL,
|
|
535
780
|
response JSONB,
|
|
536
|
-
embedding vector(${
|
|
781
|
+
embedding vector(${dimensions}),
|
|
537
782
|
namespace TEXT,
|
|
538
783
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
539
784
|
expires_at TIMESTAMPTZ
|
|
@@ -549,9 +794,16 @@ async function initSchema2(pool) {
|
|
|
549
794
|
/**
 * Turns the textual form of a vector (e.g. "[0.1,0.2,0.3]") into an array
 * of numbers.
 *
 * One leading "[" and one trailing "]" are removed when present; the rest is
 * split on commas and each piece is coerced with Number().
 *
 * @param {string} raw - Bracketed, comma-separated vector text.
 * @returns {number[]} The parsed components, in order.
 */
function parseEmbedding(raw) {
  const inner = raw.replace(/^\[|\]$/g, "");
  const pieces = inner.split(",");
  return pieces.map((piece) => Number(piece));
}
|
|
552
|
-
function pgvectorStore(pool) {
|
|
797
|
+
function pgvectorStore(pool, options) {
|
|
553
798
|
const pg = pool;
|
|
554
|
-
const
|
|
799
|
+
const rawDimensions = options?.dimensions ?? DEFAULT_DIMENSIONS;
|
|
800
|
+
if (!Number.isInteger(rawDimensions) || rawDimensions < 1 || rawDimensions > 65535) {
|
|
801
|
+
throw new RangeError(
|
|
802
|
+
`[llm-cache] pgvectorStore: dimensions must be a positive integer \u2264 65535, got ${rawDimensions}`
|
|
803
|
+
);
|
|
804
|
+
}
|
|
805
|
+
const dimensions = rawDimensions;
|
|
806
|
+
const ready = initSchema2(pg, dimensions);
|
|
555
807
|
return {
|
|
556
808
|
async get(key) {
|
|
557
809
|
await ready;
|
|
@@ -624,6 +876,7 @@ function pgvectorStore(pool) {
|
|
|
624
876
|
export {
|
|
625
877
|
createCache,
|
|
626
878
|
createEmbedder,
|
|
879
|
+
hnswMemoryStore,
|
|
627
880
|
memoryStore,
|
|
628
881
|
pgvectorStore,
|
|
629
882
|
redisStore,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pravoobi/llm-cache",
|
|
3
|
-
"version": "0.1
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Semantic caching layer for LLM calls. Deduplicates near-identical prompts using embeddings.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -26,7 +26,8 @@
|
|
|
26
26
|
"ioredis": ">=5.0.0",
|
|
27
27
|
"better-sqlite3": ">=9.0.0",
|
|
28
28
|
"pg": ">=8.0.0",
|
|
29
|
-
"@xenova/transformers": ">=2.0.0"
|
|
29
|
+
"@xenova/transformers": ">=2.0.0",
|
|
30
|
+
"hnswlib-node": ">=3.0.0"
|
|
30
31
|
},
|
|
31
32
|
"peerDependenciesMeta": {
|
|
32
33
|
"openai": { "optional": true },
|
|
@@ -34,7 +35,8 @@
|
|
|
34
35
|
"ioredis": { "optional": true },
|
|
35
36
|
"better-sqlite3": { "optional": true },
|
|
36
37
|
"pg": { "optional": true },
|
|
37
|
-
"@xenova/transformers": { "optional": true }
|
|
38
|
+
"@xenova/transformers": { "optional": true },
|
|
39
|
+
"hnswlib-node": { "optional": true }
|
|
38
40
|
},
|
|
39
41
|
"author": "Venkata Praveen Kumar Velisetty",
|
|
40
42
|
"repository": {
|
|
@@ -58,5 +60,9 @@
|
|
|
58
60
|
},
|
|
59
61
|
"keywords": ["llm", "cache", "semantic", "embeddings", "openai", "anthropic", "ai"],
|
|
60
62
|
"license": "MIT",
|
|
61
|
-
"engines": { "node": ">=
|
|
63
|
+
"engines": { "node": ">=20.0.0" },
|
|
64
|
+
"publishConfig": {
|
|
65
|
+
"access": "public",
|
|
66
|
+
"provenance": true
|
|
67
|
+
}
|
|
62
68
|
}
|