@betterdb/semantic-cache 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +211 -128
  2. package/dist/SemanticCache.d.ts +85 -5
  3. package/dist/SemanticCache.js +689 -47
  4. package/dist/adapters/ai.js +6 -1
  5. package/dist/adapters/anthropic.d.ts +32 -0
  6. package/dist/adapters/anthropic.js +94 -0
  7. package/dist/adapters/langchain.js +6 -1
  8. package/dist/adapters/langgraph.d.ts +104 -0
  9. package/dist/adapters/langgraph.js +271 -0
  10. package/dist/adapters/llamaindex.d.ts +32 -0
  11. package/dist/adapters/llamaindex.js +76 -0
  12. package/dist/adapters/openai-responses.d.ts +31 -0
  13. package/dist/adapters/openai-responses.js +112 -0
  14. package/dist/adapters/openai.d.ts +42 -0
  15. package/dist/adapters/openai.js +97 -0
  16. package/dist/analytics.d.ts +24 -0
  17. package/dist/analytics.js +116 -0
  18. package/dist/cluster.d.ts +10 -0
  19. package/dist/cluster.js +43 -0
  20. package/dist/defaultCostTable.d.ts +11 -0
  21. package/dist/defaultCostTable.js +1976 -0
  22. package/dist/embed/bedrock.d.ts +32 -0
  23. package/dist/embed/bedrock.js +109 -0
  24. package/dist/embed/cohere.d.ts +34 -0
  25. package/dist/embed/cohere.js +37 -0
  26. package/dist/embed/ollama.d.ts +30 -0
  27. package/dist/embed/ollama.js +24 -0
  28. package/dist/embed/openai.d.ts +31 -0
  29. package/dist/embed/openai.js +66 -0
  30. package/dist/embed/voyage.d.ts +31 -0
  31. package/dist/embed/voyage.js +32 -0
  32. package/dist/index.d.ts +6 -1
  33. package/dist/index.js +11 -1
  34. package/dist/normalizer.d.ts +68 -0
  35. package/dist/normalizer.js +102 -0
  36. package/dist/telemetry.d.ts +3 -0
  37. package/dist/telemetry.js +18 -0
  38. package/dist/types.d.ts +107 -7
  39. package/dist/utils.d.ts +58 -0
  40. package/dist/utils.js +30 -0
  41. package/package.json +81 -6
@@ -2,10 +2,14 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SemanticCache = void 0;
4
4
  const node_crypto_1 = require("node:crypto");
5
+ const node_crypto_2 = require("node:crypto");
5
6
  const api_1 = require("@opentelemetry/api");
6
7
  const errors_1 = require("./errors");
7
8
  const telemetry_1 = require("./telemetry");
8
9
  const utils_1 = require("./utils");
10
+ const defaultCostTable_1 = require("./defaultCostTable");
11
+ const cluster_1 = require("./cluster");
12
+ const analytics_1 = require("./analytics");
9
13
  const INVALIDATE_BATCH_SIZE = 1000;
10
14
  function errMsg(err) {
11
15
  return err instanceof Error ? err.message : String(err);
@@ -17,15 +21,27 @@ class SemanticCache {
17
21
  indexName;
18
22
  entryPrefix;
19
23
  statsKey;
24
+ similarityWindowKey;
20
25
  defaultThreshold;
21
26
  defaultTtl;
22
27
  categoryThresholds;
23
28
  uncertaintyBand;
24
29
  telemetry;
30
+ costTable;
31
+ embeddingCacheEnabled;
32
+ embeddingCacheTtl;
33
+ embedKeyPrefix;
25
34
  _initialized = false;
26
35
  _dimension = 0;
36
+ _hasBinaryRefs = false;
27
37
  _initPromise = null;
28
38
  _initGeneration = 0;
39
+ analyticsOpts;
40
+ usesDefaultCostTable;
41
+ analytics = analytics_1.NOOP_ANALYTICS;
42
+ statsTimer;
43
+ shutdownCalled = false;
44
+ analyticsInitiated = false;
29
45
  /**
30
46
  * Creates a new SemanticCache instance.
31
47
  *
@@ -42,17 +58,35 @@ class SemanticCache {
42
58
  this.indexName = `${this.name}:idx`;
43
59
  this.entryPrefix = `${this.name}:entry:`;
44
60
  this.statsKey = `${this.name}:__stats`;
61
+ this.similarityWindowKey = `${this.name}:__similarity_window`;
62
+ this.embedKeyPrefix = `${this.name}:embed:`;
45
63
  this.defaultThreshold = options.defaultThreshold ?? 0.1;
46
64
  this.defaultTtl = options.defaultTtl;
47
65
  this.categoryThresholds = options.categoryThresholds ?? {};
48
66
  this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
67
+ // Build effective cost table
68
+ const useDefault = options.useDefaultCostTable ?? true;
69
+ if (!useDefault && !options.costTable) {
70
+ this.costTable = undefined;
71
+ }
72
+ else if (!useDefault) {
73
+ this.costTable = options.costTable;
74
+ }
75
+ else {
76
+ this.costTable = { ...defaultCostTable_1.DEFAULT_COST_TABLE, ...(options.costTable ?? {}) };
77
+ }
78
+ // Embedding cache config
79
+ this.embeddingCacheEnabled = options.embeddingCache?.enabled ?? true;
80
+ this.embeddingCacheTtl = options.embeddingCache?.ttl ?? 86400;
49
81
  this.telemetry = (0, telemetry_1.createTelemetry)({
50
82
  prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
51
83
  tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
52
84
  registry: options.telemetry?.registry,
53
85
  });
86
+ this.analyticsOpts = options.analytics;
87
+ this.usesDefaultCostTable = useDefault;
54
88
  }
55
- // ── Lifecycle ──────────────────────────────────────────────
89
+ // -- Lifecycle --
56
90
  async initialize() {
57
91
  if (!this._initPromise) {
58
92
  this._initPromise = this._doInitialize().catch((err) => {
@@ -65,7 +99,6 @@ class SemanticCache {
65
99
  async flush() {
66
100
  // Mark uninitialized immediately so concurrent check()/store() calls get
67
101
  // a clear SemanticCacheUsageError instead of cryptic Valkey errors.
68
- // Bump generation so any in-flight _doInitialize() won't overwrite this state.
69
102
  this._initialized = false;
70
103
  this._initPromise = null;
71
104
  this._initGeneration++;
@@ -79,33 +112,59 @@ class SemanticCache {
79
112
  throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
80
113
  }
81
114
  }
82
- const entryPattern = `${this.name}:entry:*`;
83
- let cursor = '0';
84
- do {
85
- const [nextCursor, keys] = await this.client.scan(cursor, 'MATCH', entryPattern, 'COUNT', '100');
86
- cursor = nextCursor;
87
- if (keys.length > 0)
88
- await this.client.del(keys);
89
- } while (cursor !== '0');
115
+ // Cluster-aware SCAN for entry keys and embed cache keys
116
+ const patterns = [
117
+ `${this.name}:entry:*`,
118
+ `${this.name}:embed:*`,
119
+ ];
120
+ for (const pattern of patterns) {
121
+ await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
122
+ await nodeClient.del(keys);
123
+ });
124
+ }
90
125
  await this.client.del(this.statsKey);
126
+ await this.client.del(this.similarityWindowKey);
127
+ this.analytics.capture('cache_flush');
128
+ }
129
+ /** Shut down the analytics client and cancel the stats timer. */
130
+ async shutdown() {
131
+ this.shutdownCalled = true;
132
+ if (this.statsTimer) {
133
+ clearInterval(this.statsTimer);
134
+ this.statsTimer = undefined;
135
+ }
136
+ await this.analytics.shutdown();
91
137
  }
92
- // ── Public operations ──────────────────────────────────────
138
+ // -- Public operations --
93
139
  async check(prompt, options) {
94
140
  this.assertInitialized('check');
95
141
  return this.traced('check', async (span) => {
96
142
  const category = options?.category ?? '';
97
- const k = options?.k ?? 1;
98
143
  const threshold = options?.threshold ??
99
144
  (category && this.categoryThresholds[category] !== undefined
100
145
  ? this.categoryThresholds[category]
101
146
  : this.defaultThreshold);
102
- const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
147
+ // Resolve text and binary refs from prompt
148
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
149
+ // Stale model detection
150
+ const checkStale = (options?.staleAfterModelChange ?? false) && !!options?.currentModel;
151
+ // Rerank option
152
+ const rerankOpts = options?.rerank;
153
+ const k = rerankOpts ? rerankOpts.k : (options?.k ?? 1);
154
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
103
155
  this.assertDimension(embedding);
104
- // FT.SEARCH — Valkey Search 1.2 rejects KNN aliases in RETURN/SORTBY,
105
- // so we omit both. Results include all fields and are pre-sorted by distance.
156
+ // Build filter
157
+ const userFilter = options?.filter;
158
+ // AND semantics: each ref must be present — chain separate TAG clauses.
159
+ const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
160
+ ? (binaryRefs.length === 1
161
+ ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
162
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
163
+ : null;
164
+ const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
165
+ const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
166
+ const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
106
167
  const searchStart = performance.now();
107
- const filter = options?.filter;
108
- const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
109
168
  let rawResult;
110
169
  try {
111
170
  rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
@@ -136,6 +195,9 @@ class SemanticCache {
136
195
  }
137
196
  // Miss (no usable score, or score exceeds threshold)
138
197
  if (isNaN(score) || score > threshold) {
198
+ if (!isNaN(score)) {
199
+ await this.recordSimilarityWindow(score, 'miss', category);
200
+ }
139
201
  await this.recordStat('misses');
140
202
  this.telemetry.metrics.requestsTotal
141
203
  .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
@@ -151,42 +213,148 @@ class SemanticCache {
151
213
  }
152
214
  return result;
153
215
  }
154
- // Hit
155
- const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
216
+ // Rerank: apply rerankFn to all candidates above threshold
217
+ let winnerParsedIndex = 0;
218
+ if (rerankOpts && parsed.length > 0) {
219
+ // Preserve the original parsed[] index alongside each candidate so we
220
+ // can map back even when NaN-scored entries are filtered out.
221
+ const indexedCandidates = parsed
222
+ .map((r, i) => ({ i, s: parseFloat(r.fields['__score'] ?? 'NaN') }))
223
+ .filter(({ s }) => !isNaN(s))
224
+ .map(({ i, s }) => ({
225
+ origIdx: i,
226
+ candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
227
+ }));
228
+ const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
229
+ // Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
230
+ // treated as a miss rather than silently falling back to the top candidate.
231
+ if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
232
+ await this.recordSimilarityWindow(score, 'miss', category);
233
+ await this.recordStat('misses');
234
+ this.telemetry.metrics.requestsTotal
235
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
236
+ span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
237
+ return { hit: false, confidence: 'miss' };
238
+ }
239
+ // Map back to the original parsed[] index (not the candidates[] index)
240
+ winnerParsedIndex = indexedCandidates[picked].origIdx;
241
+ }
242
+ const winner = parsed[winnerParsedIndex] ?? parsed[0];
243
+ const winnerScore = parseFloat(winner.fields['__score'] ?? String(score));
244
+ // Stale model check: if winner's model differs from currentModel, evict and treat as miss
245
+ if (checkStale) {
246
+ const storedModel = winner.fields['model'] ?? '';
247
+ if (storedModel && storedModel !== options.currentModel) {
248
+ // Evict stale entry
249
+ try {
250
+ await this.client.del(winner.key);
251
+ }
252
+ catch { /* best effort */ }
253
+ await this.recordSimilarityWindow(winnerScore, 'miss', category);
254
+ this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
255
+ await this.recordStat('misses');
256
+ this.telemetry.metrics.requestsTotal
257
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
258
+ span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
259
+ return { hit: false, confidence: 'miss' };
260
+ }
261
+ }
262
+ // All checks passed — record as a genuine hit
263
+ await this.recordSimilarityWindow(winnerScore, 'hit', category);
264
+ const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
156
265
  await this.recordStat('hits');
157
266
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
158
267
  this.telemetry.metrics.requestsTotal
159
268
  .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
160
- const matchedKey = parsed[0].key;
269
+ const matchedKey = winner.key;
161
270
  if (this.defaultTtl !== undefined && matchedKey) {
162
271
  await this.client.expire(matchedKey, this.defaultTtl);
163
272
  }
273
+ // Cost saved
274
+ let costSaved;
275
+ const costMicrosStr = winner.fields['cost_micros'];
276
+ if (costMicrosStr) {
277
+ const costMicros = parseInt(costMicrosStr, 10);
278
+ if (!isNaN(costMicros) && costMicros > 0) {
279
+ costSaved = costMicros / 1_000_000;
280
+ // Atomically increment cost_saved_micros in stats
281
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
282
+ this.telemetry.metrics.costSavedTotal
283
+ .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
284
+ }
285
+ }
286
+ // Content blocks
287
+ let contentBlocks;
288
+ const contentBlocksStr = winner.fields['content_blocks'];
289
+ if (contentBlocksStr) {
290
+ try {
291
+ contentBlocks = JSON.parse(contentBlocksStr);
292
+ }
293
+ catch { /* ignore parse errors */ }
294
+ }
164
295
  span.setAttributes({
165
- 'cache.hit': true, 'cache.similarity': score, 'cache.threshold': threshold,
296
+ 'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
166
297
  'cache.confidence': confidence, 'cache.matched_key': matchedKey,
167
298
  'cache.category': categoryLabel, ...timingAttrs,
168
299
  });
169
- return {
170
- hit: true, response: parsed[0].fields['response'],
171
- similarity: score, confidence, matchedKey,
300
+ const result = {
301
+ hit: true, response: winner.fields['response'],
302
+ similarity: winnerScore, confidence, matchedKey,
172
303
  };
304
+ if (costSaved !== undefined)
305
+ result.costSaved = costSaved;
306
+ if (contentBlocks)
307
+ result.contentBlocks = contentBlocks;
308
+ return result;
173
309
  });
174
310
  }
175
311
  async store(prompt, response, options) {
176
312
  this.assertInitialized('store');
177
313
  return this.traced('store', async (span) => {
178
- const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
314
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
315
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
179
316
  this.assertDimension(embedding);
180
317
  const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
181
318
  const category = options?.category ?? '';
182
319
  const model = options?.model ?? '';
320
+ // Compute cost if tokens and model provided
321
+ let costMicros;
322
+ if (options?.model &&
323
+ options?.inputTokens !== undefined &&
324
+ options?.outputTokens !== undefined &&
325
+ this.costTable) {
326
+ const pricing = this.costTable[options.model];
327
+ if (pricing) {
328
+ costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
329
+ options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
330
+ }
331
+ }
332
+ const hashFields = {
333
+ prompt: promptText,
334
+ response,
335
+ model,
336
+ category,
337
+ inserted_at: Date.now().toString(),
338
+ metadata: JSON.stringify(options?.metadata ?? {}),
339
+ embedding: (0, utils_1.encodeFloat32)(embedding),
340
+ };
341
+ if (binaryRefs.length > 0) {
342
+ hashFields['binary_refs'] = binaryRefs.join(',');
343
+ }
344
+ if (costMicros !== undefined && costMicros > 0) {
345
+ hashFields['cost_micros'] = String(costMicros);
346
+ }
347
+ if (options?.temperature !== undefined) {
348
+ hashFields['temperature'] = String(options.temperature);
349
+ }
350
+ if (options?.topP !== undefined) {
351
+ hashFields['top_p'] = String(options.topP);
352
+ }
353
+ if (options?.seed !== undefined) {
354
+ hashFields['seed'] = String(options.seed);
355
+ }
183
356
  try {
184
- await this.client.hset(entryKey, {
185
- prompt, response, model, category,
186
- inserted_at: Date.now().toString(),
187
- metadata: JSON.stringify(options?.metadata ?? {}),
188
- embedding: (0, utils_1.encodeFloat32)(embedding),
189
- });
357
+ await this.client.hset(entryKey, hashFields);
190
358
  }
191
359
  catch (err) {
192
360
  throw new errors_1.ValkeyCommandError('HSET', err);
@@ -202,11 +370,195 @@ class SemanticCache {
202
370
  return entryKey;
203
371
  });
204
372
  }
373
+ /**
374
+ * Store structured content blocks as the cached response.
375
+ * Populates both the response field (from TextBlock text) and content_blocks (full JSON).
376
+ */
377
+ async storeMultipart(prompt, blocks, options) {
378
+ this.assertInitialized('storeMultipart');
379
+ return this.traced('storeMultipart', async (span) => {
380
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
381
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
382
+ this.assertDimension(embedding);
383
+ // Derive text response from blocks for backward compat
384
+ const textResponse = (0, utils_1.extractText)(blocks);
385
+ const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
386
+ const category = options?.category ?? '';
387
+ const model = options?.model ?? '';
388
+ let costMicros;
389
+ if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
390
+ const pricing = this.costTable[options.model];
391
+ if (pricing) {
392
+ costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
393
+ options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
394
+ }
395
+ }
396
+ const hashFields = {
397
+ prompt: promptText,
398
+ response: textResponse,
399
+ model,
400
+ category,
401
+ inserted_at: Date.now().toString(),
402
+ metadata: JSON.stringify(options?.metadata ?? {}),
403
+ embedding: (0, utils_1.encodeFloat32)(embedding),
404
+ content_blocks: JSON.stringify(blocks),
405
+ };
406
+ if (binaryRefs.length > 0) {
407
+ hashFields['binary_refs'] = binaryRefs.join(',');
408
+ }
409
+ if (costMicros !== undefined && costMicros > 0) {
410
+ hashFields['cost_micros'] = String(costMicros);
411
+ }
412
+ if (options?.temperature !== undefined)
413
+ hashFields['temperature'] = String(options.temperature);
414
+ if (options?.topP !== undefined)
415
+ hashFields['top_p'] = String(options.topP);
416
+ if (options?.seed !== undefined)
417
+ hashFields['seed'] = String(options.seed);
418
+ try {
419
+ await this.client.hset(entryKey, hashFields);
420
+ }
421
+ catch (err) {
422
+ throw new errors_1.ValkeyCommandError('HSET', err);
423
+ }
424
+ const ttl = options?.ttl ?? this.defaultTtl;
425
+ if (ttl !== undefined)
426
+ await this.client.expire(entryKey, ttl);
427
+ span.setAttributes({
428
+ 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
429
+ 'cache.category': category || 'none', 'cache.model': model || 'none',
430
+ 'embedding_latency_ms': embedSec * 1000,
431
+ });
432
+ return entryKey;
433
+ });
434
+ }
435
+ /**
436
+ * Check multiple prompts in parallel, using pipelined FT.SEARCH calls.
437
+ * Returns results in input order.
438
+ */
439
+ async checkBatch(prompts, options) {
440
+ this.assertInitialized('checkBatch');
441
+ if (prompts.length === 0)
442
+ return [];
443
+ if (options?.rerank) {
444
+ throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'rerank' option. Use check() for reranking individual prompts.");
445
+ }
446
+ if (options?.staleAfterModelChange) {
447
+ throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
448
+ }
449
+ return this.traced('checkBatch', async (span) => {
450
+ // Resolve all prompts and embed in parallel
451
+ const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
452
+ const embeddings = await Promise.all(resolved.map(({ text }) => this.embed(text)));
453
+ const category = options?.category ?? '';
454
+ const threshold = options?.threshold ??
455
+ (category && this.categoryThresholds[category] !== undefined
456
+ ? this.categoryThresholds[category]
457
+ : this.defaultThreshold);
458
+ const k = options?.k ?? 1;
459
+ const userFilter = options?.filter;
460
+ // Pipeline all FT.SEARCH calls
461
+ const pipeline = this.client.pipeline();
462
+ for (let i = 0; i < prompts.length; i++) {
463
+ const { binaryRefs } = resolved[i];
464
+ const { vector: embedding } = embeddings[i];
465
+ const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
466
+ ? (binaryRefs.length === 1
467
+ ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
468
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
469
+ : null;
470
+ const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
471
+ const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
472
+ const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
473
+ pipeline.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
474
+ }
475
+ const pipelineResults = await pipeline.exec();
476
+ span.setAttributes({ 'cache.batch_size': prompts.length, 'cache.name': this.name });
477
+ const results = [];
478
+ const categoryLabel = category || 'none';
479
+ for (let i = 0; i < prompts.length; i++) {
480
+ const pipelineEntry = pipelineResults?.[i];
481
+ const err = pipelineEntry?.[0];
482
+ const rawResult = pipelineEntry?.[1];
483
+ if (err) {
484
+ await this.recordStat('misses');
485
+ this.telemetry.metrics.requestsTotal
486
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
487
+ results.push({ hit: false, confidence: 'miss' });
488
+ continue;
489
+ }
490
+ const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
491
+ if (parsed.length === 0) {
492
+ await this.recordStat('misses');
493
+ this.telemetry.metrics.requestsTotal
494
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
495
+ results.push({ hit: false, confidence: 'miss' });
496
+ continue;
497
+ }
498
+ const scoreStr = parsed[0].fields['__score'];
499
+ const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
500
+ if (isNaN(score) || score > threshold) {
501
+ if (!isNaN(score)) {
502
+ await this.recordSimilarityWindow(score, 'miss', category);
503
+ }
504
+ await this.recordStat('misses');
505
+ this.telemetry.metrics.requestsTotal
506
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
507
+ const result = { hit: false, confidence: 'miss' };
508
+ if (!isNaN(score)) {
509
+ result.similarity = score;
510
+ result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
511
+ }
512
+ results.push(result);
513
+ continue;
514
+ }
515
+ await this.recordSimilarityWindow(score, 'hit', category);
516
+ const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
517
+ await this.recordStat('hits');
518
+ const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
519
+ this.telemetry.metrics.requestsTotal
520
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
521
+ const matchedKey = parsed[0].key;
522
+ if (this.defaultTtl !== undefined && matchedKey) {
523
+ await this.client.expire(matchedKey, this.defaultTtl);
524
+ }
525
+ let costSaved;
526
+ const costMicrosStr = parsed[0].fields['cost_micros'];
527
+ if (costMicrosStr) {
528
+ const costMicros = parseInt(costMicrosStr, 10);
529
+ if (!isNaN(costMicros) && costMicros > 0) {
530
+ costSaved = costMicros / 1_000_000;
531
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
532
+ this.telemetry.metrics.costSavedTotal
533
+ .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
534
+ }
535
+ }
536
+ let contentBlocks;
537
+ const contentBlocksStr = parsed[0].fields['content_blocks'];
538
+ if (contentBlocksStr) {
539
+ try {
540
+ contentBlocks = JSON.parse(contentBlocksStr);
541
+ }
542
+ catch { /* ignore */ }
543
+ }
544
+ const result = {
545
+ hit: true, response: parsed[0].fields['response'],
546
+ similarity: score, confidence, matchedKey,
547
+ };
548
+ if (costSaved !== undefined)
549
+ result.costSaved = costSaved;
550
+ if (contentBlocks)
551
+ result.contentBlocks = contentBlocks;
552
+ results.push(result);
553
+ }
554
+ return results;
555
+ });
556
+ }
205
557
  /**
206
558
  * Deletes all entries matching a valkey-search filter expression.
207
559
  *
208
560
  * **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
209
- * trusted, programmatically-constructed expressions never unsanitised
561
+ * trusted, programmatically-constructed expressions - never unsanitised
210
562
  * user input.
211
563
  */
212
564
  async invalidate(filter) {
@@ -242,13 +594,34 @@ class SemanticCache {
242
594
  return { deleted: keys.length, truncated };
243
595
  });
244
596
  }
597
+ /** Delete all entries tagged with the given model name. */
598
+ async invalidateByModel(model) {
599
+ let total = 0;
600
+ let result;
601
+ do {
602
+ result = await this.invalidate(`@model:{${(0, utils_1.escapeTag)(model)}}`);
603
+ total += result.deleted;
604
+ } while (result.truncated);
605
+ return total;
606
+ }
607
+ /** Delete all entries tagged with the given category. */
608
+ async invalidateByCategory(category) {
609
+ let total = 0;
610
+ let result;
611
+ do {
612
+ result = await this.invalidate(`@category:{${(0, utils_1.escapeTag)(category)}}`);
613
+ total += result.deleted;
614
+ } while (result.truncated);
615
+ return total;
616
+ }
245
617
  async stats() {
246
618
  this.assertInitialized('stats');
247
619
  const raw = await this.client.hgetall(this.statsKey);
248
- const hits = parseInt(raw.hits ?? '0', 10);
249
- const misses = parseInt(raw.misses ?? '0', 10);
250
- const total = parseInt(raw.total ?? '0', 10);
251
- return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total };
620
+ const hits = parseInt(raw?.hits ?? '0', 10);
621
+ const misses = parseInt(raw?.misses ?? '0', 10);
622
+ const total = parseInt(raw?.total ?? '0', 10);
623
+ const costSavedMicros = parseInt(raw?.cost_saved_micros ?? '0', 10);
624
+ return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total, costSavedMicros };
252
625
  }
253
626
  async indexInfo() {
254
627
  this.assertInitialized('indexInfo');
@@ -271,27 +644,217 @@ class SemanticCache {
271
644
  }
272
645
  return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
273
646
  }
274
- // ── Private helpers ────────────────────────────────────────
647
+ /**
648
+ * Analyze the rolling similarity score window and recommend threshold adjustments.
649
+ */
650
+ async thresholdEffectiveness(options) {
651
+ this.assertInitialized('thresholdEffectiveness');
652
+ const minSamples = options?.minSamples ?? 100;
653
+ const category = options?.category;
654
+ const threshold = category && this.categoryThresholds[category] !== undefined
655
+ ? this.categoryThresholds[category]
656
+ : this.defaultThreshold;
657
+ // Read all window entries
658
+ let rawEntries;
659
+ try {
660
+ rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
661
+ }
662
+ catch {
663
+ rawEntries = [];
664
+ }
665
+ // Parse and optionally filter by category
666
+ const entries = [];
667
+ for (const raw of rawEntries) {
668
+ try {
669
+ const entry = JSON.parse(String(raw));
670
+ if (typeof entry.score === 'number' &&
671
+ (entry.result === 'hit' || entry.result === 'miss')) {
672
+ if (!category || entry.category === category) {
673
+ entries.push(entry);
674
+ }
675
+ }
676
+ }
677
+ catch { /* skip corrupt entries */ }
678
+ }
679
+ const sampleCount = entries.length;
680
+ const categoryLabel = category ?? 'all';
681
+ if (sampleCount < minSamples) {
682
+ return {
683
+ category: categoryLabel,
684
+ sampleCount,
685
+ currentThreshold: threshold,
686
+ hitRate: 0,
687
+ uncertainHitRate: 0,
688
+ nearMissRate: 0,
689
+ avgHitSimilarity: 0,
690
+ avgMissSimilarity: 0,
691
+ recommendation: 'insufficient_data',
692
+ reasoning: `Only ${sampleCount} samples collected; ${minSamples} required for a reliable recommendation.`,
693
+ };
694
+ }
695
+ const hits = entries.filter((e) => e.result === 'hit');
696
+ const misses = entries.filter((e) => e.result === 'miss');
697
+ const hitRate = hits.length / sampleCount;
698
+ const uncertainHits = hits.filter((e) => e.score >= threshold - this.uncertaintyBand);
699
+ const uncertainHitRate = hits.length > 0 ? uncertainHits.length / hits.length : 0;
700
+ // Near-misses are scores just ABOVE the threshold (genuine close misses).
701
+ // Scores below the threshold recorded as misses (rerank rejection, stale eviction)
702
+ // must be excluded — they produce negative avgNearMissDelta, causing
703
+ // recommendedThreshold = threshold + negative < threshold, contradicting "loosen".
704
+ const nearMisses = misses.filter((e) => e.score > threshold && e.score <= threshold + 0.03);
705
+ const nearMissRate = misses.length > 0 ? nearMisses.length / misses.length : 0;
706
+ const avgHitSimilarity = hits.length > 0 ? hits.reduce((s, e) => s + e.score, 0) / hits.length : 0;
707
+ const avgMissSimilarity = misses.length > 0 ? misses.reduce((s, e) => s + e.score, 0) / misses.length : 0;
708
+ // avgNearMissDelta: how far above the threshold near-misses are on average
709
+ const avgNearMissDelta = nearMisses.length > 0
710
+ ? nearMisses.reduce((s, e) => s + (e.score - threshold), 0) / nearMisses.length
711
+ : 0;
712
+ let recommendation;
713
+ let recommendedThreshold;
714
+ let reasoning;
715
+ if (uncertainHitRate > 0.2) {
716
+ recommendation = 'tighten_threshold';
717
+ recommendedThreshold = Math.max(0, threshold - this.uncertaintyBand * 1.5);
718
+ reasoning = `${(uncertainHitRate * 100).toFixed(1)}% of hits are in the uncertainty band - tighten the threshold to reduce false positives.`;
719
+ }
720
+ else if (nearMissRate > 0.3 && avgNearMissDelta < 0.03) {
721
+ recommendation = 'loosen_threshold';
722
+ recommendedThreshold = threshold + avgNearMissDelta;
723
+ reasoning = `${(nearMissRate * 100).toFixed(1)}% of misses are very close to the threshold - consider loosening to capture more hits.`;
724
+ }
725
+ else {
726
+ recommendation = 'optimal';
727
+ reasoning = `Hit rate is ${(hitRate * 100).toFixed(1)}% with ${(uncertainHitRate * 100).toFixed(1)}% uncertain hits - threshold appears well-calibrated.`;
728
+ }
729
+ return {
730
+ category: categoryLabel,
731
+ sampleCount,
732
+ currentThreshold: threshold,
733
+ hitRate,
734
+ uncertainHitRate,
735
+ nearMissRate,
736
+ avgHitSimilarity,
737
+ avgMissSimilarity,
738
+ recommendation,
739
+ recommendedThreshold,
740
+ reasoning,
741
+ };
742
+ }
743
+ /**
744
+ * Returns threshold effectiveness results for every category seen in the
745
+ * rolling window, plus one aggregate result for all categories combined.
746
+ */
747
+ async thresholdEffectivenessAll(options) {
748
+ this.assertInitialized('thresholdEffectivenessAll');
749
+ let rawEntries;
750
+ try {
751
+ rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
752
+ }
753
+ catch {
754
+ rawEntries = [];
755
+ }
756
+ // Collect unique categories
757
+ const categories = new Set();
758
+ for (const raw of rawEntries) {
759
+ try {
760
+ const entry = JSON.parse(raw);
761
+ if (entry.category)
762
+ categories.add(entry.category);
763
+ }
764
+ catch { /* skip */ }
765
+ }
766
+ const results = await Promise.all([
767
+ this.thresholdEffectiveness({ minSamples: options?.minSamples }),
768
+ ...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
769
+ ]);
770
+ return results;
771
+ }
772
+ // -- Internal helpers exposed to package adapters --
773
+ /** @internal Default similarity threshold. */
774
+ get _defaultThreshold() { return this.defaultThreshold; }
775
+ /**
776
+ * Execute a stable FT.SEARCH for use by adapters (e.g. LangGraph).
777
+ * SORTBY inserted_at ASC gives stable ordering across paginated calls.
778
+ * @internal
779
+ */
780
+ async _searchEntries(filterExpr, limit, offset) {
781
+ return this.client.call('FT.SEARCH', this.indexName, filterExpr, 'SORTBY', 'inserted_at', 'ASC', 'LIMIT', String(offset), String(limit), 'DIALECT', '2');
782
+ }
783
+ /**
784
+ * Embed text for use by adapters (e.g. LangGraph semantic search).
785
+ * @internal
786
+ */
787
+ async _embedText(text) {
788
+ return this.embed(text);
789
+ }
790
+ // -- Private helpers --
275
791
  async _doInitialize() {
276
792
  const gen = this._initGeneration;
277
793
  return this.traced('initialize', async () => {
278
- const dim = await this.ensureIndexAndGetDimension();
279
- // If flush() ran while we were initializing, don't overwrite its state.
794
+ const { dim, hasBinaryRefs } = await this.ensureIndexAndGetDimension();
280
795
  if (this._initGeneration !== gen)
281
796
  return;
282
797
  this._dimension = dim;
798
+ this._hasBinaryRefs = hasBinaryRefs;
283
799
  this._initialized = true;
800
+ // Fire analytics init once (not on every flush+initialize cycle)
801
+ this.initAnalyticsSafe().catch(() => { });
284
802
  });
285
803
  }
804
+ async initAnalyticsSafe() {
805
+ if (this.analyticsInitiated)
806
+ return;
807
+ this.analyticsInitiated = true;
808
+ try {
809
+ const a = await (0, analytics_1.createAnalytics)(this.analyticsOpts);
810
+ if (this.shutdownCalled) {
811
+ await a.shutdown();
812
+ return;
813
+ }
814
+ this.analytics = a;
815
+ await a.init(this.client, this.name, {
816
+ defaultThreshold: this.defaultThreshold,
817
+ uncertaintyBand: this.uncertaintyBand,
818
+ defaultTtl: this.defaultTtl ?? null,
819
+ hasCostTable: !!this.costTable,
820
+ usesDefaultCostTable: this.usesDefaultCostTable,
821
+ embeddingCacheEnabled: this.embeddingCacheEnabled,
822
+ categoryThresholdCount: Object.keys(this.categoryThresholds).length,
823
+ dimension: this._dimension,
824
+ });
825
+ const intervalMs = this.analyticsOpts?.statsIntervalMs ?? 300_000;
826
+ if (!this.shutdownCalled && intervalMs > 0) {
827
+ this.statsTimer = setInterval(() => this.captureStatsSnapshot(), intervalMs);
828
+ this.statsTimer.unref();
829
+ }
830
+ }
831
+ catch {
832
+ // never throw from analytics
833
+ }
834
+ }
835
+ captureStatsSnapshot() {
836
+ this.stats()
837
+ .then((s) => {
838
+ this.analytics.capture('stats_snapshot', {
839
+ hits: s.hits,
840
+ misses: s.misses,
841
+ hit_rate: s.hitRate,
842
+ cost_saved_micros: s.costSavedMicros,
843
+ });
844
+ })
845
+ .catch(() => { });
846
+ }
286
847
  async ensureIndexAndGetDimension() {
287
848
  // Try reading an existing index
288
849
  try {
289
850
  const info = (await this.client.call('FT.INFO', this.indexName));
290
851
  const dim = this.parseDimensionFromInfo(info);
852
+ const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
291
853
  if (dim > 0)
292
- return dim;
293
- // Couldn't parse dimension from FT.INFO fall back to probe
294
- return (await this.embed('probe')).vector.length;
854
+ return { dim, hasBinaryRefs };
855
+ // Couldn't parse dimension from FT.INFO - fall back to probe
856
+ const probeDim = (await this.embed('probe')).vector.length;
857
+ return { dim: probeDim, hasBinaryRefs };
295
858
  }
296
859
  catch (err) {
297
860
  if (err instanceof errors_1.EmbeddingError)
@@ -300,18 +863,69 @@ class SemanticCache {
300
863
  throw new errors_1.ValkeyCommandError('FT.INFO', err);
301
864
  }
302
865
  }
303
- // Index doesn't exist probe dimension and create it
866
+ // Index doesn't exist - probe dimension and create it
304
867
  const dim = (await this.embed('probe')).vector.length;
305
868
  try {
306
- await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
869
+ await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'binary_refs', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'temperature', 'NUMERIC', 'top_p', 'NUMERIC', 'seed', 'NUMERIC', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
307
870
  }
308
871
  catch (err) {
309
872
  throw new errors_1.ValkeyCommandError('FT.CREATE', err);
310
873
  }
311
- return dim;
874
+ return { dim, hasBinaryRefs: true };
875
+ }
876
+ /** Check if the index schema has a binary_refs field. */
877
+ parseHasBinaryRefsFromInfo(info) {
878
+ for (let i = 0; i < info.length - 1; i += 2) {
879
+ const key = String(info[i]);
880
+ if (key !== 'attributes' && key !== 'fields')
881
+ continue;
882
+ const attributes = info[i + 1];
883
+ if (!Array.isArray(attributes))
884
+ continue;
885
+ for (const attr of attributes) {
886
+ if (!Array.isArray(attr))
887
+ continue;
888
+ for (let j = 0; j < attr.length - 1; j++) {
889
+ if (String(attr[j]) === 'identifier' && String(attr[j + 1]) === 'binary_refs') {
890
+ return true;
891
+ }
892
+ }
893
+ }
894
+ }
895
+ return false;
312
896
  }
313
- /** Wraps embedFn with error handling and duration tracking. */
897
+ /** Resolve a prompt (string or ContentBlock[]) into text + binary refs. */
898
+ resolvePrompt(prompt) {
899
+ if (typeof prompt === 'string') {
900
+ return { text: prompt, binaryRefs: [] };
901
+ }
902
+ const text = (0, utils_1.extractText)(prompt);
903
+ const binaryRefs = (0, utils_1.extractBinaryRefs)(prompt);
904
+ return { text, binaryRefs };
905
+ }
906
+ /** Wraps embedFn with error handling, duration tracking, and optional embedding cache. */
314
907
  async embed(text) {
908
+ // Check embedding cache
909
+ if (this.embeddingCacheEnabled && text) {
910
+ const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
911
+ const embedKey = `${this.embedKeyPrefix}${hash}`;
912
+ try {
913
+ const cached = await this.client.getBuffer(embedKey);
914
+ if (cached) {
915
+ this.telemetry.metrics.embeddingCacheTotal
916
+ .labels({ cache_name: this.name, result: 'hit' }).inc();
917
+ // Decode Float32 buffer
918
+ const vector = [];
919
+ for (let i = 0; i < cached.length; i += 4) {
920
+ vector.push(cached.readFloatLE(i));
921
+ }
922
+ return { vector, durationSec: 0 };
923
+ }
924
+ }
925
+ catch { /* ignore cache read errors */ }
926
+ this.telemetry.metrics.embeddingCacheTotal
927
+ .labels({ cache_name: this.name, result: 'miss' }).inc();
928
+ }
315
929
  const start = performance.now();
316
930
  let vector;
317
931
  try {
@@ -324,12 +938,22 @@ class SemanticCache {
324
938
  this.telemetry.metrics.embeddingDuration
325
939
  .labels({ cache_name: this.name })
326
940
  .observe(durationSec);
941
+ // Store in embedding cache
942
+ if (this.embeddingCacheEnabled && text) {
943
+ const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
944
+ const embedKey = `${this.embedKeyPrefix}${hash}`;
945
+ try {
946
+ const buf = (0, utils_1.encodeFloat32)(vector);
947
+ await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
948
+ }
949
+ catch { /* ignore cache write errors */ }
950
+ }
327
951
  return { vector, durationSec };
328
952
  }
329
953
  /**
330
954
  * Wraps a method body in an OTel span with automatic status, end, and
331
955
  * operation duration metric. The span is passed to fn so callers can
332
- * set attributes but callers must NOT call span.end() or span.setStatus(),
956
+ * set attributes - but callers must NOT call span.end() or span.setStatus(),
333
957
  * as traced() handles both.
334
958
  */
335
959
  async traced(operation, fn) {
@@ -359,6 +983,24 @@ class SemanticCache {
359
983
  pipeline.hincrby(this.statsKey, field, 1);
360
984
  await pipeline.exec();
361
985
  }
986
+ /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
987
+ async recordSimilarityWindow(score, result, category) {
988
+ const now = Date.now();
989
+ // Include a unique nonce so identical (score, result, category) tuples are
990
+ // each recorded as distinct ZADD members instead of overwriting each other.
991
+ const member = JSON.stringify({ score, result, category, _n: Math.random() });
992
+ const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
993
+ try {
994
+ const pipeline = this.client.pipeline();
995
+ pipeline.zadd(this.similarityWindowKey, now, member);
996
+ // Trim by time: remove entries older than 7 days
997
+ pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
998
+ // Trim by count: keep at most 10,000 most recent
999
+ pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
1000
+ await pipeline.exec();
1001
+ }
1002
+ catch { /* best effort - never fail on window writes */ }
1003
+ }
362
1004
  assertInitialized(method) {
363
1005
  if (!this._initialized) {
364
1006
  throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);