@betterdb/semantic-cache 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ export declare class SemanticCache {
8
8
  private readonly entryPrefix;
9
9
  private readonly statsKey;
10
10
  private readonly similarityWindowKey;
11
+ private readonly missPendingKey;
11
12
  private readonly configKey;
12
13
  private defaultThreshold;
13
14
  private readonly defaultTtl;
@@ -159,6 +160,19 @@ export declare class SemanticCache {
159
160
  private recordStat;
160
161
  /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
161
162
  private recordSimilarityWindow;
163
+ /**
164
+ * Track a miss so a subsequent store() can backfill its cost into the
165
+ * similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
166
+ * zset — entries beyond that are pruned on every record and backfill.
167
+ */
168
+ private recordMissPending;
169
+ /**
170
+ * After a successful store(), find the oldest pending miss for the same
171
+ * query and update its similarity-window record with the now-known cost.
172
+ * Best-effort — silently no-op if no pending miss exists or the bookkeeping
173
+ * entry has already been pruned.
174
+ */
175
+ private applyCostToPendingMiss;
162
176
  private assertInitialized;
163
177
  private assertDimension;
164
178
  private isIndexNotFoundError;
@@ -16,6 +16,19 @@ const PACKAGE_VERSION = require('../package.json').version;
16
16
  function errMsg(err) {
17
17
  return err instanceof Error ? err.message : String(err);
18
18
  }
19
/**
 * Parse the `cost_micros` field of a cached entry into a non-negative integer.
 *
 * @param raw Raw field value (normally a decimal string such as "1500");
 *   `undefined` or `null` means the field is absent.
 * @returns The parsed cost in micros, or `null` when the field is absent or is
 *   not a plain, non-negative, exactly representable integer.
 */
function parseHitCostMicros(raw) {
    if (raw === undefined || raw === null) {
        return null;
    }
    // parseInt() silently accepts partial numerics ("12abc" -> 12, "1.9" -> 1,
    // "1e6" -> 1). Require the whole value to be decimal digits so a corrupted
    // field is never reported (and accumulated) as a saved cost.
    const text = String(raw).trim();
    if (!/^\+?\d+$/.test(text)) {
        return null;
    }
    const n = Number(text);
    // Above Number.MAX_SAFE_INTEGER the value is no longer exact; treat as invalid.
    return Number.isSafeInteger(n) ? n : null;
}
29
/**
 * Derive a short, deterministic correlation id for a prompt: the first
 * 16 hex characters of the prompt's SHA-256 digest.
 */
function correlationIdFor(prompt) {
    const hasher = (0, node_crypto_2.createHash)('sha256');
    hasher.update(prompt);
    const hexDigest = hasher.digest('hex');
    return hexDigest.substring(0, 16);
}
19
32
  class SemanticCache {
20
33
  client;
21
34
  embedFn;
@@ -24,6 +37,7 @@ class SemanticCache {
24
37
  entryPrefix;
25
38
  statsKey;
26
39
  similarityWindowKey;
40
+ missPendingKey;
27
41
  configKey;
28
42
  defaultThreshold;
29
43
  defaultTtl;
@@ -68,6 +82,7 @@ class SemanticCache {
68
82
  this.entryPrefix = `${this.name}:entry:`;
69
83
  this.statsKey = `${this.name}:__stats`;
70
84
  this.similarityWindowKey = `${this.name}:__similarity_window`;
85
+ this.missPendingKey = `${this.name}:__miss_pending`;
71
86
  this.configKey = `${this.name}:__config`;
72
87
  this.embedKeyPrefix = `${this.name}:embed:`;
73
88
  this.defaultThreshold = options.defaultThreshold ?? 0.1;
@@ -141,10 +156,7 @@ class SemanticCache {
141
156
  }
142
157
  }
143
158
  // Cluster-aware SCAN for entry keys and embed cache keys
144
- const patterns = [
145
- `${this.name}:entry:*`,
146
- `${this.name}:embed:*`,
147
- ];
159
+ const patterns = [`${this.name}:entry:*`, `${this.name}:embed:*`];
148
160
  for (const pattern of patterns) {
149
161
  await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
150
162
  await nodeClient.del(keys);
@@ -152,6 +164,7 @@ class SemanticCache {
152
164
  }
153
165
  await this.client.del(this.statsKey);
154
166
  await this.client.del(this.similarityWindowKey);
167
+ await this.client.del(this.missPendingKey);
155
168
  this.analytics.capture('cache_flush');
156
169
  }
157
170
  /**
@@ -212,9 +225,9 @@ class SemanticCache {
212
225
  const userFilter = options?.filter;
213
226
  // AND semantics: each ref must be present — chain separate TAG clauses.
214
227
  const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
215
- ? (binaryRefs.length === 1
228
+ ? binaryRefs.length === 1
216
229
  ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
217
- : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
230
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
218
231
  : null;
219
232
  const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
220
233
  const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
@@ -230,15 +243,18 @@ class SemanticCache {
230
243
  const searchMs = performance.now() - searchStart;
231
244
  const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
232
245
  const categoryLabel = category || 'none';
233
- const timingAttrs = { 'embedding_latency_ms': embedSec * 1000, 'search_latency_ms': searchMs };
246
+ const timingAttrs = { embedding_latency_ms: embedSec * 1000, search_latency_ms: searchMs };
234
247
  // No candidates at all
235
248
  if (parsed.length === 0) {
236
249
  await this.recordStat('misses');
237
250
  this.telemetry.metrics.requestsTotal
238
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
251
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
252
+ .inc();
239
253
  span.setAttributes({
240
- 'cache.hit': false, 'cache.name': this.name,
241
- 'cache.category': categoryLabel, ...timingAttrs,
254
+ 'cache.hit': false,
255
+ 'cache.name': this.name,
256
+ 'cache.category': categoryLabel,
257
+ ...timingAttrs,
242
258
  });
243
259
  return { hit: false, confidence: 'miss' };
244
260
  }
@@ -246,19 +262,24 @@ class SemanticCache {
246
262
  const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
247
263
  if (!isNaN(score)) {
248
264
  this.telemetry.metrics.similarityScore
249
- .labels({ cache_name: this.name, category: categoryLabel }).observe(score);
265
+ .labels({ cache_name: this.name, category: categoryLabel })
266
+ .observe(score);
250
267
  }
251
268
  // Miss (no usable score, or score exceeds threshold)
252
269
  if (isNaN(score) || score > threshold) {
253
270
  if (!isNaN(score)) {
254
- await this.recordSimilarityWindow(score, 'miss', category);
271
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
272
+ await this.recordMissPending(promptText, missMember);
255
273
  }
256
274
  await this.recordStat('misses');
257
275
  this.telemetry.metrics.requestsTotal
258
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
276
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
277
+ .inc();
259
278
  span.setAttributes({
260
- 'cache.hit': false, 'cache.name': this.name,
261
- 'cache.category': categoryLabel, ...timingAttrs,
279
+ 'cache.hit': false,
280
+ 'cache.name': this.name,
281
+ 'cache.category': categoryLabel,
282
+ ...timingAttrs,
262
283
  ...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
263
284
  });
264
285
  const result = { hit: false, confidence: 'miss' };
@@ -278,17 +299,23 @@ class SemanticCache {
278
299
  .filter(({ s }) => !isNaN(s))
279
300
  .map(({ i, s }) => ({
280
301
  origIdx: i,
281
- candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
302
+ candidate: { response: parsed[i].fields['response'] ?? '', similarity: s, prompt: parsed[i].fields['prompt'] ?? '' },
282
303
  }));
283
304
  const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
284
305
  // Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
285
306
  // treated as a miss rather than silently falling back to the top candidate.
286
307
  if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
287
- await this.recordSimilarityWindow(score, 'miss', category);
308
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
309
+ await this.recordMissPending(promptText, missMember);
288
310
  await this.recordStat('misses');
289
311
  this.telemetry.metrics.requestsTotal
290
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
291
- span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
312
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
313
+ .inc();
314
+ span.setAttributes({
315
+ 'cache.hit': false,
316
+ 'cache.name': this.name,
317
+ 'cache.reranked': true,
318
+ });
292
319
  return { hit: false, confidence: 'miss' };
293
320
  }
294
321
  // Map back to the original parsed[] index (not the candidates[] index)
@@ -304,12 +331,16 @@ class SemanticCache {
304
331
  try {
305
332
  await this.client.del(winner.key);
306
333
  }
307
- catch { /* best effort */ }
308
- await this.recordSimilarityWindow(winnerScore, 'miss', category);
334
+ catch {
335
+ /* best effort */
336
+ }
337
+ const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
338
+ await this.recordMissPending(promptText, missMember);
309
339
  this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
310
340
  await this.recordStat('misses');
311
341
  this.telemetry.metrics.requestsTotal
312
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
342
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
343
+ .inc();
313
344
  span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
314
345
  return { hit: false, confidence: 'miss' };
315
346
  }
@@ -330,6 +361,8 @@ class SemanticCache {
330
361
  similarity: winnerScore,
331
362
  threshold,
332
363
  category: category || undefined,
364
+ // Reserved for consumer judge functions; not consumed by the built-in judge path.
365
+ cachedPrompt: winner.fields['prompt'] ?? '',
333
366
  }), timeoutMs);
334
367
  decision = accepted ? 'accept' : 'reject';
335
368
  }
@@ -363,7 +396,8 @@ class SemanticCache {
363
396
  }
364
397
  else {
365
398
  // reject / error_reject / timeout_reject → treat as miss
366
- await this.recordSimilarityWindow(winnerScore, 'miss', category);
399
+ const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
400
+ await this.recordMissPending(promptText, missMember);
367
401
  await this.recordStat('misses');
368
402
  this.telemetry.metrics.requestsTotal
369
403
  .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
@@ -387,27 +421,26 @@ class SemanticCache {
387
421
  }
388
422
  }
389
423
  // --- End judge ---
424
+ const hitCostMicros = parseHitCostMicros(winner.fields['cost_micros']);
390
425
  // Record as genuine hit (moved here from before the judge block)
391
- await this.recordSimilarityWindow(winnerScore, 'hit', category);
426
+ await this.recordSimilarityWindow(winnerScore, 'hit', category, hitCostMicros);
392
427
  await this.recordStat('hits');
393
428
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
394
429
  this.telemetry.metrics.requestsTotal
395
- .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
430
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
431
+ .inc();
396
432
  if (this.defaultTtl !== undefined && matchedKey) {
397
433
  await this.client.expire(matchedKey, this.defaultTtl);
398
434
  }
399
435
  // Cost saved
400
436
  let costSaved;
401
- const costMicrosStr = winner.fields['cost_micros'];
402
- if (costMicrosStr) {
403
- const costMicros = parseInt(costMicrosStr, 10);
404
- if (!isNaN(costMicros) && costMicros > 0) {
405
- costSaved = costMicros / 1_000_000;
406
- // Atomically increment cost_saved_micros in stats
407
- await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
408
- this.telemetry.metrics.costSavedTotal
409
- .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
410
- }
437
+ if (hitCostMicros !== null) {
438
+ costSaved = hitCostMicros / 1_000_000;
439
+ // Atomically increment cost_saved_micros in stats
440
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
441
+ this.telemetry.metrics.costSavedTotal
442
+ .labels({ cache_name: this.name, category: categoryLabel })
443
+ .inc(costSaved);
411
444
  }
412
445
  // Content blocks
413
446
  let contentBlocks;
@@ -416,16 +449,25 @@ class SemanticCache {
416
449
  try {
417
450
  contentBlocks = JSON.parse(contentBlocksStr);
418
451
  }
419
- catch { /* ignore parse errors */ }
452
+ catch {
453
+ /* ignore parse errors */
454
+ }
420
455
  }
421
456
  span.setAttributes({
422
- 'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
423
- 'cache.confidence': confidence, 'cache.matched_key': matchedKey,
424
- 'cache.category': categoryLabel, ...timingAttrs,
457
+ 'cache.hit': true,
458
+ 'cache.similarity': winnerScore,
459
+ 'cache.threshold': threshold,
460
+ 'cache.confidence': confidence,
461
+ 'cache.matched_key': matchedKey,
462
+ 'cache.category': categoryLabel,
463
+ ...timingAttrs,
425
464
  });
426
465
  const result = {
427
- hit: true, response: winner.fields['response'],
428
- similarity: winnerScore, confidence, matchedKey,
466
+ hit: true,
467
+ response: winner.fields['response'],
468
+ similarity: winnerScore,
469
+ confidence,
470
+ matchedKey,
429
471
  };
430
472
  if (costSaved !== undefined)
431
473
  result.costSaved = costSaved;
@@ -451,8 +493,9 @@ class SemanticCache {
451
493
  this.costTable) {
452
494
  const pricing = this.costTable[options.model];
453
495
  if (pricing) {
454
- costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
455
- options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
496
+ costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
497
+ (options.outputTokens * pricing.outputPer1k) / 1000) *
498
+ 1_000_000);
456
499
  }
457
500
  }
458
501
  const hashFields = {
@@ -489,10 +532,16 @@ class SemanticCache {
489
532
  if (ttl !== undefined)
490
533
  await this.client.expire(entryKey, ttl);
491
534
  span.setAttributes({
492
- 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
493
- 'cache.category': category || 'none', 'cache.model': model || 'none',
494
- 'embedding_latency_ms': embedSec * 1000,
535
+ 'cache.name': this.name,
536
+ 'cache.key': entryKey,
537
+ 'cache.ttl': ttl ?? -1,
538
+ 'cache.category': category || 'none',
539
+ 'cache.model': model || 'none',
540
+ embedding_latency_ms: embedSec * 1000,
495
541
  });
542
+ if (costMicros !== undefined && costMicros >= 0) {
543
+ await this.applyCostToPendingMiss(promptText, costMicros);
544
+ }
496
545
  return entryKey;
497
546
  });
498
547
  }
@@ -512,11 +561,15 @@ class SemanticCache {
512
561
  const category = options?.category ?? '';
513
562
  const model = options?.model ?? '';
514
563
  let costMicros;
515
- if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
564
+ if (options?.model &&
565
+ options?.inputTokens !== undefined &&
566
+ options?.outputTokens !== undefined &&
567
+ this.costTable) {
516
568
  const pricing = this.costTable[options.model];
517
569
  if (pricing) {
518
- costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
519
- options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
570
+ costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
571
+ (options.outputTokens * pricing.outputPer1k) / 1000) *
572
+ 1_000_000);
520
573
  }
521
574
  }
522
575
  const hashFields = {
@@ -535,8 +588,9 @@ class SemanticCache {
535
588
  if (costMicros !== undefined && costMicros > 0) {
536
589
  hashFields['cost_micros'] = String(costMicros);
537
590
  }
538
- if (options?.temperature !== undefined)
591
+ if (options?.temperature !== undefined) {
539
592
  hashFields['temperature'] = String(options.temperature);
593
+ }
540
594
  if (options?.topP !== undefined)
541
595
  hashFields['top_p'] = String(options.topP);
542
596
  if (options?.seed !== undefined)
@@ -551,10 +605,16 @@ class SemanticCache {
551
605
  if (ttl !== undefined)
552
606
  await this.client.expire(entryKey, ttl);
553
607
  span.setAttributes({
554
- 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
555
- 'cache.category': category || 'none', 'cache.model': model || 'none',
556
- 'embedding_latency_ms': embedSec * 1000,
608
+ 'cache.name': this.name,
609
+ 'cache.key': entryKey,
610
+ 'cache.ttl': ttl ?? -1,
611
+ 'cache.category': category || 'none',
612
+ 'cache.model': model || 'none',
613
+ embedding_latency_ms: embedSec * 1000,
557
614
  });
615
+ if (costMicros !== undefined && costMicros >= 0) {
616
+ await this.applyCostToPendingMiss(promptText, costMicros);
617
+ }
558
618
  return entryKey;
559
619
  });
560
620
  }
@@ -592,9 +652,9 @@ class SemanticCache {
592
652
  const { binaryRefs } = resolved[i];
593
653
  const { vector: embedding } = embeddings[i];
594
654
  const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
595
- ? (binaryRefs.length === 1
655
+ ? binaryRefs.length === 1
596
656
  ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
597
- : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
657
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
598
658
  : null;
599
659
  const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
600
660
  const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
@@ -612,7 +672,8 @@ class SemanticCache {
612
672
  if (err) {
613
673
  await this.recordStat('misses');
614
674
  this.telemetry.metrics.requestsTotal
615
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
675
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
676
+ .inc();
616
677
  results.push({ hit: false, confidence: 'miss' });
617
678
  continue;
618
679
  }
@@ -620,7 +681,8 @@ class SemanticCache {
620
681
  if (parsed.length === 0) {
621
682
  await this.recordStat('misses');
622
683
  this.telemetry.metrics.requestsTotal
623
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
684
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
685
+ .inc();
624
686
  results.push({ hit: false, confidence: 'miss' });
625
687
  continue;
626
688
  }
@@ -628,11 +690,13 @@ class SemanticCache {
628
690
  const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
629
691
  if (isNaN(score) || score > threshold) {
630
692
  if (!isNaN(score)) {
631
- await this.recordSimilarityWindow(score, 'miss', category);
693
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
694
+ await this.recordMissPending(resolved[i].text, missMember);
632
695
  }
633
696
  await this.recordStat('misses');
634
697
  this.telemetry.metrics.requestsTotal
635
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
698
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
699
+ .inc();
636
700
  const result = { hit: false, confidence: 'miss' };
637
701
  if (!isNaN(score)) {
638
702
  result.similarity = score;
@@ -641,26 +705,25 @@ class SemanticCache {
641
705
  results.push(result);
642
706
  continue;
643
707
  }
644
- await this.recordSimilarityWindow(score, 'hit', category);
708
+ const hitCostMicros = parseHitCostMicros(parsed[0].fields['cost_micros']);
709
+ await this.recordSimilarityWindow(score, 'hit', category, hitCostMicros);
645
710
  const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
646
711
  await this.recordStat('hits');
647
712
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
648
713
  this.telemetry.metrics.requestsTotal
649
- .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
714
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
715
+ .inc();
650
716
  const matchedKey = parsed[0].key;
651
717
  if (this.defaultTtl !== undefined && matchedKey) {
652
718
  await this.client.expire(matchedKey, this.defaultTtl);
653
719
  }
654
720
  let costSaved;
655
- const costMicrosStr = parsed[0].fields['cost_micros'];
656
- if (costMicrosStr) {
657
- const costMicros = parseInt(costMicrosStr, 10);
658
- if (!isNaN(costMicros) && costMicros > 0) {
659
- costSaved = costMicros / 1_000_000;
660
- await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
661
- this.telemetry.metrics.costSavedTotal
662
- .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
663
- }
721
+ if (hitCostMicros !== null) {
722
+ costSaved = hitCostMicros / 1_000_000;
723
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
724
+ this.telemetry.metrics.costSavedTotal
725
+ .labels({ cache_name: this.name, category: categoryLabel })
726
+ .inc(costSaved);
664
727
  }
665
728
  let contentBlocks;
666
729
  const contentBlocksStr = parsed[0].fields['content_blocks'];
@@ -668,11 +731,16 @@ class SemanticCache {
668
731
  try {
669
732
  contentBlocks = JSON.parse(contentBlocksStr);
670
733
  }
671
- catch { /* ignore */ }
734
+ catch {
735
+ /* ignore */
736
+ }
672
737
  }
673
738
  const result = {
674
- hit: true, response: parsed[0].fields['response'],
675
- similarity: score, confidence, matchedKey,
739
+ hit: true,
740
+ response: parsed[0].fields['response'],
741
+ similarity: score,
742
+ confidence,
743
+ matchedKey,
676
744
  };
677
745
  if (costSaved !== undefined)
678
746
  result.costSaved = costSaved;
@@ -703,8 +771,10 @@ class SemanticCache {
703
771
  const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
704
772
  if (parsed.length === 0) {
705
773
  span.setAttributes({
706
- 'cache.name': this.name, 'cache.filter': filter,
707
- 'cache.deleted_count': 0, 'cache.truncated': false,
774
+ 'cache.name': this.name,
775
+ 'cache.filter': filter,
776
+ 'cache.deleted_count': 0,
777
+ 'cache.truncated': false,
708
778
  });
709
779
  return { deleted: 0, truncated: false };
710
780
  }
@@ -717,8 +787,10 @@ class SemanticCache {
717
787
  throw new errors_1.ValkeyCommandError('DEL', err);
718
788
  }
719
789
  span.setAttributes({
720
- 'cache.name': this.name, 'cache.filter': filter,
721
- 'cache.deleted_count': keys.length, 'cache.truncated': truncated,
790
+ 'cache.name': this.name,
791
+ 'cache.filter': filter,
792
+ 'cache.deleted_count': keys.length,
793
+ 'cache.truncated': truncated,
722
794
  });
723
795
  return { deleted: keys.length, truncated };
724
796
  });
@@ -803,7 +875,9 @@ class SemanticCache {
803
875
  }
804
876
  }
805
877
  }
806
- catch { /* skip corrupt entries */ }
878
+ catch {
879
+ /* skip corrupt entries */
880
+ }
807
881
  }
808
882
  const sampleCount = entries.length;
809
883
  const categoryLabel = category ?? 'all';
@@ -890,11 +964,15 @@ class SemanticCache {
890
964
  if (entry.category)
891
965
  categories.add(entry.category);
892
966
  }
893
- catch { /* skip */ }
967
+ catch {
968
+ /* skip */
969
+ }
894
970
  }
895
971
  const results = await Promise.all([
896
972
  this.thresholdEffectiveness({ minSamples: options?.minSamples }),
897
- ...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
973
+ ...[...categories]
974
+ .filter(Boolean)
975
+ .map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
898
976
  ]);
899
977
  return results;
900
978
  }
@@ -947,7 +1025,9 @@ class SemanticCache {
947
1025
  }
948
1026
  // -- Internal helpers exposed to package adapters --
949
1027
  /** @internal Default similarity threshold. */
950
- get _defaultThreshold() { return this.defaultThreshold; }
1028
+ get _defaultThreshold() {
1029
+ return this.defaultThreshold;
1030
+ }
951
1031
  /** @internal Test-only getter. */
952
1032
  get _categoryThresholds() {
953
1033
  return this.categoryThresholds;
@@ -980,15 +1060,11 @@ class SemanticCache {
980
1060
  this.refreshConfig()
981
1061
  .then((ok) => {
982
1062
  if (!ok) {
983
- this.telemetry.metrics.configRefreshFailed
984
- .labels({ cache_name: this.name })
985
- .inc();
1063
+ this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
986
1064
  }
987
1065
  })
988
1066
  .catch(() => {
989
- this.telemetry.metrics.configRefreshFailed
990
- .labels({ cache_name: this.name })
991
- .inc();
1067
+ this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
992
1068
  });
993
1069
  };
994
1070
  // Synchronous first refresh: process started immediately after a proposal
@@ -1044,9 +1120,7 @@ class SemanticCache {
1044
1120
  metadata,
1045
1121
  heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
1046
1122
  onWriteFailed: () => {
1047
- this.telemetry.metrics.discoveryWriteFailed
1048
- .labels({ cache_name: this.name })
1049
- .inc();
1123
+ this.telemetry.metrics.discoveryWriteFailed.labels({ cache_name: this.name }).inc();
1050
1124
  },
1051
1125
  });
1052
1126
  await manager.register();
@@ -1164,7 +1238,8 @@ class SemanticCache {
1164
1238
  const cached = await this.client.getBuffer(embedKey);
1165
1239
  if (cached) {
1166
1240
  this.telemetry.metrics.embeddingCacheTotal
1167
- .labels({ cache_name: this.name, result: 'hit' }).inc();
1241
+ .labels({ cache_name: this.name, result: 'hit' })
1242
+ .inc();
1168
1243
  // Decode Float32 buffer
1169
1244
  const vector = [];
1170
1245
  for (let i = 0; i < cached.length; i += 4) {
@@ -1173,9 +1248,12 @@ class SemanticCache {
1173
1248
  return { vector, durationSec: 0 };
1174
1249
  }
1175
1250
  }
1176
- catch { /* ignore cache read errors */ }
1251
+ catch {
1252
+ /* ignore cache read errors */
1253
+ }
1177
1254
  this.telemetry.metrics.embeddingCacheTotal
1178
- .labels({ cache_name: this.name, result: 'miss' }).inc();
1255
+ .labels({ cache_name: this.name, result: 'miss' })
1256
+ .inc();
1179
1257
  }
1180
1258
  const start = performance.now();
1181
1259
  let vector;
@@ -1186,9 +1264,7 @@ class SemanticCache {
1186
1264
  throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
1187
1265
  }
1188
1266
  const durationSec = (performance.now() - start) / 1000;
1189
- this.telemetry.metrics.embeddingDuration
1190
- .labels({ cache_name: this.name })
1191
- .observe(durationSec);
1267
+ this.telemetry.metrics.embeddingDuration.labels({ cache_name: this.name }).observe(durationSec);
1192
1268
  // Store in embedding cache
1193
1269
  if (this.embeddingCacheEnabled && text) {
1194
1270
  const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
@@ -1197,7 +1273,9 @@ class SemanticCache {
1197
1273
  const buf = (0, utils_1.encodeFloat32)(vector);
1198
1274
  await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
1199
1275
  }
1200
- catch { /* ignore cache write errors */ }
1276
+ catch {
1277
+ /* ignore cache write errors */
1278
+ }
1201
1279
  }
1202
1280
  return { vector, durationSec };
1203
1281
  }
@@ -1235,22 +1313,99 @@ class SemanticCache {
1235
1313
  await pipeline.exec();
1236
1314
  }
1237
1315
  /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
1238
- async recordSimilarityWindow(score, result, category) {
1316
+ async recordSimilarityWindow(score, result, category, costSavedMicros) {
1239
1317
  const now = Date.now();
1240
- // Include a unique nonce so identical (score, result, category) tuples are
1241
- // each recorded as distinct ZADD members instead of overwriting each other.
1242
- const member = JSON.stringify({ score, result, category, _n: Math.random() });
1318
+ const member = JSON.stringify({
1319
+ score,
1320
+ result,
1321
+ category,
1322
+ _n: Math.random(),
1323
+ cost_saved_micros: costSavedMicros,
1324
+ });
1243
1325
  const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
1244
1326
  try {
1245
1327
  const pipeline = this.client.pipeline();
1246
1328
  pipeline.zadd(this.similarityWindowKey, now, member);
1247
- // Trim by time: remove entries older than 7 days
1248
1329
  pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
1249
- // Trim by count: keep at most 10,000 most recent
1250
1330
  pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
1251
1331
  await pipeline.exec();
1252
1332
  }
1253
- catch { /* best effort - never fail on window writes */ }
1333
+ catch {
1334
+ /* best effort - never fail on window writes */
1335
+ }
1336
+ return member;
1337
+ }
1338
+ /**
1339
+ * Track a miss so a subsequent store() can backfill its cost into the
1340
+ * similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
1341
+ * zset — entries beyond that are pruned on every record and backfill.
1342
+ */
1343
+ async recordMissPending(prompt, similarityMember) {
1344
+ const correlationId = correlationIdFor(prompt);
1345
+ const now = Date.now();
1346
+ const fiveMinutesAgo = now - 5 * 60 * 1000;
1347
+ const entry = JSON.stringify({ correlationId, similarityMember });
1348
+ try {
1349
+ await this.client.zadd(this.missPendingKey, now, entry);
1350
+ await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
1351
+ }
1352
+ catch {
1353
+ /* best effort */
1354
+ }
1355
+ }
1356
+ /**
1357
+ * After a successful store(), find the oldest pending miss for the same
1358
+ * query and update its similarity-window record with the now-known cost.
1359
+ * Best-effort — silently no-op if no pending miss exists or the bookkeeping
1360
+ * entry has already been pruned.
1361
+ */
1362
+ async applyCostToPendingMiss(prompt, costMicros) {
1363
+ const correlationId = correlationIdFor(prompt);
1364
+ const fiveMinutesAgo = Date.now() - 5 * 60 * 1000;
1365
+ try {
1366
+ await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
1367
+ const raw = (await this.client.zrange(this.missPendingKey, '0', '-1', 'WITHSCORES'));
1368
+ let matchedEntry = null;
1369
+ let matchedSimilarityMember = null;
1370
+ for (let i = 0; i < raw.length; i += 2) {
1371
+ const entryStr = raw[i];
1372
+ try {
1373
+ const parsed = JSON.parse(entryStr);
1374
+ if (parsed.correlationId === correlationId) {
1375
+ matchedEntry = entryStr;
1376
+ matchedSimilarityMember = parsed.similarityMember;
1377
+ break;
1378
+ }
1379
+ }
1380
+ catch {
1381
+ /* skip malformed */
1382
+ }
1383
+ }
1384
+ if (matchedEntry === null || matchedSimilarityMember === null) {
1385
+ return;
1386
+ }
1387
+ const rawScore = await this.client.zscore(this.similarityWindowKey, matchedSimilarityMember);
1388
+ if (rawScore === null) {
1389
+ await this.client.zrem(this.missPendingKey, matchedEntry);
1390
+ return;
1391
+ }
1392
+ const similarityScore = Number(rawScore);
1393
+ if (!Number.isFinite(similarityScore)) {
1394
+ await this.client.zrem(this.missPendingKey, matchedEntry);
1395
+ return;
1396
+ }
1397
+ const parsedMember = JSON.parse(matchedSimilarityMember);
1398
+ parsedMember.cost_saved_micros = costMicros;
1399
+ const updatedMember = JSON.stringify(parsedMember);
1400
+ const updatePipeline = this.client.pipeline();
1401
+ updatePipeline.zrem(this.similarityWindowKey, matchedSimilarityMember);
1402
+ updatePipeline.zadd(this.similarityWindowKey, similarityScore, updatedMember);
1403
+ updatePipeline.zrem(this.missPendingKey, matchedEntry);
1404
+ await updatePipeline.exec();
1405
+ }
1406
+ catch {
1407
+ /* never fail store() because of bookkeeping */
1408
+ }
1254
1409
  }
1255
1410
  assertInitialized(method) {
1256
1411
  if (!this._initialized) {
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ export { SemanticCache } from './SemanticCache';
2
2
  export type { ThresholdEffectivenessResult } from './SemanticCache';
3
3
  export { DEFAULT_COST_TABLE } from './defaultCostTable';
4
4
  export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
5
+ export { createKeywordOverlapRerank } from './rerank';
5
6
  export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
6
7
  export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
7
8
  export { escapeTag } from './utils';
package/dist/index.js CHANGED
@@ -1,10 +1,12 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
3
+ exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.createKeywordOverlapRerank = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
4
4
  var SemanticCache_1 = require("./SemanticCache");
5
5
  Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
6
6
  var defaultCostTable_1 = require("./defaultCostTable");
7
7
  Object.defineProperty(exports, "DEFAULT_COST_TABLE", { enumerable: true, get: function () { return defaultCostTable_1.DEFAULT_COST_TABLE; } });
8
+ var rerank_1 = require("./rerank");
9
+ Object.defineProperty(exports, "createKeywordOverlapRerank", { enumerable: true, get: function () { return rerank_1.createKeywordOverlapRerank; } });
8
10
  var errors_1 = require("./errors");
9
11
  Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
10
12
  Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Built-in rerank factories for @betterdb/semantic-cache.
3
+ */
4
/**
 * Create the built-in keyword-overlap reranker.
 *
 * The returned function blends cosine similarity with word overlap and
 * resolves to the index of the best candidate.
 *
 * @param options - Optional settings.
 *   - `compare`: which candidate field the incoming query is compared with.
 *     - `"prompt"` (default) – overlap of the incoming query against each
 *       candidate's stored prompt. Equivalence signal. Catches entity
 *       mismatches (e.g. "weather in Paris" vs "weather in Berlin").
 *     - `"response"` – overlap of the incoming query against each candidate's
 *       cached response. Relevance signal.
 *   - `cosineWeight`: weight on cosine similarity in [0, 1]. Overlap weight is
 *     `1 - cosineWeight`. Default: 0.7 (overlap 0.3).
 * @returns An async rerank function taking the query and the candidates and
 *   resolving to the index of the best candidate. Candidate objects carry
 *   `similarity` (cosine distance, lower = more similar), `response`
 *   (string), and `prompt` (string, stored prompt).
 * @throws Error if `cosineWeight` is outside [0, 1].
 */
export declare function createKeywordOverlapRerank(options?: {
    compare?: 'prompt' | 'response';
    cosineWeight?: number;
}): (query: string, candidates: Array<{
    response: string;
    similarity: number;
    prompt: string;
}>) => Promise<number>;
package/dist/rerank.js ADDED
@@ -0,0 +1,73 @@
1
+ "use strict";
2
+ /**
3
+ * Built-in rerank factories for @betterdb/semantic-cache.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.createKeywordOverlapRerank = createKeywordOverlapRerank;
7
+ /**
8
+ * Tokenize: lowercase, split on whitespace, strip surrounding punctuation.
9
+ * Deterministic and dependency-free.
10
+ * IDF weighting would attach here at the token-weighting step.
11
+ */
12
/** Punctuation stripped from the start and end of each whitespace-delimited token. */
const EDGE_PUNCTUATION = /^[.,!?;:"'()\[\]{}<>]+|[.,!?;:"'()\[\]{}<>]+$/g;
/**
 * Tokenize: lowercase, split on whitespace, strip surrounding punctuation.
 * Deterministic and dependency-free.
 * IDF weighting would attach here at the token-weighting step.
 *
 * @param {string} text Text to split into tokens.
 * @returns {Set<string>} The distinct, non-empty tokens of `text`.
 */
function tokenize(text) {
    const out = new Set();
    for (const raw of text.toLowerCase().split(/\s+/)) {
        const tok = raw.replace(EDGE_PUNCTUATION, '');
        // Leading/trailing whitespace and punctuation-only words yield '' — skip them.
        if (tok)
            out.add(tok);
    }
    return out;
}
/**
 * Built-in keyword-overlap reranker.
 *
 * Blends cosine similarity with word overlap and returns the index of the
 * best candidate.
 *
 * `options.compare`
 *   `"prompt"`   – overlap of the incoming query against each candidate's stored
 *                  prompt. Equivalence signal. Catches entity mismatches
 *                  (e.g. "weather in Paris" vs "weather in Berlin"). Default.
 *   `"response"` – overlap of the incoming query against each candidate's cached
 *                  response. Relevance signal.
 *
 * `options.cosineWeight`
 *   Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
 *   Default: 0.7 (overlap 0.3).
 *
 * Candidate objects carry: `similarity` (cosine distance, lower = more similar),
 * `response` (string), and `prompt` (string, stored prompt).
 *
 * @param {{ compare?: 'prompt' | 'response', cosineWeight?: number }} [options]
 * @returns {(query: string, candidates: Array<{ response: string, similarity: number, prompt: string }>) => Promise<number>}
 *   Async function resolving to the index of the highest-scoring candidate
 *   (the first one on ties; 0 when `candidates` is empty).
 * @throws {Error} If `cosineWeight` is NaN or outside [0, 1].
 */
function createKeywordOverlapRerank(options) {
    const compare = options?.compare ?? 'prompt';
    const cosineWeight = options?.cosineWeight ?? 0.7;
    // Written as a negated range test so NaN is rejected too: a plain
    // `< 0 || > 1` check lets NaN through, which turns every score into NaN
    // and makes the reranker silently return index 0.
    if (!(cosineWeight >= 0 && cosineWeight <= 1)) {
        throw new Error('cosineWeight must be in [0, 1]');
    }
    const overlapWeight = 1.0 - cosineWeight;
    return async (query, candidates) => {
        const queryTokens = tokenize(query);
        let bestIdx = 0;
        let bestScore = -Infinity;
        for (let i = 0; i < candidates.length; i++) {
            const text = candidates[i][compare] ?? '';
            const candTokens = tokenize(text);
            // Fraction of the query's tokens that also occur in the candidate text.
            let overlap = 0;
            if (queryTokens.size > 0) {
                let intersection = 0;
                for (const t of queryTokens) {
                    if (candTokens.has(t))
                        intersection++;
                }
                overlap = intersection / queryTokens.size;
            }
            // `similarity` is a distance (lower = closer), so flip it into a similarity.
            const cosineSim = 1.0 - candidates[i].similarity;
            const score = cosineWeight * cosineSim + overlapWeight * overlap;
            if (score > bestScore) {
                bestScore = score;
                bestIdx = i;
            }
        }
        return bestIdx;
    };
}
package/dist/types.d.ts CHANGED
@@ -127,6 +127,7 @@ export interface RerankOptions {
127
127
  rerankFn: (query: string, candidates: Array<{
128
128
  response: string;
129
129
  similarity: number;
130
+ prompt: string;
130
131
  }>) => Promise<number>;
131
132
  }
132
133
  /**
@@ -162,6 +163,8 @@ export interface JudgeOptions {
162
163
  similarity: number;
163
164
  threshold: number;
164
165
  category: string | undefined;
166
+ /** The stored prompt text for the matched entry. */
167
+ cachedPrompt: string;
165
168
  }) => Promise<boolean>;
166
169
  /**
167
170
  * Behavior when judgeFn throws or exceeds timeoutMs.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@betterdb/semantic-cache",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
5
5
  "keywords": [
6
6
  "valkey",
@@ -103,13 +103,13 @@
103
103
  },
104
104
  "dependencies": {
105
105
  "@opentelemetry/api": "^1.9.0",
106
+ "posthog-node": ">=4.0.0",
106
107
  "prom-client": "^15.1.3"
107
108
  },
108
109
  "engines": {
109
110
  "node": ">=20.0.0"
110
111
  },
111
112
  "peerDependencies": {
112
- "posthog-node": ">=4.0.0",
113
113
  "@anthropic-ai/sdk": ">=0.90.0",
114
114
  "@langchain/core": ">=0.3.0",
115
115
  "@langchain/langgraph-checkpoint": ">=0.1.0",
@@ -119,9 +119,6 @@
119
119
  "openai": ">=6.0.0"
120
120
  },
121
121
  "peerDependenciesMeta": {
122
- "posthog-node": {
123
- "optional": true
124
- },
125
122
  "@anthropic-ai/sdk": {
126
123
  "optional": true
127
124
  },