@betterdb/semantic-cache 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2026-present BetterDB Inc.
2
+
3
+ Portions of this software are licensed as follows:
4
+
5
+ - All content residing under the "doc/" directory of this repository is licensed under the "Creative Commons: CC BY-SA 4.0 license".
6
+
7
+ - All content that resides under the "proprietary/" directory of this repository, if that directory exists, is licensed under the license defined in "proprietary/LICENSE".
8
+
9
+ - All third-party components incorporated into the BetterDB Software are licensed under the original license provided by the owner of the applicable component.
10
+
11
+ - Content outside of the above-mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
12
+
13
+ MIT License
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -8,6 +8,7 @@ export declare class SemanticCache {
8
8
  private readonly entryPrefix;
9
9
  private readonly statsKey;
10
10
  private readonly similarityWindowKey;
11
+ private readonly missPendingKey;
11
12
  private readonly configKey;
12
13
  private defaultThreshold;
13
14
  private readonly defaultTtl;
@@ -159,10 +160,21 @@ export declare class SemanticCache {
159
160
  private recordStat;
160
161
  /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
161
162
  private recordSimilarityWindow;
163
+ /**
164
+ * Track a miss so a subsequent store() can backfill its cost into the
165
+ * similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
166
+ * zset — entries beyond that are pruned on every record and backfill.
167
+ */
168
+ private recordMissPending;
169
+ /**
170
+ * After a successful store(), find the oldest pending miss for the same
171
+ * query and update its similarity-window record with the now-known cost.
172
+ * Best-effort — silently no-op if no pending miss exists or the bookkeeping
173
+ * entry has already been pruned.
174
+ */
175
+ private applyCostToPendingMiss;
162
176
  private assertInitialized;
163
177
  private assertDimension;
164
- private isIndexNotFoundError;
165
- private parseDimensionFromInfo;
166
178
  }
167
179
  export interface ThresholdEffectivenessResult {
168
180
  category: string;
@@ -6,6 +6,7 @@ const node_crypto_2 = require("node:crypto");
6
6
  const api_1 = require("@opentelemetry/api");
7
7
  const errors_1 = require("./errors");
8
8
  const telemetry_1 = require("./telemetry");
9
+ const valkey_search_kit_1 = require("@betterdb/valkey-search-kit");
9
10
  const utils_1 = require("./utils");
10
11
  const defaultCostTable_1 = require("./defaultCostTable");
11
12
  const cluster_1 = require("./cluster");
@@ -16,6 +17,19 @@ const PACKAGE_VERSION = require('../package.json').version;
16
17
  function errMsg(err) {
17
18
  return err instanceof Error ? err.message : String(err);
18
19
  }
20
+ function parseHitCostMicros(raw) {
21
+ if (raw === undefined || raw === null) {
22
+ return null;
23
+ }
24
+ const n = parseInt(raw, 10);
25
+ if (!Number.isFinite(n) || n < 0) {
26
+ return null;
27
+ }
28
+ return n;
29
+ }
30
+ function correlationIdFor(prompt) {
31
+ return (0, node_crypto_2.createHash)('sha256').update(prompt).digest('hex').slice(0, 16);
32
+ }
19
33
  class SemanticCache {
20
34
  client;
21
35
  embedFn;
@@ -24,6 +38,7 @@ class SemanticCache {
24
38
  entryPrefix;
25
39
  statsKey;
26
40
  similarityWindowKey;
41
+ missPendingKey;
27
42
  configKey;
28
43
  defaultThreshold;
29
44
  defaultTtl;
@@ -68,6 +83,7 @@ class SemanticCache {
68
83
  this.entryPrefix = `${this.name}:entry:`;
69
84
  this.statsKey = `${this.name}:__stats`;
70
85
  this.similarityWindowKey = `${this.name}:__similarity_window`;
86
+ this.missPendingKey = `${this.name}:__miss_pending`;
71
87
  this.configKey = `${this.name}:__config`;
72
88
  this.embedKeyPrefix = `${this.name}:embed:`;
73
89
  this.defaultThreshold = options.defaultThreshold ?? 0.1;
@@ -136,15 +152,12 @@ class SemanticCache {
136
152
  await this.client.call('FT.DROPINDEX', this.indexName);
137
153
  }
138
154
  catch (err) {
139
- if (!this.isIndexNotFoundError(err)) {
155
+ if (!(0, valkey_search_kit_1.isIndexNotFoundError)(err)) {
140
156
  throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
141
157
  }
142
158
  }
143
159
  // Cluster-aware SCAN for entry keys and embed cache keys
144
- const patterns = [
145
- `${this.name}:entry:*`,
146
- `${this.name}:embed:*`,
147
- ];
160
+ const patterns = [`${this.name}:entry:*`, `${this.name}:embed:*`];
148
161
  for (const pattern of patterns) {
149
162
  await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
150
163
  await nodeClient.del(keys);
@@ -152,6 +165,7 @@ class SemanticCache {
152
165
  }
153
166
  await this.client.del(this.statsKey);
154
167
  await this.client.del(this.similarityWindowKey);
168
+ await this.client.del(this.missPendingKey);
155
169
  this.analytics.capture('cache_flush');
156
170
  }
157
171
  /**
@@ -212,9 +226,9 @@ class SemanticCache {
212
226
  const userFilter = options?.filter;
213
227
  // AND semantics: each ref must be present — chain separate TAG clauses.
214
228
  const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
215
- ? (binaryRefs.length === 1
229
+ ? binaryRefs.length === 1
216
230
  ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
217
- : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
231
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
218
232
  : null;
219
233
  const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
220
234
  const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
@@ -230,15 +244,18 @@ class SemanticCache {
230
244
  const searchMs = performance.now() - searchStart;
231
245
  const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
232
246
  const categoryLabel = category || 'none';
233
- const timingAttrs = { 'embedding_latency_ms': embedSec * 1000, 'search_latency_ms': searchMs };
247
+ const timingAttrs = { embedding_latency_ms: embedSec * 1000, search_latency_ms: searchMs };
234
248
  // No candidates at all
235
249
  if (parsed.length === 0) {
236
250
  await this.recordStat('misses');
237
251
  this.telemetry.metrics.requestsTotal
238
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
252
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
253
+ .inc();
239
254
  span.setAttributes({
240
- 'cache.hit': false, 'cache.name': this.name,
241
- 'cache.category': categoryLabel, ...timingAttrs,
255
+ 'cache.hit': false,
256
+ 'cache.name': this.name,
257
+ 'cache.category': categoryLabel,
258
+ ...timingAttrs,
242
259
  });
243
260
  return { hit: false, confidence: 'miss' };
244
261
  }
@@ -246,19 +263,24 @@ class SemanticCache {
246
263
  const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
247
264
  if (!isNaN(score)) {
248
265
  this.telemetry.metrics.similarityScore
249
- .labels({ cache_name: this.name, category: categoryLabel }).observe(score);
266
+ .labels({ cache_name: this.name, category: categoryLabel })
267
+ .observe(score);
250
268
  }
251
269
  // Miss (no usable score, or score exceeds threshold)
252
270
  if (isNaN(score) || score > threshold) {
253
271
  if (!isNaN(score)) {
254
- await this.recordSimilarityWindow(score, 'miss', category);
272
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
273
+ await this.recordMissPending(promptText, missMember);
255
274
  }
256
275
  await this.recordStat('misses');
257
276
  this.telemetry.metrics.requestsTotal
258
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
277
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
278
+ .inc();
259
279
  span.setAttributes({
260
- 'cache.hit': false, 'cache.name': this.name,
261
- 'cache.category': categoryLabel, ...timingAttrs,
280
+ 'cache.hit': false,
281
+ 'cache.name': this.name,
282
+ 'cache.category': categoryLabel,
283
+ ...timingAttrs,
262
284
  ...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
263
285
  });
264
286
  const result = { hit: false, confidence: 'miss' };
@@ -278,17 +300,23 @@ class SemanticCache {
278
300
  .filter(({ s }) => !isNaN(s))
279
301
  .map(({ i, s }) => ({
280
302
  origIdx: i,
281
- candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
303
+ candidate: { response: parsed[i].fields['response'] ?? '', similarity: s, prompt: parsed[i].fields['prompt'] ?? '' },
282
304
  }));
283
305
  const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
284
306
  // Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
285
307
  // treated as a miss rather than silently falling back to the top candidate.
286
308
  if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
287
- await this.recordSimilarityWindow(score, 'miss', category);
309
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
310
+ await this.recordMissPending(promptText, missMember);
288
311
  await this.recordStat('misses');
289
312
  this.telemetry.metrics.requestsTotal
290
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
291
- span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
313
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
314
+ .inc();
315
+ span.setAttributes({
316
+ 'cache.hit': false,
317
+ 'cache.name': this.name,
318
+ 'cache.reranked': true,
319
+ });
292
320
  return { hit: false, confidence: 'miss' };
293
321
  }
294
322
  // Map back to the original parsed[] index (not the candidates[] index)
@@ -304,12 +332,16 @@ class SemanticCache {
304
332
  try {
305
333
  await this.client.del(winner.key);
306
334
  }
307
- catch { /* best effort */ }
308
- await this.recordSimilarityWindow(winnerScore, 'miss', category);
335
+ catch {
336
+ /* best effort */
337
+ }
338
+ const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
339
+ await this.recordMissPending(promptText, missMember);
309
340
  this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
310
341
  await this.recordStat('misses');
311
342
  this.telemetry.metrics.requestsTotal
312
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
343
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
344
+ .inc();
313
345
  span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
314
346
  return { hit: false, confidence: 'miss' };
315
347
  }
@@ -330,6 +362,8 @@ class SemanticCache {
330
362
  similarity: winnerScore,
331
363
  threshold,
332
364
  category: category || undefined,
365
+ // Reserved for consumer judge functions; not consumed by the built-in judge path.
366
+ cachedPrompt: winner.fields['prompt'] ?? '',
333
367
  }), timeoutMs);
334
368
  decision = accepted ? 'accept' : 'reject';
335
369
  }
@@ -363,7 +397,8 @@ class SemanticCache {
363
397
  }
364
398
  else {
365
399
  // reject / error_reject / timeout_reject → treat as miss
366
- await this.recordSimilarityWindow(winnerScore, 'miss', category);
400
+ const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
401
+ await this.recordMissPending(promptText, missMember);
367
402
  await this.recordStat('misses');
368
403
  this.telemetry.metrics.requestsTotal
369
404
  .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
@@ -387,27 +422,26 @@ class SemanticCache {
387
422
  }
388
423
  }
389
424
  // --- End judge ---
425
+ const hitCostMicros = parseHitCostMicros(winner.fields['cost_micros']);
390
426
  // Record as genuine hit (moved here from before the judge block)
391
- await this.recordSimilarityWindow(winnerScore, 'hit', category);
427
+ await this.recordSimilarityWindow(winnerScore, 'hit', category, hitCostMicros);
392
428
  await this.recordStat('hits');
393
429
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
394
430
  this.telemetry.metrics.requestsTotal
395
- .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
431
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
432
+ .inc();
396
433
  if (this.defaultTtl !== undefined && matchedKey) {
397
434
  await this.client.expire(matchedKey, this.defaultTtl);
398
435
  }
399
436
  // Cost saved
400
437
  let costSaved;
401
- const costMicrosStr = winner.fields['cost_micros'];
402
- if (costMicrosStr) {
403
- const costMicros = parseInt(costMicrosStr, 10);
404
- if (!isNaN(costMicros) && costMicros > 0) {
405
- costSaved = costMicros / 1_000_000;
406
- // Atomically increment cost_saved_micros in stats
407
- await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
408
- this.telemetry.metrics.costSavedTotal
409
- .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
410
- }
438
+ if (hitCostMicros !== null) {
439
+ costSaved = hitCostMicros / 1_000_000;
440
+ // Atomically increment cost_saved_micros in stats
441
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
442
+ this.telemetry.metrics.costSavedTotal
443
+ .labels({ cache_name: this.name, category: categoryLabel })
444
+ .inc(costSaved);
411
445
  }
412
446
  // Content blocks
413
447
  let contentBlocks;
@@ -416,16 +450,25 @@ class SemanticCache {
416
450
  try {
417
451
  contentBlocks = JSON.parse(contentBlocksStr);
418
452
  }
419
- catch { /* ignore parse errors */ }
453
+ catch {
454
+ /* ignore parse errors */
455
+ }
420
456
  }
421
457
  span.setAttributes({
422
- 'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
423
- 'cache.confidence': confidence, 'cache.matched_key': matchedKey,
424
- 'cache.category': categoryLabel, ...timingAttrs,
458
+ 'cache.hit': true,
459
+ 'cache.similarity': winnerScore,
460
+ 'cache.threshold': threshold,
461
+ 'cache.confidence': confidence,
462
+ 'cache.matched_key': matchedKey,
463
+ 'cache.category': categoryLabel,
464
+ ...timingAttrs,
425
465
  });
426
466
  const result = {
427
- hit: true, response: winner.fields['response'],
428
- similarity: winnerScore, confidence, matchedKey,
467
+ hit: true,
468
+ response: winner.fields['response'],
469
+ similarity: winnerScore,
470
+ confidence,
471
+ matchedKey,
429
472
  };
430
473
  if (costSaved !== undefined)
431
474
  result.costSaved = costSaved;
@@ -451,8 +494,9 @@ class SemanticCache {
451
494
  this.costTable) {
452
495
  const pricing = this.costTable[options.model];
453
496
  if (pricing) {
454
- costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
455
- options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
497
+ costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
498
+ (options.outputTokens * pricing.outputPer1k) / 1000) *
499
+ 1_000_000);
456
500
  }
457
501
  }
458
502
  const hashFields = {
@@ -489,10 +533,16 @@ class SemanticCache {
489
533
  if (ttl !== undefined)
490
534
  await this.client.expire(entryKey, ttl);
491
535
  span.setAttributes({
492
- 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
493
- 'cache.category': category || 'none', 'cache.model': model || 'none',
494
- 'embedding_latency_ms': embedSec * 1000,
536
+ 'cache.name': this.name,
537
+ 'cache.key': entryKey,
538
+ 'cache.ttl': ttl ?? -1,
539
+ 'cache.category': category || 'none',
540
+ 'cache.model': model || 'none',
541
+ embedding_latency_ms: embedSec * 1000,
495
542
  });
543
+ if (costMicros !== undefined && costMicros >= 0) {
544
+ await this.applyCostToPendingMiss(promptText, costMicros);
545
+ }
496
546
  return entryKey;
497
547
  });
498
548
  }
@@ -512,11 +562,15 @@ class SemanticCache {
512
562
  const category = options?.category ?? '';
513
563
  const model = options?.model ?? '';
514
564
  let costMicros;
515
- if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
565
+ if (options?.model &&
566
+ options?.inputTokens !== undefined &&
567
+ options?.outputTokens !== undefined &&
568
+ this.costTable) {
516
569
  const pricing = this.costTable[options.model];
517
570
  if (pricing) {
518
- costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
519
- options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
571
+ costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
572
+ (options.outputTokens * pricing.outputPer1k) / 1000) *
573
+ 1_000_000);
520
574
  }
521
575
  }
522
576
  const hashFields = {
@@ -535,8 +589,9 @@ class SemanticCache {
535
589
  if (costMicros !== undefined && costMicros > 0) {
536
590
  hashFields['cost_micros'] = String(costMicros);
537
591
  }
538
- if (options?.temperature !== undefined)
592
+ if (options?.temperature !== undefined) {
539
593
  hashFields['temperature'] = String(options.temperature);
594
+ }
540
595
  if (options?.topP !== undefined)
541
596
  hashFields['top_p'] = String(options.topP);
542
597
  if (options?.seed !== undefined)
@@ -551,10 +606,16 @@ class SemanticCache {
551
606
  if (ttl !== undefined)
552
607
  await this.client.expire(entryKey, ttl);
553
608
  span.setAttributes({
554
- 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
555
- 'cache.category': category || 'none', 'cache.model': model || 'none',
556
- 'embedding_latency_ms': embedSec * 1000,
609
+ 'cache.name': this.name,
610
+ 'cache.key': entryKey,
611
+ 'cache.ttl': ttl ?? -1,
612
+ 'cache.category': category || 'none',
613
+ 'cache.model': model || 'none',
614
+ embedding_latency_ms: embedSec * 1000,
557
615
  });
616
+ if (costMicros !== undefined && costMicros >= 0) {
617
+ await this.applyCostToPendingMiss(promptText, costMicros);
618
+ }
558
619
  return entryKey;
559
620
  });
560
621
  }
@@ -592,9 +653,9 @@ class SemanticCache {
592
653
  const { binaryRefs } = resolved[i];
593
654
  const { vector: embedding } = embeddings[i];
594
655
  const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
595
- ? (binaryRefs.length === 1
656
+ ? binaryRefs.length === 1
596
657
  ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
597
- : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
658
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
598
659
  : null;
599
660
  const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
600
661
  const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
@@ -612,7 +673,8 @@ class SemanticCache {
612
673
  if (err) {
613
674
  await this.recordStat('misses');
614
675
  this.telemetry.metrics.requestsTotal
615
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
676
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
677
+ .inc();
616
678
  results.push({ hit: false, confidence: 'miss' });
617
679
  continue;
618
680
  }
@@ -620,7 +682,8 @@ class SemanticCache {
620
682
  if (parsed.length === 0) {
621
683
  await this.recordStat('misses');
622
684
  this.telemetry.metrics.requestsTotal
623
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
685
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
686
+ .inc();
624
687
  results.push({ hit: false, confidence: 'miss' });
625
688
  continue;
626
689
  }
@@ -628,11 +691,13 @@ class SemanticCache {
628
691
  const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
629
692
  if (isNaN(score) || score > threshold) {
630
693
  if (!isNaN(score)) {
631
- await this.recordSimilarityWindow(score, 'miss', category);
694
+ const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
695
+ await this.recordMissPending(resolved[i].text, missMember);
632
696
  }
633
697
  await this.recordStat('misses');
634
698
  this.telemetry.metrics.requestsTotal
635
- .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
699
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
700
+ .inc();
636
701
  const result = { hit: false, confidence: 'miss' };
637
702
  if (!isNaN(score)) {
638
703
  result.similarity = score;
@@ -641,26 +706,25 @@ class SemanticCache {
641
706
  results.push(result);
642
707
  continue;
643
708
  }
644
- await this.recordSimilarityWindow(score, 'hit', category);
709
+ const hitCostMicros = parseHitCostMicros(parsed[0].fields['cost_micros']);
710
+ await this.recordSimilarityWindow(score, 'hit', category, hitCostMicros);
645
711
  const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
646
712
  await this.recordStat('hits');
647
713
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
648
714
  this.telemetry.metrics.requestsTotal
649
- .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
715
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
716
+ .inc();
650
717
  const matchedKey = parsed[0].key;
651
718
  if (this.defaultTtl !== undefined && matchedKey) {
652
719
  await this.client.expire(matchedKey, this.defaultTtl);
653
720
  }
654
721
  let costSaved;
655
- const costMicrosStr = parsed[0].fields['cost_micros'];
656
- if (costMicrosStr) {
657
- const costMicros = parseInt(costMicrosStr, 10);
658
- if (!isNaN(costMicros) && costMicros > 0) {
659
- costSaved = costMicros / 1_000_000;
660
- await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
661
- this.telemetry.metrics.costSavedTotal
662
- .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
663
- }
722
+ if (hitCostMicros !== null) {
723
+ costSaved = hitCostMicros / 1_000_000;
724
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
725
+ this.telemetry.metrics.costSavedTotal
726
+ .labels({ cache_name: this.name, category: categoryLabel })
727
+ .inc(costSaved);
664
728
  }
665
729
  let contentBlocks;
666
730
  const contentBlocksStr = parsed[0].fields['content_blocks'];
@@ -668,11 +732,16 @@ class SemanticCache {
668
732
  try {
669
733
  contentBlocks = JSON.parse(contentBlocksStr);
670
734
  }
671
- catch { /* ignore */ }
735
+ catch {
736
+ /* ignore */
737
+ }
672
738
  }
673
739
  const result = {
674
- hit: true, response: parsed[0].fields['response'],
675
- similarity: score, confidence, matchedKey,
740
+ hit: true,
741
+ response: parsed[0].fields['response'],
742
+ similarity: score,
743
+ confidence,
744
+ matchedKey,
676
745
  };
677
746
  if (costSaved !== undefined)
678
747
  result.costSaved = costSaved;
@@ -703,8 +772,10 @@ class SemanticCache {
703
772
  const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
704
773
  if (parsed.length === 0) {
705
774
  span.setAttributes({
706
- 'cache.name': this.name, 'cache.filter': filter,
707
- 'cache.deleted_count': 0, 'cache.truncated': false,
775
+ 'cache.name': this.name,
776
+ 'cache.filter': filter,
777
+ 'cache.deleted_count': 0,
778
+ 'cache.truncated': false,
708
779
  });
709
780
  return { deleted: 0, truncated: false };
710
781
  }
@@ -717,8 +788,10 @@ class SemanticCache {
717
788
  throw new errors_1.ValkeyCommandError('DEL', err);
718
789
  }
719
790
  span.setAttributes({
720
- 'cache.name': this.name, 'cache.filter': filter,
721
- 'cache.deleted_count': keys.length, 'cache.truncated': truncated,
791
+ 'cache.name': this.name,
792
+ 'cache.filter': filter,
793
+ 'cache.deleted_count': keys.length,
794
+ 'cache.truncated': truncated,
722
795
  });
723
796
  return { deleted: keys.length, truncated };
724
797
  });
@@ -761,16 +834,7 @@ class SemanticCache {
761
834
  catch (err) {
762
835
  throw new errors_1.ValkeyCommandError('FT.INFO', err);
763
836
  }
764
- const info = raw;
765
- let numDocs = 0;
766
- let indexingState = 'unknown';
767
- for (let i = 0; i < info.length - 1; i += 2) {
768
- const key = String(info[i]);
769
- if (key === 'num_docs')
770
- numDocs = parseInt(String(info[i + 1]), 10) || 0;
771
- else if (key === 'indexing')
772
- indexingState = String(info[i + 1]);
773
- }
837
+ const { numDocs, indexingState } = (0, valkey_search_kit_1.parseFtInfoStats)(raw);
774
838
  return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
775
839
  }
776
840
  /**
@@ -803,7 +867,9 @@ class SemanticCache {
803
867
  }
804
868
  }
805
869
  }
806
- catch { /* skip corrupt entries */ }
870
+ catch {
871
+ /* skip corrupt entries */
872
+ }
807
873
  }
808
874
  const sampleCount = entries.length;
809
875
  const categoryLabel = category ?? 'all';
@@ -890,11 +956,15 @@ class SemanticCache {
890
956
  if (entry.category)
891
957
  categories.add(entry.category);
892
958
  }
893
- catch { /* skip */ }
959
+ catch {
960
+ /* skip */
961
+ }
894
962
  }
895
963
  const results = await Promise.all([
896
964
  this.thresholdEffectiveness({ minSamples: options?.minSamples }),
897
- ...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
965
+ ...[...categories]
966
+ .filter(Boolean)
967
+ .map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
898
968
  ]);
899
969
  return results;
900
970
  }
@@ -947,7 +1017,9 @@ class SemanticCache {
947
1017
  }
948
1018
  // -- Internal helpers exposed to package adapters --
949
1019
  /** @internal Default similarity threshold. */
950
- get _defaultThreshold() { return this.defaultThreshold; }
1020
+ get _defaultThreshold() {
1021
+ return this.defaultThreshold;
1022
+ }
951
1023
  /** @internal Test-only getter. */
952
1024
  get _categoryThresholds() {
953
1025
  return this.categoryThresholds;
@@ -980,15 +1052,11 @@ class SemanticCache {
980
1052
  this.refreshConfig()
981
1053
  .then((ok) => {
982
1054
  if (!ok) {
983
- this.telemetry.metrics.configRefreshFailed
984
- .labels({ cache_name: this.name })
985
- .inc();
1055
+ this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
986
1056
  }
987
1057
  })
988
1058
  .catch(() => {
989
- this.telemetry.metrics.configRefreshFailed
990
- .labels({ cache_name: this.name })
991
- .inc();
1059
+ this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
992
1060
  });
993
1061
  };
994
1062
  // Synchronous first refresh: process started immediately after a proposal
@@ -1044,9 +1112,7 @@ class SemanticCache {
1044
1112
  metadata,
1045
1113
  heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
1046
1114
  onWriteFailed: () => {
1047
- this.telemetry.metrics.discoveryWriteFailed
1048
- .labels({ cache_name: this.name })
1049
- .inc();
1115
+ this.telemetry.metrics.discoveryWriteFailed.labels({ cache_name: this.name }).inc();
1050
1116
  },
1051
1117
  });
1052
1118
  await manager.register();
@@ -1099,7 +1165,7 @@ class SemanticCache {
1099
1165
  // Try reading an existing index
1100
1166
  try {
1101
1167
  const info = (await this.client.call('FT.INFO', this.indexName));
1102
- const dim = this.parseDimensionFromInfo(info);
1168
+ const dim = (0, valkey_search_kit_1.parseDimensionFromInfo)(info);
1103
1169
  const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
1104
1170
  if (dim > 0)
1105
1171
  return { dim, hasBinaryRefs };
@@ -1110,7 +1176,7 @@ class SemanticCache {
1110
1176
  catch (err) {
1111
1177
  if (err instanceof errors_1.EmbeddingError)
1112
1178
  throw err;
1113
- if (!this.isIndexNotFoundError(err)) {
1179
+ if (!(0, valkey_search_kit_1.isIndexNotFoundError)(err)) {
1114
1180
  throw new errors_1.ValkeyCommandError('FT.INFO', err);
1115
1181
  }
1116
1182
  }
@@ -1164,7 +1230,8 @@ class SemanticCache {
1164
1230
  const cached = await this.client.getBuffer(embedKey);
1165
1231
  if (cached) {
1166
1232
  this.telemetry.metrics.embeddingCacheTotal
1167
- .labels({ cache_name: this.name, result: 'hit' }).inc();
1233
+ .labels({ cache_name: this.name, result: 'hit' })
1234
+ .inc();
1168
1235
  // Decode Float32 buffer
1169
1236
  const vector = [];
1170
1237
  for (let i = 0; i < cached.length; i += 4) {
@@ -1173,9 +1240,12 @@ class SemanticCache {
1173
1240
  return { vector, durationSec: 0 };
1174
1241
  }
1175
1242
  }
1176
- catch { /* ignore cache read errors */ }
1243
+ catch {
1244
+ /* ignore cache read errors */
1245
+ }
1177
1246
  this.telemetry.metrics.embeddingCacheTotal
1178
- .labels({ cache_name: this.name, result: 'miss' }).inc();
1247
+ .labels({ cache_name: this.name, result: 'miss' })
1248
+ .inc();
1179
1249
  }
1180
1250
  const start = performance.now();
1181
1251
  let vector;
@@ -1186,9 +1256,7 @@ class SemanticCache {
1186
1256
  throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
1187
1257
  }
1188
1258
  const durationSec = (performance.now() - start) / 1000;
1189
- this.telemetry.metrics.embeddingDuration
1190
- .labels({ cache_name: this.name })
1191
- .observe(durationSec);
1259
+ this.telemetry.metrics.embeddingDuration.labels({ cache_name: this.name }).observe(durationSec);
1192
1260
  // Store in embedding cache
1193
1261
  if (this.embeddingCacheEnabled && text) {
1194
1262
  const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
@@ -1197,7 +1265,9 @@ class SemanticCache {
1197
1265
  const buf = (0, utils_1.encodeFloat32)(vector);
1198
1266
  await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
1199
1267
  }
1200
- catch { /* ignore cache write errors */ }
1268
+ catch {
1269
+ /* ignore cache write errors */
1270
+ }
1201
1271
  }
1202
1272
  return { vector, durationSec };
1203
1273
  }
@@ -1235,22 +1305,99 @@ class SemanticCache {
1235
1305
  await pipeline.exec();
1236
1306
  }
1237
1307
  /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
1238
- async recordSimilarityWindow(score, result, category) {
1308
+ async recordSimilarityWindow(score, result, category, costSavedMicros) {
1239
1309
  const now = Date.now();
1240
- // Include a unique nonce so identical (score, result, category) tuples are
1241
- // each recorded as distinct ZADD members instead of overwriting each other.
1242
- const member = JSON.stringify({ score, result, category, _n: Math.random() });
1310
+ const member = JSON.stringify({
1311
+ score,
1312
+ result,
1313
+ category,
1314
+ _n: Math.random(),
1315
+ cost_saved_micros: costSavedMicros,
1316
+ });
1243
1317
  const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
1244
1318
  try {
1245
1319
  const pipeline = this.client.pipeline();
1246
1320
  pipeline.zadd(this.similarityWindowKey, now, member);
1247
- // Trim by time: remove entries older than 7 days
1248
1321
  pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
1249
- // Trim by count: keep at most 10,000 most recent
1250
1322
  pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
1251
1323
  await pipeline.exec();
1252
1324
  }
1253
- catch { /* best effort - never fail on window writes */ }
1325
+ catch {
1326
+ /* best effort - never fail on window writes */
1327
+ }
1328
+ return member;
1329
+ }
1330
+ /**
1331
+ * Track a miss so a subsequent store() can backfill its cost into the
1332
+ * similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
1333
+ * zset — entries beyond that are pruned on every record and backfill.
1334
+ */
1335
+ async recordMissPending(prompt, similarityMember) {
1336
+ const correlationId = correlationIdFor(prompt);
1337
+ const now = Date.now();
1338
+ const fiveMinutesAgo = now - 5 * 60 * 1000;
1339
+ const entry = JSON.stringify({ correlationId, similarityMember });
1340
+ try {
1341
+ await this.client.zadd(this.missPendingKey, now, entry);
1342
+ await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
1343
+ }
1344
+ catch {
1345
+ /* best effort */
1346
+ }
1347
+ }
1348
+ /**
1349
+ * After a successful store(), find the oldest pending miss for the same
1350
+ * query and update its similarity-window record with the now-known cost.
1351
+ * Best-effort — silently no-op if no pending miss exists or the bookkeeping
1352
+ * entry has already been pruned.
1353
+ */
1354
+ async applyCostToPendingMiss(prompt, costMicros) {
1355
+ const correlationId = correlationIdFor(prompt);
1356
+ const fiveMinutesAgo = Date.now() - 5 * 60 * 1000;
1357
+ try {
1358
+ await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
1359
+ const raw = (await this.client.zrange(this.missPendingKey, '0', '-1', 'WITHSCORES'));
1360
+ let matchedEntry = null;
1361
+ let matchedSimilarityMember = null;
1362
+ for (let i = 0; i < raw.length; i += 2) {
1363
+ const entryStr = raw[i];
1364
+ try {
1365
+ const parsed = JSON.parse(entryStr);
1366
+ if (parsed.correlationId === correlationId) {
1367
+ matchedEntry = entryStr;
1368
+ matchedSimilarityMember = parsed.similarityMember;
1369
+ break;
1370
+ }
1371
+ }
1372
+ catch {
1373
+ /* skip malformed */
1374
+ }
1375
+ }
1376
+ if (matchedEntry === null || matchedSimilarityMember === null) {
1377
+ return;
1378
+ }
1379
+ const rawScore = await this.client.zscore(this.similarityWindowKey, matchedSimilarityMember);
1380
+ if (rawScore === null) {
1381
+ await this.client.zrem(this.missPendingKey, matchedEntry);
1382
+ return;
1383
+ }
1384
+ const similarityScore = Number(rawScore);
1385
+ if (!Number.isFinite(similarityScore)) {
1386
+ await this.client.zrem(this.missPendingKey, matchedEntry);
1387
+ return;
1388
+ }
1389
+ const parsedMember = JSON.parse(matchedSimilarityMember);
1390
+ parsedMember.cost_saved_micros = costMicros;
1391
+ const updatedMember = JSON.stringify(parsedMember);
1392
+ const updatePipeline = this.client.pipeline();
1393
+ updatePipeline.zrem(this.similarityWindowKey, matchedSimilarityMember);
1394
+ updatePipeline.zadd(this.similarityWindowKey, similarityScore, updatedMember);
1395
+ updatePipeline.zrem(this.missPendingKey, matchedEntry);
1396
+ await updatePipeline.exec();
1397
+ }
1398
+ catch {
1399
+ /* never fail store() because of bookkeeping */
1400
+ }
1254
1401
  }
1255
1402
  assertInitialized(method) {
1256
1403
  if (!this._initialized) {
@@ -1262,49 +1409,6 @@ class SemanticCache {
1262
1409
  throw new errors_1.SemanticCacheUsageError(`Embedding dimension mismatch: index expects ${this._dimension}, embedFn returned ${embedding.length}. Call flush() then initialize() to rebuild.`);
1263
1410
  }
1264
1411
  }
1265
- isIndexNotFoundError(err) {
1266
- const msg = err instanceof Error ? err.message.toLowerCase() : '';
1267
- return (msg.includes('unknown index name') ||
1268
- msg.includes('no such index') ||
1269
- msg.includes('not found'));
1270
- }
1271
- parseDimensionFromInfo(info) {
1272
- for (let i = 0; i < info.length - 1; i += 2) {
1273
- const key = String(info[i]);
1274
- if (key !== 'attributes' && key !== 'fields')
1275
- continue;
1276
- const attributes = info[i + 1];
1277
- if (!Array.isArray(attributes))
1278
- continue;
1279
- for (const attr of attributes) {
1280
- if (!Array.isArray(attr))
1281
- continue;
1282
- let isVector = false;
1283
- let dim = 0;
1284
- for (let j = 0; j < attr.length - 1; j++) {
1285
- const attrKey = String(attr[j]);
1286
- if (attrKey === 'type' && String(attr[j + 1]) === 'VECTOR')
1287
- isVector = true;
1288
- if (attrKey.toLowerCase() === 'dim')
1289
- dim = parseInt(String(attr[j + 1]), 10) || 0;
1290
- // Valkey Search 1.2 nests dimension inside an 'index' sub-array
1291
- if (attrKey === 'index' && Array.isArray(attr[j + 1])) {
1292
- const indexArr = attr[j + 1];
1293
- for (let k = 0; k < indexArr.length - 1; k++) {
1294
- if (String(indexArr[k]) === 'dimensions') {
1295
- const d = parseInt(String(indexArr[k + 1]), 10) || 0;
1296
- if (d > 0)
1297
- dim = d;
1298
- }
1299
- }
1300
- }
1301
- }
1302
- if (isVector && dim > 0)
1303
- return dim;
1304
- }
1305
- }
1306
- return 0;
1307
- }
1308
1412
  }
1309
1413
  exports.SemanticCache = SemanticCache;
1310
1414
  // --- Judge helpers ---
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ export { SemanticCache } from './SemanticCache';
2
2
  export type { ThresholdEffectivenessResult } from './SemanticCache';
3
3
  export { DEFAULT_COST_TABLE } from './defaultCostTable';
4
4
  export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
5
+ export { createKeywordOverlapRerank } from './rerank';
5
6
  export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
6
7
  export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
7
8
  export { escapeTag } from './utils';
package/dist/index.js CHANGED
@@ -1,10 +1,12 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
3
+ exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.createKeywordOverlapRerank = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
4
4
  var SemanticCache_1 = require("./SemanticCache");
5
5
  Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
6
6
  var defaultCostTable_1 = require("./defaultCostTable");
7
7
  Object.defineProperty(exports, "DEFAULT_COST_TABLE", { enumerable: true, get: function () { return defaultCostTable_1.DEFAULT_COST_TABLE; } });
8
+ var rerank_1 = require("./rerank");
9
+ Object.defineProperty(exports, "createKeywordOverlapRerank", { enumerable: true, get: function () { return rerank_1.createKeywordOverlapRerank; } });
8
10
  var errors_1 = require("./errors");
9
11
  Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
10
12
  Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Built-in rerank factories for @betterdb/semantic-cache.
3
+ */
4
+ /**
5
+ * Built-in keyword-overlap reranker.
6
+ *
7
+ * Blends cosine similarity with word overlap and returns the index of the
8
+ * best candidate.
9
+ *
10
+ * @param compare
11
+ * `"prompt"` – overlap of the incoming query against each candidate's stored
12
+ * prompt. Equivalence signal. Catches entity mismatches
13
+ * (e.g. "weather in Paris" vs "weather in Berlin"). Default.
14
+ * `"response"` – overlap of the incoming query against each candidate's cached
15
+ * response. Relevance signal.
16
+ *
17
+ * @param cosineWeight
18
+ * Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
19
+ * Default: 0.7 (overlap 0.3).
20
+ *
21
+ * Candidate objects carry: `similarity` (cosine distance, lower = more similar),
22
+ * `response` (string), and `prompt` (string, stored prompt).
23
+ */
24
+ export declare function createKeywordOverlapRerank(options?: {
25
+ compare?: 'prompt' | 'response';
26
+ cosineWeight?: number;
27
+ }): (query: string, candidates: Array<{
28
+ response: string;
29
+ similarity: number;
30
+ prompt: string;
31
+ }>) => Promise<number>;
package/dist/rerank.js ADDED
@@ -0,0 +1,73 @@
1
+ "use strict";
2
+ /**
3
+ * Built-in rerank factories for @betterdb/semantic-cache.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.createKeywordOverlapRerank = createKeywordOverlapRerank;
7
/**
 * Tokenize: lowercase, split on whitespace, strip surrounding punctuation.
 * Deterministic and dependency-free.
 * IDF weighting would attach here at the token-weighting step.
 *
 * @param text - Text to tokenize.
 * @returns Set of distinct, non-empty tokens.
 */
function tokenize(text) {
    const out = new Set();
    for (const raw of text.toLowerCase().split(/\s+/)) {
        // Only punctuation at the edges is removed; inner characters such as
        // the apostrophe in "don't" are kept.
        const tok = raw.replace(/^[.,!?;:"'()\[\]{}<>]+|[.,!?;:"'()\[\]{}<>]+$/g, '');
        if (tok)
            out.add(tok);
    }
    return out;
}
/**
 * Built-in keyword-overlap reranker.
 *
 * Blends cosine similarity with word overlap and returns the index of the
 * best candidate. Overlap is the fraction of distinct query tokens that also
 * appear in the compared candidate text (0 when the query has no tokens).
 * Ties keep the earliest candidate. With an empty candidate list the returned
 * index is 0.
 *
 * @param options.compare
 *   `"prompt"`   – overlap of the incoming query against each candidate's stored
 *                  prompt. Equivalence signal. Catches entity mismatches
 *                  (e.g. "weather in Paris" vs "weather in Berlin"). Default.
 *   `"response"` – overlap of the incoming query against each candidate's cached
 *                  response. Relevance signal.
 *
 * @param options.cosineWeight
 *   Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
 *   Default: 0.7 (overlap 0.3).
 *
 * Candidate objects carry: `similarity` (cosine distance, lower = more similar),
 * `response` (string), and `prompt` (string, stored prompt).
 *
 * @returns Async rerank function `(query, candidates) => index`.
 * @throws {Error} If `cosineWeight` is not a number in [0, 1] (NaN included).
 */
function createKeywordOverlapRerank(options) {
    const compare = options?.compare ?? 'prompt';
    const cosineWeight = options?.cosineWeight ?? 0.7;
    // Written as a positive range test so NaN and non-numbers are rejected:
    // `NaN < 0 || NaN > 1` is false, which would let NaN through and turn
    // every blended score into NaN.
    if (typeof cosineWeight !== 'number' || !(cosineWeight >= 0 && cosineWeight <= 1)) {
        throw new Error('cosineWeight must be in [0, 1]');
    }
    const overlapWeight = 1.0 - cosineWeight;
    return async (query, candidates) => {
        const queryTokens = tokenize(query);
        let bestIdx = 0;
        let bestScore = -Infinity;
        for (let i = 0; i < candidates.length; i++) {
            const field = candidates[i][compare];
            // A missing or non-string field contributes zero overlap instead
            // of throwing inside tokenize().
            const candTokens = tokenize(typeof field === 'string' ? field : '');
            let overlap = 0;
            if (queryTokens.size > 0) {
                let intersection = 0;
                for (const t of queryTokens) {
                    if (candTokens.has(t))
                        intersection++;
                }
                overlap = intersection / queryTokens.size;
            }
            // `similarity` is a distance, so convert it to a similarity first.
            const cosineSim = 1.0 - candidates[i].similarity;
            const score = cosineWeight * cosineSim + overlapWeight * overlap;
            if (score > bestScore) {
                bestScore = score;
                bestIdx = i;
            }
        }
        return bestIdx;
    };
}
package/dist/types.d.ts CHANGED
@@ -127,6 +127,7 @@ export interface RerankOptions {
127
127
  rerankFn: (query: string, candidates: Array<{
128
128
  response: string;
129
129
  similarity: number;
130
+ prompt: string;
130
131
  }>) => Promise<number>;
131
132
  }
132
133
  /**
@@ -162,6 +163,8 @@ export interface JudgeOptions {
162
163
  similarity: number;
163
164
  threshold: number;
164
165
  category: string | undefined;
166
+ /** The stored prompt text for the matched entry. */
167
+ cachedPrompt: string;
165
168
  }) => Promise<boolean>;
166
169
  /**
167
170
  * Behavior when judgeFn throws or exceeds timeoutMs.
package/dist/utils.d.ts CHANGED
@@ -1,10 +1,6 @@
1
1
  /** SHA-256 hex digest of a string. */
2
2
  export declare function sha256(text: string): string;
3
- /** Escape a string for safe use as a Valkey Search TAG filter value.
4
- * Spaces are included because Valkey Search treats unescaped spaces as term
5
- * separators (OR semantics), which would broaden the filter unintentionally.
6
- */
7
- export declare function escapeTag(value: string): string;
3
+ export { escapeTag, encodeFloat32, parseFtSearchResponse } from '@betterdb/valkey-search-kit';
8
4
  export type ContentBlock = TextBlock | BinaryBlock | ToolCallBlock | ToolResultBlock | ReasoningBlock;
9
5
  export interface TextBlock {
10
6
  type: 'text';
@@ -58,26 +54,3 @@ export declare function extractText(blocks: ContentBlock[]): string;
58
54
  * Used for the binary_refs TAG field on cache entries.
59
55
  */
60
56
  export declare function extractBinaryRefs(blocks: ContentBlock[]): string[];
61
- /**
62
- * Encode number[] as a little-endian Float32 Buffer.
63
- * Used to store embeddings as binary HSET field values.
64
- */
65
- export declare function encodeFloat32(vec: number[]): Buffer;
66
- /**
67
- * Parse a raw FT.SEARCH response from iovalkey's client.call().
68
- *
69
- * iovalkey returns FT.SEARCH results in the following shape:
70
- * [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
71
- *
72
- * - totalCount is a string (e.g. "2")
73
- * - Each key is a string
74
- * - Each field list is a flat string array: [fieldName, value, fieldName, value, ...]
75
- *
76
- * Returns an array of { key: string, fields: Record<string, string> }.
77
- * Returns [] if totalCount is "0" or the response is empty/malformed.
78
- * Never throws — on any parse error, returns [].
79
- */
80
- export declare function parseFtSearchResponse(raw: unknown): Array<{
81
- key: string;
82
- fields: Record<string, string>;
83
- }>;
package/dist/utils.js CHANGED
@@ -1,23 +1,18 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.parseFtSearchResponse = exports.encodeFloat32 = exports.escapeTag = void 0;
3
4
  exports.sha256 = sha256;
4
- exports.escapeTag = escapeTag;
5
5
  exports.extractText = extractText;
6
6
  exports.extractBinaryRefs = extractBinaryRefs;
7
- exports.encodeFloat32 = encodeFloat32;
8
- exports.parseFtSearchResponse = parseFtSearchResponse;
9
7
  const node_crypto_1 = require("node:crypto");
10
8
  /** SHA-256 hex digest of a string. */
11
9
  function sha256(text) {
12
10
  return (0, node_crypto_1.createHash)('sha256').update(text).digest('hex');
13
11
  }
14
- /** Escape a string for safe use as a Valkey Search TAG filter value.
15
- * Spaces are included because Valkey Search treats unescaped spaces as term
16
- * separators (OR semantics), which would broaden the filter unintentionally.
17
- */
18
- function escapeTag(value) {
19
- return value.replace(/[,.<>{}[\]"':;!@#$%^&*()\-+=~|/\\ ]/g, '\\$&');
20
- }
12
+ var valkey_search_kit_1 = require("@betterdb/valkey-search-kit");
13
+ Object.defineProperty(exports, "escapeTag", { enumerable: true, get: function () { return valkey_search_kit_1.escapeTag; } });
14
+ Object.defineProperty(exports, "encodeFloat32", { enumerable: true, get: function () { return valkey_search_kit_1.encodeFloat32; } });
15
+ Object.defineProperty(exports, "parseFtSearchResponse", { enumerable: true, get: function () { return valkey_search_kit_1.parseFtSearchResponse; } });
21
16
  /**
22
17
  * Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
23
18
  * Used to derive the embedding text from a multi-modal prompt.
@@ -38,70 +33,3 @@ function extractBinaryRefs(blocks) {
38
33
  .map((b) => b.ref)
39
34
  .sort();
40
35
  }
41
- /**
42
- * Encode number[] as a little-endian Float32 Buffer.
43
- * Used to store embeddings as binary HSET field values.
44
- */
45
- function encodeFloat32(vec) {
46
- const buf = Buffer.alloc(vec.length * 4);
47
- for (let i = 0; i < vec.length; i++) {
48
- buf.writeFloatLE(vec[i], i * 4);
49
- }
50
- return buf;
51
- }
52
- /**
53
- * Parse a raw FT.SEARCH response from iovalkey's client.call().
54
- *
55
- * iovalkey returns FT.SEARCH results in the following shape:
56
- * [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
57
- *
58
- * - totalCount is a string (e.g. "2")
59
- * - Each key is a string
60
- * - Each field list is a flat string array: [fieldName, value, fieldName, value, ...]
61
- *
62
- * Returns an array of { key: string, fields: Record<string, string> }.
63
- * Returns [] if totalCount is "0" or the response is empty/malformed.
64
- * Never throws — on any parse error, returns [].
65
- */
66
- function parseFtSearchResponse(raw) {
67
- try {
68
- if (!Array.isArray(raw) || raw.length < 1) {
69
- return [];
70
- }
71
- const totalCount = typeof raw[0] === 'string' ? parseInt(raw[0], 10) : Number(raw[0]);
72
- if (!totalCount || totalCount <= 0) {
73
- return [];
74
- }
75
- const results = [];
76
- let i = 1;
77
- while (i < raw.length) {
78
- const key = raw[i];
79
- if (typeof key !== 'string') {
80
- i++;
81
- continue;
82
- }
83
- const fieldList = raw[i + 1];
84
- const fields = {};
85
- if (Array.isArray(fieldList)) {
86
- const len = fieldList.length - (fieldList.length % 2);
87
- for (let j = 0; j < len; j += 2) {
88
- const fieldName = String(fieldList[j]);
89
- const fieldValue = String(fieldList[j + 1]);
90
- fields[fieldName] = fieldValue;
91
- }
92
- i += 2;
93
- }
94
- else {
95
- // No field list follows the key (e.g. RETURN 0 mode)
96
- results.push({ key, fields });
97
- i++;
98
- continue;
99
- }
100
- results.push({ key, fields });
101
- }
102
- return results;
103
- }
104
- catch {
105
- return [];
106
- }
107
- }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@betterdb/semantic-cache",
3
- "version": "0.5.1",
3
+ "version": "0.7.0",
4
4
  "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
5
5
  "keywords": [
6
6
  "valkey",
@@ -93,18 +93,11 @@
93
93
  "dist",
94
94
  "README.md"
95
95
  ],
96
- "scripts": {
97
- "build": "tsc && node scripts/inject-telemetry-defaults.mjs",
98
- "typecheck": "tsc --noEmit",
99
- "test": "vitest run",
100
- "test:watch": "vitest",
101
- "clean": "rm -rf dist",
102
- "update:pricing": "node scripts/update-model-prices.mjs"
103
- },
104
96
  "dependencies": {
105
97
  "@opentelemetry/api": "^1.9.0",
106
98
  "posthog-node": ">=4.0.0",
107
- "prom-client": "^15.1.3"
99
+ "prom-client": "^15.1.3",
100
+ "@betterdb/valkey-search-kit": "0.1.0"
108
101
  },
109
102
  "engines": {
110
103
  "node": ">=20.0.0"
@@ -137,5 +130,13 @@
137
130
  "openai": {
138
131
  "optional": true
139
132
  }
133
+ },
134
+ "scripts": {
135
+ "build": "tsc && node scripts/inject-telemetry-defaults.mjs",
136
+ "typecheck": "tsc --noEmit",
137
+ "test": "vitest run",
138
+ "test:watch": "vitest",
139
+ "clean": "rm -rf dist",
140
+ "update:pricing": "node scripts/update-model-prices.mjs"
140
141
  }
141
- }
142
+ }