@betterdb/semantic-cache 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +233 -124
  2. package/dist/SemanticCache.d.ts +127 -7
  3. package/dist/SemanticCache.js +867 -48
  4. package/dist/adapters/ai.js +6 -1
  5. package/dist/adapters/anthropic.d.ts +32 -0
  6. package/dist/adapters/anthropic.js +94 -0
  7. package/dist/adapters/langchain.js +6 -1
  8. package/dist/adapters/langgraph.d.ts +104 -0
  9. package/dist/adapters/langgraph.js +271 -0
  10. package/dist/adapters/llamaindex.d.ts +32 -0
  11. package/dist/adapters/llamaindex.js +76 -0
  12. package/dist/adapters/openai-responses.d.ts +31 -0
  13. package/dist/adapters/openai-responses.js +112 -0
  14. package/dist/adapters/openai.d.ts +42 -0
  15. package/dist/adapters/openai.js +97 -0
  16. package/dist/analytics.d.ts +24 -0
  17. package/dist/analytics.js +116 -0
  18. package/dist/cluster.d.ts +10 -0
  19. package/dist/cluster.js +43 -0
  20. package/dist/defaultCostTable.d.ts +11 -0
  21. package/dist/defaultCostTable.js +1976 -0
  22. package/dist/discovery.d.ts +67 -0
  23. package/dist/discovery.js +140 -0
  24. package/dist/embed/bedrock.d.ts +32 -0
  25. package/dist/embed/bedrock.js +109 -0
  26. package/dist/embed/cohere.d.ts +34 -0
  27. package/dist/embed/cohere.js +37 -0
  28. package/dist/embed/ollama.d.ts +30 -0
  29. package/dist/embed/ollama.js +24 -0
  30. package/dist/embed/openai.d.ts +31 -0
  31. package/dist/embed/openai.js +66 -0
  32. package/dist/embed/voyage.d.ts +31 -0
  33. package/dist/embed/voyage.js +32 -0
  34. package/dist/index.d.ts +8 -1
  35. package/dist/index.js +13 -1
  36. package/dist/normalizer.d.ts +68 -0
  37. package/dist/normalizer.js +102 -0
  38. package/dist/telemetry.d.ts +5 -0
  39. package/dist/telemetry.js +30 -0
  40. package/dist/types.d.ts +128 -7
  41. package/dist/utils.d.ts +58 -0
  42. package/dist/utils.js +30 -0
  43. package/package.json +81 -6
@@ -2,11 +2,17 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SemanticCache = void 0;
4
4
  const node_crypto_1 = require("node:crypto");
5
+ const node_crypto_2 = require("node:crypto");
5
6
  const api_1 = require("@opentelemetry/api");
6
7
  const errors_1 = require("./errors");
7
8
  const telemetry_1 = require("./telemetry");
8
9
  const utils_1 = require("./utils");
10
+ const defaultCostTable_1 = require("./defaultCostTable");
11
+ const cluster_1 = require("./cluster");
12
+ const analytics_1 = require("./analytics");
13
+ const discovery_1 = require("./discovery");
9
14
  const INVALIDATE_BATCH_SIZE = 1000;
15
+ const PACKAGE_VERSION = require('../package.json').version;
10
16
  function errMsg(err) {
11
17
  return err instanceof Error ? err.message : String(err);
12
18
  }
@@ -17,15 +23,34 @@ class SemanticCache {
17
23
  indexName;
18
24
  entryPrefix;
19
25
  statsKey;
26
+ similarityWindowKey;
27
+ configKey;
20
28
  defaultThreshold;
21
29
  defaultTtl;
22
30
  categoryThresholds;
23
31
  uncertaintyBand;
24
32
  telemetry;
33
+ costTable;
34
+ embeddingCacheEnabled;
35
+ embeddingCacheTtl;
36
+ embedKeyPrefix;
37
+ discoveryOptions;
38
+ _initialDefaultThreshold;
39
+ _initialCategoryThresholds;
40
+ configRefreshOptions;
41
+ configRefreshTimer;
42
+ discovery = null;
25
43
  _initialized = false;
26
44
  _dimension = 0;
45
+ _hasBinaryRefs = false;
27
46
  _initPromise = null;
28
47
  _initGeneration = 0;
48
+ analyticsOpts;
49
+ usesDefaultCostTable;
50
+ analytics = analytics_1.NOOP_ANALYTICS;
51
+ statsTimer;
52
+ shutdownCalled = false;
53
+ analyticsInitiated = false;
29
54
  /**
30
55
  * Creates a new SemanticCache instance.
31
56
  *
@@ -42,17 +67,46 @@ class SemanticCache {
42
67
  this.indexName = `${this.name}:idx`;
43
68
  this.entryPrefix = `${this.name}:entry:`;
44
69
  this.statsKey = `${this.name}:__stats`;
70
+ this.similarityWindowKey = `${this.name}:__similarity_window`;
71
+ this.configKey = `${this.name}:__config`;
72
+ this.embedKeyPrefix = `${this.name}:embed:`;
45
73
  this.defaultThreshold = options.defaultThreshold ?? 0.1;
46
74
  this.defaultTtl = options.defaultTtl;
47
75
  this.categoryThresholds = options.categoryThresholds ?? {};
48
76
  this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
77
+ // Build effective cost table
78
+ const useDefault = options.useDefaultCostTable ?? true;
79
+ if (!useDefault && !options.costTable) {
80
+ this.costTable = undefined;
81
+ }
82
+ else if (!useDefault) {
83
+ this.costTable = options.costTable;
84
+ }
85
+ else {
86
+ this.costTable = { ...defaultCostTable_1.DEFAULT_COST_TABLE, ...(options.costTable ?? {}) };
87
+ }
88
+ // Embedding cache config
89
+ this.embeddingCacheEnabled = options.embeddingCache?.enabled ?? true;
90
+ this.embeddingCacheTtl = options.embeddingCache?.ttl ?? 86400;
49
91
  this.telemetry = (0, telemetry_1.createTelemetry)({
50
92
  prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
51
93
  tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
52
94
  registry: options.telemetry?.registry,
53
95
  });
96
+ this.analyticsOpts = options.analytics;
97
+ this.usesDefaultCostTable = useDefault;
98
+ this.discoveryOptions = options.discovery ?? {};
99
+ // Capture constructor values as fallback when __config fields are absent
100
+ this._initialDefaultThreshold = this.defaultThreshold;
101
+ this._initialCategoryThresholds = { ...this.categoryThresholds };
102
+ // Refresh options
103
+ const refresh = options.configRefresh ?? {};
104
+ this.configRefreshOptions = {
105
+ enabled: refresh.enabled ?? true,
106
+ intervalMs: Math.max(1000, refresh.intervalMs ?? 30_000),
107
+ };
54
108
  }
55
- // ── Lifecycle ──────────────────────────────────────────────
109
+ // -- Lifecycle --
56
110
  async initialize() {
57
111
  if (!this._initPromise) {
58
112
  this._initPromise = this._doInitialize().catch((err) => {
@@ -65,10 +119,17 @@ class SemanticCache {
65
119
  async flush() {
66
120
  // Mark uninitialized immediately so concurrent check()/store() calls get
67
121
  // a clear SemanticCacheUsageError instead of cryptic Valkey errors.
68
- // Bump generation so any in-flight _doInitialize() won't overwrite this state.
69
122
  this._initialized = false;
70
123
  this._initPromise = null;
71
124
  this._initGeneration++;
125
+ // Capture and null the discovery ref synchronously, before any await,
126
+ // so a concurrent _doInitialize() (started after _initGeneration++) can't
127
+ // race in and have its new manager overwritten by this flush.
128
+ const discoveryToStop = this.discovery;
129
+ this.discovery = null;
130
+ if (discoveryToStop) {
131
+ await discoveryToStop.stop({ deleteHeartbeat: true });
132
+ }
72
133
  // Valkey Search 1.2 does not support the DD (Delete Documents) flag on
73
134
  // FT.DROPINDEX. Drop the index first, then clean up keys separately.
74
135
  try {
@@ -79,33 +140,86 @@ class SemanticCache {
79
140
  throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
80
141
  }
81
142
  }
82
- const entryPattern = `${this.name}:entry:*`;
83
- let cursor = '0';
84
- do {
85
- const [nextCursor, keys] = await this.client.scan(cursor, 'MATCH', entryPattern, 'COUNT', '100');
86
- cursor = nextCursor;
87
- if (keys.length > 0)
88
- await this.client.del(keys);
89
- } while (cursor !== '0');
143
+ // Cluster-aware SCAN for entry keys and embed cache keys
144
+ const patterns = [
145
+ `${this.name}:entry:*`,
146
+ `${this.name}:embed:*`,
147
+ ];
148
+ for (const pattern of patterns) {
149
+ await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
150
+ await nodeClient.del(keys);
151
+ });
152
+ }
90
153
  await this.client.del(this.statsKey);
154
+ await this.client.del(this.similarityWindowKey);
155
+ this.analytics.capture('cache_flush');
156
+ }
157
+ /**
158
+ * Shut down the analytics client, cancel the stats timer, and stop the
159
+ * discovery heartbeat. Safe to call multiple times.
160
+ */
161
+ async shutdown() {
162
+ this.shutdownCalled = true;
163
+ if (this.configRefreshTimer) {
164
+ clearInterval(this.configRefreshTimer);
165
+ this.configRefreshTimer = undefined;
166
+ }
167
+ if (this.statsTimer) {
168
+ clearInterval(this.statsTimer);
169
+ this.statsTimer = undefined;
170
+ }
171
+ await this.analytics.shutdown();
172
+ await this.dispose();
91
173
  }
92
- // ── Public operations ──────────────────────────────────────
174
+ /**
175
+ * Graceful shutdown of the discovery layer — stops the heartbeat and
176
+ * deletes this instance's heartbeat key so Monitor marks the cache offline
177
+ * immediately. Does NOT touch the registry hash, the FT index, or any
178
+ * entries. Safe to call multiple times.
179
+ */
180
+ async dispose() {
181
+ if (this.configRefreshTimer) {
182
+ clearInterval(this.configRefreshTimer);
183
+ this.configRefreshTimer = undefined;
184
+ }
185
+ if (this._initPromise) {
186
+ await this._initPromise.catch(() => { });
187
+ }
188
+ if (this.discovery) {
189
+ await this.discovery.stop({ deleteHeartbeat: true });
190
+ this.discovery = null;
191
+ }
192
+ }
193
+ // -- Public operations --
93
194
  async check(prompt, options) {
94
195
  this.assertInitialized('check');
95
196
  return this.traced('check', async (span) => {
96
197
  const category = options?.category ?? '';
97
- const k = options?.k ?? 1;
98
198
  const threshold = options?.threshold ??
99
199
  (category && this.categoryThresholds[category] !== undefined
100
200
  ? this.categoryThresholds[category]
101
201
  : this.defaultThreshold);
102
- const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
202
+ // Resolve text and binary refs from prompt
203
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
204
+ // Stale model detection
205
+ const checkStale = (options?.staleAfterModelChange ?? false) && !!options?.currentModel;
206
+ // Rerank option
207
+ const rerankOpts = options?.rerank;
208
+ const k = rerankOpts ? rerankOpts.k : (options?.k ?? 1);
209
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
103
210
  this.assertDimension(embedding);
104
- // FT.SEARCH — Valkey Search 1.2 rejects KNN aliases in RETURN/SORTBY,
105
- // so we omit both. Results include all fields and are pre-sorted by distance.
211
+ // Build filter
212
+ const userFilter = options?.filter;
213
+ // AND semantics: each ref must be present — chain separate TAG clauses.
214
+ const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
215
+ ? (binaryRefs.length === 1
216
+ ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
217
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
218
+ : null;
219
+ const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
220
+ const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
221
+ const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
106
222
  const searchStart = performance.now();
107
- const filter = options?.filter;
108
- const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
109
223
  let rawResult;
110
224
  try {
111
225
  rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
@@ -136,6 +250,9 @@ class SemanticCache {
136
250
  }
137
251
  // Miss (no usable score, or score exceeds threshold)
138
252
  if (isNaN(score) || score > threshold) {
253
+ if (!isNaN(score)) {
254
+ await this.recordSimilarityWindow(score, 'miss', category);
255
+ }
139
256
  await this.recordStat('misses');
140
257
  this.telemetry.metrics.requestsTotal
141
258
  .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
@@ -151,42 +268,148 @@ class SemanticCache {
151
268
  }
152
269
  return result;
153
270
  }
154
- // Hit
155
- const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
271
+ // Rerank: apply rerankFn to all candidates above threshold
272
+ let winnerParsedIndex = 0;
273
+ if (rerankOpts && parsed.length > 0) {
274
+ // Preserve the original parsed[] index alongside each candidate so we
275
+ // can map back even when NaN-scored entries are filtered out.
276
+ const indexedCandidates = parsed
277
+ .map((r, i) => ({ i, s: parseFloat(r.fields['__score'] ?? 'NaN') }))
278
+ .filter(({ s }) => !isNaN(s))
279
+ .map(({ i, s }) => ({
280
+ origIdx: i,
281
+ candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
282
+ }));
283
+ const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
284
+ // Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
285
+ // treated as a miss rather than silently falling back to the top candidate.
286
+ if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
287
+ await this.recordSimilarityWindow(score, 'miss', category);
288
+ await this.recordStat('misses');
289
+ this.telemetry.metrics.requestsTotal
290
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
291
+ span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
292
+ return { hit: false, confidence: 'miss' };
293
+ }
294
+ // Map back to the original parsed[] index (not the candidates[] index)
295
+ winnerParsedIndex = indexedCandidates[picked].origIdx;
296
+ }
297
+ const winner = parsed[winnerParsedIndex] ?? parsed[0];
298
+ const winnerScore = parseFloat(winner.fields['__score'] ?? String(score));
299
+ // Stale model check: if winner's model differs from currentModel, evict and treat as miss
300
+ if (checkStale) {
301
+ const storedModel = winner.fields['model'] ?? '';
302
+ if (storedModel && storedModel !== options.currentModel) {
303
+ // Evict stale entry
304
+ try {
305
+ await this.client.del(winner.key);
306
+ }
307
+ catch { /* best effort */ }
308
+ await this.recordSimilarityWindow(winnerScore, 'miss', category);
309
+ this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
310
+ await this.recordStat('misses');
311
+ this.telemetry.metrics.requestsTotal
312
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
313
+ span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
314
+ return { hit: false, confidence: 'miss' };
315
+ }
316
+ }
317
+ // All checks passed — record as a genuine hit
318
+ await this.recordSimilarityWindow(winnerScore, 'hit', category);
319
+ const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
156
320
  await this.recordStat('hits');
157
321
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
158
322
  this.telemetry.metrics.requestsTotal
159
323
  .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
160
- const matchedKey = parsed[0].key;
324
+ const matchedKey = winner.key;
161
325
  if (this.defaultTtl !== undefined && matchedKey) {
162
326
  await this.client.expire(matchedKey, this.defaultTtl);
163
327
  }
328
+ // Cost saved
329
+ let costSaved;
330
+ const costMicrosStr = winner.fields['cost_micros'];
331
+ if (costMicrosStr) {
332
+ const costMicros = parseInt(costMicrosStr, 10);
333
+ if (!isNaN(costMicros) && costMicros > 0) {
334
+ costSaved = costMicros / 1_000_000;
335
+ // Atomically increment cost_saved_micros in stats
336
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
337
+ this.telemetry.metrics.costSavedTotal
338
+ .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
339
+ }
340
+ }
341
+ // Content blocks
342
+ let contentBlocks;
343
+ const contentBlocksStr = winner.fields['content_blocks'];
344
+ if (contentBlocksStr) {
345
+ try {
346
+ contentBlocks = JSON.parse(contentBlocksStr);
347
+ }
348
+ catch { /* ignore parse errors */ }
349
+ }
164
350
  span.setAttributes({
165
- 'cache.hit': true, 'cache.similarity': score, 'cache.threshold': threshold,
351
+ 'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
166
352
  'cache.confidence': confidence, 'cache.matched_key': matchedKey,
167
353
  'cache.category': categoryLabel, ...timingAttrs,
168
354
  });
169
- return {
170
- hit: true, response: parsed[0].fields['response'],
171
- similarity: score, confidence, matchedKey,
355
+ const result = {
356
+ hit: true, response: winner.fields['response'],
357
+ similarity: winnerScore, confidence, matchedKey,
172
358
  };
359
+ if (costSaved !== undefined)
360
+ result.costSaved = costSaved;
361
+ if (contentBlocks)
362
+ result.contentBlocks = contentBlocks;
363
+ return result;
173
364
  });
174
365
  }
175
366
  async store(prompt, response, options) {
176
367
  this.assertInitialized('store');
177
368
  return this.traced('store', async (span) => {
178
- const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
369
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
370
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
179
371
  this.assertDimension(embedding);
180
372
  const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
181
373
  const category = options?.category ?? '';
182
374
  const model = options?.model ?? '';
375
+ // Compute cost if tokens and model provided
376
+ let costMicros;
377
+ if (options?.model &&
378
+ options?.inputTokens !== undefined &&
379
+ options?.outputTokens !== undefined &&
380
+ this.costTable) {
381
+ const pricing = this.costTable[options.model];
382
+ if (pricing) {
383
+ costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
384
+ options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
385
+ }
386
+ }
387
+ const hashFields = {
388
+ prompt: promptText,
389
+ response,
390
+ model,
391
+ category,
392
+ inserted_at: Date.now().toString(),
393
+ metadata: JSON.stringify(options?.metadata ?? {}),
394
+ embedding: (0, utils_1.encodeFloat32)(embedding),
395
+ };
396
+ if (binaryRefs.length > 0) {
397
+ hashFields['binary_refs'] = binaryRefs.join(',');
398
+ }
399
+ if (costMicros !== undefined && costMicros > 0) {
400
+ hashFields['cost_micros'] = String(costMicros);
401
+ }
402
+ if (options?.temperature !== undefined) {
403
+ hashFields['temperature'] = String(options.temperature);
404
+ }
405
+ if (options?.topP !== undefined) {
406
+ hashFields['top_p'] = String(options.topP);
407
+ }
408
+ if (options?.seed !== undefined) {
409
+ hashFields['seed'] = String(options.seed);
410
+ }
183
411
  try {
184
- await this.client.hset(entryKey, {
185
- prompt, response, model, category,
186
- inserted_at: Date.now().toString(),
187
- metadata: JSON.stringify(options?.metadata ?? {}),
188
- embedding: (0, utils_1.encodeFloat32)(embedding),
189
- });
412
+ await this.client.hset(entryKey, hashFields);
190
413
  }
191
414
  catch (err) {
192
415
  throw new errors_1.ValkeyCommandError('HSET', err);
@@ -202,11 +425,195 @@ class SemanticCache {
202
425
  return entryKey;
203
426
  });
204
427
  }
428
+ /**
429
+ * Store structured content blocks as the cached response.
430
+ * Populates both the response field (from TextBlock text) and content_blocks (full JSON).
431
+ */
432
+ async storeMultipart(prompt, blocks, options) {
433
+ this.assertInitialized('storeMultipart');
434
+ return this.traced('storeMultipart', async (span) => {
435
+ const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
436
+ const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
437
+ this.assertDimension(embedding);
438
+ // Derive text response from blocks for backward compat
439
+ const textResponse = (0, utils_1.extractText)(blocks);
440
+ const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
441
+ const category = options?.category ?? '';
442
+ const model = options?.model ?? '';
443
+ let costMicros;
444
+ if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
445
+ const pricing = this.costTable[options.model];
446
+ if (pricing) {
447
+ costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
448
+ options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
449
+ }
450
+ }
451
+ const hashFields = {
452
+ prompt: promptText,
453
+ response: textResponse,
454
+ model,
455
+ category,
456
+ inserted_at: Date.now().toString(),
457
+ metadata: JSON.stringify(options?.metadata ?? {}),
458
+ embedding: (0, utils_1.encodeFloat32)(embedding),
459
+ content_blocks: JSON.stringify(blocks),
460
+ };
461
+ if (binaryRefs.length > 0) {
462
+ hashFields['binary_refs'] = binaryRefs.join(',');
463
+ }
464
+ if (costMicros !== undefined && costMicros > 0) {
465
+ hashFields['cost_micros'] = String(costMicros);
466
+ }
467
+ if (options?.temperature !== undefined)
468
+ hashFields['temperature'] = String(options.temperature);
469
+ if (options?.topP !== undefined)
470
+ hashFields['top_p'] = String(options.topP);
471
+ if (options?.seed !== undefined)
472
+ hashFields['seed'] = String(options.seed);
473
+ try {
474
+ await this.client.hset(entryKey, hashFields);
475
+ }
476
+ catch (err) {
477
+ throw new errors_1.ValkeyCommandError('HSET', err);
478
+ }
479
+ const ttl = options?.ttl ?? this.defaultTtl;
480
+ if (ttl !== undefined)
481
+ await this.client.expire(entryKey, ttl);
482
+ span.setAttributes({
483
+ 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
484
+ 'cache.category': category || 'none', 'cache.model': model || 'none',
485
+ 'embedding_latency_ms': embedSec * 1000,
486
+ });
487
+ return entryKey;
488
+ });
489
+ }
490
+ /**
491
+ * Check multiple prompts in parallel, using pipelined FT.SEARCH calls.
492
+ * Returns results in input order.
493
+ */
494
+ async checkBatch(prompts, options) {
495
+ this.assertInitialized('checkBatch');
496
+ if (prompts.length === 0)
497
+ return [];
498
+ if (options?.rerank) {
499
+ throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'rerank' option. Use check() for reranking individual prompts.");
500
+ }
501
+ if (options?.staleAfterModelChange) {
502
+ throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
503
+ }
504
+ return this.traced('checkBatch', async (span) => {
505
+ // Resolve all prompts and embed in parallel
506
+ const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
507
+ const embeddings = await Promise.all(resolved.map(({ text }) => this.embed(text)));
508
+ const category = options?.category ?? '';
509
+ const threshold = options?.threshold ??
510
+ (category && this.categoryThresholds[category] !== undefined
511
+ ? this.categoryThresholds[category]
512
+ : this.defaultThreshold);
513
+ const k = options?.k ?? 1;
514
+ const userFilter = options?.filter;
515
+ // Pipeline all FT.SEARCH calls
516
+ const pipeline = this.client.pipeline();
517
+ for (let i = 0; i < prompts.length; i++) {
518
+ const { binaryRefs } = resolved[i];
519
+ const { vector: embedding } = embeddings[i];
520
+ const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
521
+ ? (binaryRefs.length === 1
522
+ ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
523
+ : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
524
+ : null;
525
+ const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
526
+ const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
527
+ const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
528
+ pipeline.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
529
+ }
530
+ const pipelineResults = await pipeline.exec();
531
+ span.setAttributes({ 'cache.batch_size': prompts.length, 'cache.name': this.name });
532
+ const results = [];
533
+ const categoryLabel = category || 'none';
534
+ for (let i = 0; i < prompts.length; i++) {
535
+ const pipelineEntry = pipelineResults?.[i];
536
+ const err = pipelineEntry?.[0];
537
+ const rawResult = pipelineEntry?.[1];
538
+ if (err) {
539
+ await this.recordStat('misses');
540
+ this.telemetry.metrics.requestsTotal
541
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
542
+ results.push({ hit: false, confidence: 'miss' });
543
+ continue;
544
+ }
545
+ const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
546
+ if (parsed.length === 0) {
547
+ await this.recordStat('misses');
548
+ this.telemetry.metrics.requestsTotal
549
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
550
+ results.push({ hit: false, confidence: 'miss' });
551
+ continue;
552
+ }
553
+ const scoreStr = parsed[0].fields['__score'];
554
+ const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
555
+ if (isNaN(score) || score > threshold) {
556
+ if (!isNaN(score)) {
557
+ await this.recordSimilarityWindow(score, 'miss', category);
558
+ }
559
+ await this.recordStat('misses');
560
+ this.telemetry.metrics.requestsTotal
561
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
562
+ const result = { hit: false, confidence: 'miss' };
563
+ if (!isNaN(score)) {
564
+ result.similarity = score;
565
+ result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
566
+ }
567
+ results.push(result);
568
+ continue;
569
+ }
570
+ await this.recordSimilarityWindow(score, 'hit', category);
571
+ const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
572
+ await this.recordStat('hits');
573
+ const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
574
+ this.telemetry.metrics.requestsTotal
575
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
576
+ const matchedKey = parsed[0].key;
577
+ if (this.defaultTtl !== undefined && matchedKey) {
578
+ await this.client.expire(matchedKey, this.defaultTtl);
579
+ }
580
+ let costSaved;
581
+ const costMicrosStr = parsed[0].fields['cost_micros'];
582
+ if (costMicrosStr) {
583
+ const costMicros = parseInt(costMicrosStr, 10);
584
+ if (!isNaN(costMicros) && costMicros > 0) {
585
+ costSaved = costMicros / 1_000_000;
586
+ await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
587
+ this.telemetry.metrics.costSavedTotal
588
+ .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
589
+ }
590
+ }
591
+ let contentBlocks;
592
+ const contentBlocksStr = parsed[0].fields['content_blocks'];
593
+ if (contentBlocksStr) {
594
+ try {
595
+ contentBlocks = JSON.parse(contentBlocksStr);
596
+ }
597
+ catch { /* ignore */ }
598
+ }
599
+ const result = {
600
+ hit: true, response: parsed[0].fields['response'],
601
+ similarity: score, confidence, matchedKey,
602
+ };
603
+ if (costSaved !== undefined)
604
+ result.costSaved = costSaved;
605
+ if (contentBlocks)
606
+ result.contentBlocks = contentBlocks;
607
+ results.push(result);
608
+ }
609
+ return results;
610
+ });
611
+ }
205
612
  /**
206
613
  * Deletes all entries matching a valkey-search filter expression.
207
614
  *
208
615
  * **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
209
- * trusted, programmatically-constructed expressions never unsanitised
616
+ * trusted, programmatically-constructed expressions - never unsanitised
210
617
  * user input.
211
618
  */
212
619
  async invalidate(filter) {
@@ -242,13 +649,34 @@ class SemanticCache {
242
649
  return { deleted: keys.length, truncated };
243
650
  });
244
651
  }
652
+ /** Delete all entries tagged with the given model name. */
653
+ async invalidateByModel(model) {
654
+ let total = 0;
655
+ let result;
656
+ do {
657
+ result = await this.invalidate(`@model:{${(0, utils_1.escapeTag)(model)}}`);
658
+ total += result.deleted;
659
+ } while (result.truncated);
660
+ return total;
661
+ }
662
+ /** Delete all entries tagged with the given category. */
663
+ async invalidateByCategory(category) {
664
+ let total = 0;
665
+ let result;
666
+ do {
667
+ result = await this.invalidate(`@category:{${(0, utils_1.escapeTag)(category)}}`);
668
+ total += result.deleted;
669
+ } while (result.truncated);
670
+ return total;
671
+ }
245
672
  async stats() {
246
673
  this.assertInitialized('stats');
247
674
  const raw = await this.client.hgetall(this.statsKey);
248
- const hits = parseInt(raw.hits ?? '0', 10);
249
- const misses = parseInt(raw.misses ?? '0', 10);
250
- const total = parseInt(raw.total ?? '0', 10);
251
- return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total };
675
+ const hits = parseInt(raw?.hits ?? '0', 10);
676
+ const misses = parseInt(raw?.misses ?? '0', 10);
677
+ const total = parseInt(raw?.total ?? '0', 10);
678
+ const costSavedMicros = parseInt(raw?.cost_saved_micros ?? '0', 10);
679
+ return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total, costSavedMicros };
252
680
  }
253
681
  async indexInfo() {
254
682
  this.assertInitialized('indexInfo');
@@ -271,27 +699,339 @@ class SemanticCache {
271
699
  }
272
700
  return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
273
701
  }
274
- // ── Private helpers ────────────────────────────────────────
702
+ /**
703
+ * Analyze the rolling similarity score window and recommend threshold adjustments.
704
+ */
705
+ async thresholdEffectiveness(options) {
706
+ this.assertInitialized('thresholdEffectiveness');
707
+ const minSamples = options?.minSamples ?? 100;
708
+ const category = options?.category;
709
+ const threshold = category && this.categoryThresholds[category] !== undefined
710
+ ? this.categoryThresholds[category]
711
+ : this.defaultThreshold;
712
+ // Read all window entries
713
+ let rawEntries;
714
+ try {
715
+ rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
716
+ }
717
+ catch {
718
+ rawEntries = [];
719
+ }
720
+ // Parse and optionally filter by category
721
+ const entries = [];
722
+ for (const raw of rawEntries) {
723
+ try {
724
+ const entry = JSON.parse(String(raw));
725
+ if (typeof entry.score === 'number' &&
726
+ (entry.result === 'hit' || entry.result === 'miss')) {
727
+ if (!category || entry.category === category) {
728
+ entries.push(entry);
729
+ }
730
+ }
731
+ }
732
+ catch { /* skip corrupt entries */ }
733
+ }
734
+ const sampleCount = entries.length;
735
+ const categoryLabel = category ?? 'all';
736
+ if (sampleCount < minSamples) {
737
+ return {
738
+ category: categoryLabel,
739
+ sampleCount,
740
+ currentThreshold: threshold,
741
+ hitRate: 0,
742
+ uncertainHitRate: 0,
743
+ nearMissRate: 0,
744
+ avgHitSimilarity: 0,
745
+ avgMissSimilarity: 0,
746
+ recommendation: 'insufficient_data',
747
+ reasoning: `Only ${sampleCount} samples collected; ${minSamples} required for a reliable recommendation.`,
748
+ };
749
+ }
750
+ const hits = entries.filter((e) => e.result === 'hit');
751
+ const misses = entries.filter((e) => e.result === 'miss');
752
+ const hitRate = hits.length / sampleCount;
753
+ const uncertainHits = hits.filter((e) => e.score >= threshold - this.uncertaintyBand);
754
+ const uncertainHitRate = hits.length > 0 ? uncertainHits.length / hits.length : 0;
755
+ // Near-misses are scores just ABOVE the threshold (genuine close misses).
756
+ // Scores below the threshold recorded as misses (rerank rejection, stale eviction)
757
+ // must be excluded — they produce negative avgNearMissDelta, causing
758
+ // recommendedThreshold = threshold + negative < threshold, contradicting "loosen".
759
+ const nearMisses = misses.filter((e) => e.score > threshold && e.score <= threshold + 0.03);
760
+ const nearMissRate = misses.length > 0 ? nearMisses.length / misses.length : 0;
761
+ const avgHitSimilarity = hits.length > 0 ? hits.reduce((s, e) => s + e.score, 0) / hits.length : 0;
762
+ const avgMissSimilarity = misses.length > 0 ? misses.reduce((s, e) => s + e.score, 0) / misses.length : 0;
763
+ // avgNearMissDelta: how far above the threshold near-misses are on average
764
+ const avgNearMissDelta = nearMisses.length > 0
765
+ ? nearMisses.reduce((s, e) => s + (e.score - threshold), 0) / nearMisses.length
766
+ : 0;
767
+ let recommendation;
768
+ let recommendedThreshold;
769
+ let reasoning;
770
+ if (uncertainHitRate > 0.2) {
771
+ recommendation = 'tighten_threshold';
772
+ recommendedThreshold = Math.max(0, threshold - this.uncertaintyBand * 1.5);
773
+ reasoning = `${(uncertainHitRate * 100).toFixed(1)}% of hits are in the uncertainty band - tighten the threshold to reduce false positives.`;
774
+ }
775
+ else if (nearMissRate > 0.3 && avgNearMissDelta < 0.03) {
776
+ recommendation = 'loosen_threshold';
777
+ recommendedThreshold = threshold + avgNearMissDelta;
778
+ reasoning = `${(nearMissRate * 100).toFixed(1)}% of misses are very close to the threshold - consider loosening to capture more hits.`;
779
+ }
780
+ else {
781
+ recommendation = 'optimal';
782
+ reasoning = `Hit rate is ${(hitRate * 100).toFixed(1)}% with ${(uncertainHitRate * 100).toFixed(1)}% uncertain hits - threshold appears well-calibrated.`;
783
+ }
784
+ return {
785
+ category: categoryLabel,
786
+ sampleCount,
787
+ currentThreshold: threshold,
788
+ hitRate,
789
+ uncertainHitRate,
790
+ nearMissRate,
791
+ avgHitSimilarity,
792
+ avgMissSimilarity,
793
+ recommendation,
794
+ recommendedThreshold,
795
+ reasoning,
796
+ };
797
+ }
798
+ /**
799
+ * Returns threshold effectiveness results for every category seen in the
800
+ * rolling window, plus one aggregate result for all categories combined.
801
+ */
802
+ async thresholdEffectivenessAll(options) {
803
+ this.assertInitialized('thresholdEffectivenessAll');
804
+ let rawEntries;
805
+ try {
806
+ rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
807
+ }
808
+ catch {
809
+ rawEntries = [];
810
+ }
811
+ // Collect unique categories
812
+ const categories = new Set();
813
+ for (const raw of rawEntries) {
814
+ try {
815
+ const entry = JSON.parse(raw);
816
+ if (entry.category)
817
+ categories.add(entry.category);
818
+ }
819
+ catch { /* skip */ }
820
+ }
821
+ const results = await Promise.all([
822
+ this.thresholdEffectiveness({ minSamples: options?.minSamples }),
823
+ ...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
824
+ ]);
825
+ return results;
826
+ }
827
+ /**
828
+ * Refresh threshold config from Valkey. Returns true on a successful HGETALL,
829
+ * false if the call threw.
830
+ *
831
+ * Field semantics:
832
+ * - "threshold" -> updates defaultThreshold
833
+ * - "threshold:{category}" -> updates categoryThresholds[category]
834
+ * - "threshold:" (empty) -> ignored
835
+ * - non-numeric values -> ignored
836
+ * - out-of-range values -> ignored (must be 0 <= x <= 2)
837
+ *
838
+ * Categories present in memory but absent from the hash fall back to their
839
+ * constructor values (or are removed if no constructor override existed).
840
+ * The default threshold likewise falls back to its constructor value if
841
+ * `threshold` is absent from the hash.
842
+ */
843
+ async refreshConfig() {
844
+ let raw = null;
845
+ try {
846
+ raw = await this.client.hgetall(this.configKey);
847
+ }
848
+ catch {
849
+ return false;
850
+ }
851
+ let nextDefault = this._initialDefaultThreshold;
852
+ const nextCategory = { ...this._initialCategoryThresholds };
853
+ if (raw) {
854
+ for (const [field, value] of Object.entries(raw)) {
855
+ const parsed = Number(value);
856
+ if (!Number.isFinite(parsed) || parsed < 0 || parsed > 2) {
857
+ continue;
858
+ }
859
+ if (field === 'threshold') {
860
+ nextDefault = parsed;
861
+ }
862
+ else if (field.startsWith('threshold:')) {
863
+ const category = field.slice('threshold:'.length);
864
+ if (category.length > 0) {
865
+ nextCategory[category] = parsed;
866
+ }
867
+ }
868
+ }
869
+ }
870
+ this.defaultThreshold = nextDefault;
871
+ this.categoryThresholds = nextCategory;
872
+ return true;
873
+ }
874
+ // -- Internal helpers exposed to package adapters --
875
+ /** @internal Default similarity threshold. */
876
+ get _defaultThreshold() { return this.defaultThreshold; }
877
+ /** @internal Test-only getter. */
878
+ get _categoryThresholds() {
879
+ return this.categoryThresholds;
880
+ }
881
+ /** @internal Test-only getter. */
882
+ get _configRefreshIntervalMs() {
883
+ return this.configRefreshOptions.intervalMs;
884
+ }
885
+ /**
886
+ * Execute a stable FT.SEARCH for use by adapters (e.g. LangGraph).
887
+ * SORTBY inserted_at ASC gives stable ordering across paginated calls.
888
+ * @internal
889
+ */
890
+ async _searchEntries(filterExpr, limit, offset) {
891
+ return this.client.call('FT.SEARCH', this.indexName, filterExpr, 'SORTBY', 'inserted_at', 'ASC', 'LIMIT', String(offset), String(limit), 'DIALECT', '2');
892
+ }
893
+ /**
894
+ * Embed text for use by adapters (e.g. LangGraph semantic search).
895
+ * @internal
896
+ */
897
+ async _embedText(text) {
898
+ return this.embed(text);
899
+ }
900
+ // -- Private helpers --
901
+ startConfigRefresh() {
902
+ if (!this.configRefreshOptions.enabled) {
903
+ return;
904
+ }
905
+ const tick = () => {
906
+ this.refreshConfig()
907
+ .then((ok) => {
908
+ if (!ok) {
909
+ this.telemetry.metrics.configRefreshFailed
910
+ .labels({ cache_name: this.name })
911
+ .inc();
912
+ }
913
+ })
914
+ .catch(() => {
915
+ this.telemetry.metrics.configRefreshFailed
916
+ .labels({ cache_name: this.name })
917
+ .inc();
918
+ });
919
+ };
920
+ // Synchronous first refresh: process started immediately after a proposal
921
+ // was applied picks up the change without waiting for the first tick.
922
+ tick();
923
+ this.configRefreshTimer = setInterval(tick, this.configRefreshOptions.intervalMs);
924
+ if (typeof this.configRefreshTimer.unref === 'function') {
925
+ this.configRefreshTimer.unref();
926
+ }
927
+ }
275
928
  async _doInitialize() {
276
929
  const gen = this._initGeneration;
277
930
  return this.traced('initialize', async () => {
278
- const dim = await this.ensureIndexAndGetDimension();
279
- // If flush() ran while we were initializing, don't overwrite its state.
280
- if (this._initGeneration !== gen)
931
+ const { dim, hasBinaryRefs } = await this.ensureIndexAndGetDimension();
932
+ if (this._initGeneration !== gen) {
281
933
  return;
934
+ }
282
935
  this._dimension = dim;
936
+ this._hasBinaryRefs = hasBinaryRefs;
937
+ // registerDiscovery() may throw SemanticCacheUsageError on a name
938
+ // collision. Mark the cache initialized only after discovery succeeds
939
+ // so a colliding caller cannot subsequently call check()/store()
940
+ // against another owner's keys.
941
+ const manager = await this.registerDiscovery();
942
+ if (this._initGeneration !== gen) {
943
+ if (manager) {
944
+ await manager.stop({ deleteHeartbeat: true });
945
+ }
946
+ return;
947
+ }
948
+ this.discovery = manager;
283
949
  this._initialized = true;
950
+ this.startConfigRefresh();
951
+ // Fire analytics init once (not on every flush+initialize cycle)
952
+ this.initAnalyticsSafe().catch(() => { });
284
953
  });
285
954
  }
955
+ async registerDiscovery() {
956
+ if (this.discoveryOptions.enabled === false) {
957
+ return null;
958
+ }
959
+ const metadata = (0, discovery_1.buildSemanticMetadata)({
960
+ name: this.name,
961
+ version: PACKAGE_VERSION,
962
+ defaultThreshold: this.defaultThreshold,
963
+ categoryThresholds: this.categoryThresholds,
964
+ uncertaintyBand: this.uncertaintyBand,
965
+ includeCategories: this.discoveryOptions.includeCategories ?? true,
966
+ });
967
+ const manager = new discovery_1.DiscoveryManager({
968
+ client: this.client,
969
+ name: this.name,
970
+ metadata,
971
+ heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
972
+ onWriteFailed: () => {
973
+ this.telemetry.metrics.discoveryWriteFailed
974
+ .labels({ cache_name: this.name })
975
+ .inc();
976
+ },
977
+ });
978
+ await manager.register();
979
+ return manager;
980
+ }
981
+ async initAnalyticsSafe() {
982
+ if (this.analyticsInitiated)
983
+ return;
984
+ this.analyticsInitiated = true;
985
+ try {
986
+ const a = await (0, analytics_1.createAnalytics)(this.analyticsOpts);
987
+ if (this.shutdownCalled) {
988
+ await a.shutdown();
989
+ return;
990
+ }
991
+ this.analytics = a;
992
+ await a.init(this.client, this.name, {
993
+ defaultThreshold: this.defaultThreshold,
994
+ uncertaintyBand: this.uncertaintyBand,
995
+ defaultTtl: this.defaultTtl ?? null,
996
+ hasCostTable: !!this.costTable,
997
+ usesDefaultCostTable: this.usesDefaultCostTable,
998
+ embeddingCacheEnabled: this.embeddingCacheEnabled,
999
+ categoryThresholdCount: Object.keys(this.categoryThresholds).length,
1000
+ dimension: this._dimension,
1001
+ });
1002
+ const intervalMs = this.analyticsOpts?.statsIntervalMs ?? 300_000;
1003
+ if (!this.shutdownCalled && intervalMs > 0) {
1004
+ this.statsTimer = setInterval(() => this.captureStatsSnapshot(), intervalMs);
1005
+ this.statsTimer.unref();
1006
+ }
1007
+ }
1008
+ catch {
1009
+ // never throw from analytics
1010
+ }
1011
+ }
1012
+ captureStatsSnapshot() {
1013
+ this.stats()
1014
+ .then((s) => {
1015
+ this.analytics.capture('stats_snapshot', {
1016
+ hits: s.hits,
1017
+ misses: s.misses,
1018
+ hit_rate: s.hitRate,
1019
+ cost_saved_micros: s.costSavedMicros,
1020
+ });
1021
+ })
1022
+ .catch(() => { });
1023
+ }
286
1024
  async ensureIndexAndGetDimension() {
287
1025
  // Try reading an existing index
288
1026
  try {
289
1027
  const info = (await this.client.call('FT.INFO', this.indexName));
290
1028
  const dim = this.parseDimensionFromInfo(info);
1029
+ const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
291
1030
  if (dim > 0)
292
- return dim;
293
- // Couldn't parse dimension from FT.INFO fall back to probe
294
- return (await this.embed('probe')).vector.length;
1031
+ return { dim, hasBinaryRefs };
1032
+ // Couldn't parse dimension from FT.INFO - fall back to probe
1033
+ const probeDim = (await this.embed('probe')).vector.length;
1034
+ return { dim: probeDim, hasBinaryRefs };
295
1035
  }
296
1036
  catch (err) {
297
1037
  if (err instanceof errors_1.EmbeddingError)
@@ -300,18 +1040,69 @@ class SemanticCache {
300
1040
  throw new errors_1.ValkeyCommandError('FT.INFO', err);
301
1041
  }
302
1042
  }
303
- // Index doesn't exist probe dimension and create it
1043
+ // Index doesn't exist - probe dimension and create it
304
1044
  const dim = (await this.embed('probe')).vector.length;
305
1045
  try {
306
- await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
1046
+ await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'binary_refs', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'temperature', 'NUMERIC', 'top_p', 'NUMERIC', 'seed', 'NUMERIC', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
307
1047
  }
308
1048
  catch (err) {
309
1049
  throw new errors_1.ValkeyCommandError('FT.CREATE', err);
310
1050
  }
311
- return dim;
1051
+ return { dim, hasBinaryRefs: true };
1052
+ }
1053
+ /** Check if the index schema has a binary_refs field. */
1054
+ parseHasBinaryRefsFromInfo(info) {
1055
+ for (let i = 0; i < info.length - 1; i += 2) {
1056
+ const key = String(info[i]);
1057
+ if (key !== 'attributes' && key !== 'fields')
1058
+ continue;
1059
+ const attributes = info[i + 1];
1060
+ if (!Array.isArray(attributes))
1061
+ continue;
1062
+ for (const attr of attributes) {
1063
+ if (!Array.isArray(attr))
1064
+ continue;
1065
+ for (let j = 0; j < attr.length - 1; j++) {
1066
+ if (String(attr[j]) === 'identifier' && String(attr[j + 1]) === 'binary_refs') {
1067
+ return true;
1068
+ }
1069
+ }
1070
+ }
1071
+ }
1072
+ return false;
312
1073
  }
313
- /** Wraps embedFn with error handling and duration tracking. */
1074
+ /** Resolve a prompt (string or ContentBlock[]) into text + binary refs. */
1075
+ resolvePrompt(prompt) {
1076
+ if (typeof prompt === 'string') {
1077
+ return { text: prompt, binaryRefs: [] };
1078
+ }
1079
+ const text = (0, utils_1.extractText)(prompt);
1080
+ const binaryRefs = (0, utils_1.extractBinaryRefs)(prompt);
1081
+ return { text, binaryRefs };
1082
+ }
1083
+ /** Wraps embedFn with error handling, duration tracking, and optional embedding cache. */
314
1084
  async embed(text) {
1085
+ // Check embedding cache
1086
+ if (this.embeddingCacheEnabled && text) {
1087
+ const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
1088
+ const embedKey = `${this.embedKeyPrefix}${hash}`;
1089
+ try {
1090
+ const cached = await this.client.getBuffer(embedKey);
1091
+ if (cached) {
1092
+ this.telemetry.metrics.embeddingCacheTotal
1093
+ .labels({ cache_name: this.name, result: 'hit' }).inc();
1094
+ // Decode Float32 buffer
1095
+ const vector = [];
1096
+ for (let i = 0; i < cached.length; i += 4) {
1097
+ vector.push(cached.readFloatLE(i));
1098
+ }
1099
+ return { vector, durationSec: 0 };
1100
+ }
1101
+ }
1102
+ catch { /* ignore cache read errors */ }
1103
+ this.telemetry.metrics.embeddingCacheTotal
1104
+ .labels({ cache_name: this.name, result: 'miss' }).inc();
1105
+ }
315
1106
  const start = performance.now();
316
1107
  let vector;
317
1108
  try {
@@ -324,12 +1115,22 @@ class SemanticCache {
324
1115
  this.telemetry.metrics.embeddingDuration
325
1116
  .labels({ cache_name: this.name })
326
1117
  .observe(durationSec);
1118
+ // Store in embedding cache
1119
+ if (this.embeddingCacheEnabled && text) {
1120
+ const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
1121
+ const embedKey = `${this.embedKeyPrefix}${hash}`;
1122
+ try {
1123
+ const buf = (0, utils_1.encodeFloat32)(vector);
1124
+ await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
1125
+ }
1126
+ catch { /* ignore cache write errors */ }
1127
+ }
327
1128
  return { vector, durationSec };
328
1129
  }
329
1130
  /**
330
1131
  * Wraps a method body in an OTel span with automatic status, end, and
331
1132
  * operation duration metric. The span is passed to fn so callers can
332
- * set attributes but callers must NOT call span.end() or span.setStatus(),
1133
+ * set attributes - but callers must NOT call span.end() or span.setStatus(),
333
1134
  * as traced() handles both.
334
1135
  */
335
1136
  async traced(operation, fn) {
@@ -359,6 +1160,24 @@ class SemanticCache {
359
1160
  pipeline.hincrby(this.statsKey, field, 1);
360
1161
  await pipeline.exec();
361
1162
  }
1163
+ /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
1164
+ async recordSimilarityWindow(score, result, category) {
1165
+ const now = Date.now();
1166
+ // Include a unique nonce so identical (score, result, category) tuples are
1167
+ // each recorded as distinct ZADD members instead of overwriting each other.
1168
+ const member = JSON.stringify({ score, result, category, _n: Math.random() });
1169
+ const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
1170
+ try {
1171
+ const pipeline = this.client.pipeline();
1172
+ pipeline.zadd(this.similarityWindowKey, now, member);
1173
+ // Trim by time: remove entries older than 7 days
1174
+ pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
1175
+ // Trim by count: keep at most 10,000 most recent
1176
+ pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
1177
+ await pipeline.exec();
1178
+ }
1179
+ catch { /* best effort - never fail on window writes */ }
1180
+ }
362
1181
  assertInitialized(method) {
363
1182
  if (!this._initialized) {
364
1183
  throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);