@betterdb/semantic-cache 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +211 -128
- package/dist/SemanticCache.d.ts +85 -5
- package/dist/SemanticCache.js +689 -47
- package/dist/adapters/ai.js +6 -1
- package/dist/adapters/anthropic.d.ts +32 -0
- package/dist/adapters/anthropic.js +94 -0
- package/dist/adapters/langchain.js +6 -1
- package/dist/adapters/langgraph.d.ts +104 -0
- package/dist/adapters/langgraph.js +271 -0
- package/dist/adapters/llamaindex.d.ts +32 -0
- package/dist/adapters/llamaindex.js +76 -0
- package/dist/adapters/openai-responses.d.ts +31 -0
- package/dist/adapters/openai-responses.js +112 -0
- package/dist/adapters/openai.d.ts +42 -0
- package/dist/adapters/openai.js +97 -0
- package/dist/analytics.d.ts +24 -0
- package/dist/analytics.js +116 -0
- package/dist/cluster.d.ts +10 -0
- package/dist/cluster.js +43 -0
- package/dist/defaultCostTable.d.ts +11 -0
- package/dist/defaultCostTable.js +1976 -0
- package/dist/embed/bedrock.d.ts +32 -0
- package/dist/embed/bedrock.js +109 -0
- package/dist/embed/cohere.d.ts +34 -0
- package/dist/embed/cohere.js +37 -0
- package/dist/embed/ollama.d.ts +30 -0
- package/dist/embed/ollama.js +24 -0
- package/dist/embed/openai.d.ts +31 -0
- package/dist/embed/openai.js +66 -0
- package/dist/embed/voyage.d.ts +31 -0
- package/dist/embed/voyage.js +32 -0
- package/dist/index.d.ts +6 -1
- package/dist/index.js +11 -1
- package/dist/normalizer.d.ts +68 -0
- package/dist/normalizer.js +102 -0
- package/dist/telemetry.d.ts +3 -0
- package/dist/telemetry.js +18 -0
- package/dist/types.d.ts +107 -7
- package/dist/utils.d.ts +58 -0
- package/dist/utils.js +30 -0
- package/package.json +81 -6
package/dist/SemanticCache.js
CHANGED
|
@@ -2,10 +2,14 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SemanticCache = void 0;
|
|
4
4
|
const node_crypto_1 = require("node:crypto");
|
|
5
|
+
const node_crypto_2 = require("node:crypto");
|
|
5
6
|
const api_1 = require("@opentelemetry/api");
|
|
6
7
|
const errors_1 = require("./errors");
|
|
7
8
|
const telemetry_1 = require("./telemetry");
|
|
8
9
|
const utils_1 = require("./utils");
|
|
10
|
+
const defaultCostTable_1 = require("./defaultCostTable");
|
|
11
|
+
const cluster_1 = require("./cluster");
|
|
12
|
+
const analytics_1 = require("./analytics");
|
|
9
13
|
const INVALIDATE_BATCH_SIZE = 1000;
|
|
10
14
|
function errMsg(err) {
|
|
11
15
|
return err instanceof Error ? err.message : String(err);
|
|
@@ -17,15 +21,27 @@ class SemanticCache {
|
|
|
17
21
|
indexName;
|
|
18
22
|
entryPrefix;
|
|
19
23
|
statsKey;
|
|
24
|
+
similarityWindowKey;
|
|
20
25
|
defaultThreshold;
|
|
21
26
|
defaultTtl;
|
|
22
27
|
categoryThresholds;
|
|
23
28
|
uncertaintyBand;
|
|
24
29
|
telemetry;
|
|
30
|
+
costTable;
|
|
31
|
+
embeddingCacheEnabled;
|
|
32
|
+
embeddingCacheTtl;
|
|
33
|
+
embedKeyPrefix;
|
|
25
34
|
_initialized = false;
|
|
26
35
|
_dimension = 0;
|
|
36
|
+
_hasBinaryRefs = false;
|
|
27
37
|
_initPromise = null;
|
|
28
38
|
_initGeneration = 0;
|
|
39
|
+
analyticsOpts;
|
|
40
|
+
usesDefaultCostTable;
|
|
41
|
+
analytics = analytics_1.NOOP_ANALYTICS;
|
|
42
|
+
statsTimer;
|
|
43
|
+
shutdownCalled = false;
|
|
44
|
+
analyticsInitiated = false;
|
|
29
45
|
/**
|
|
30
46
|
* Creates a new SemanticCache instance.
|
|
31
47
|
*
|
|
@@ -42,17 +58,35 @@ class SemanticCache {
|
|
|
42
58
|
this.indexName = `${this.name}:idx`;
|
|
43
59
|
this.entryPrefix = `${this.name}:entry:`;
|
|
44
60
|
this.statsKey = `${this.name}:__stats`;
|
|
61
|
+
this.similarityWindowKey = `${this.name}:__similarity_window`;
|
|
62
|
+
this.embedKeyPrefix = `${this.name}:embed:`;
|
|
45
63
|
this.defaultThreshold = options.defaultThreshold ?? 0.1;
|
|
46
64
|
this.defaultTtl = options.defaultTtl;
|
|
47
65
|
this.categoryThresholds = options.categoryThresholds ?? {};
|
|
48
66
|
this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
|
|
67
|
+
// Build effective cost table
|
|
68
|
+
const useDefault = options.useDefaultCostTable ?? true;
|
|
69
|
+
if (!useDefault && !options.costTable) {
|
|
70
|
+
this.costTable = undefined;
|
|
71
|
+
}
|
|
72
|
+
else if (!useDefault) {
|
|
73
|
+
this.costTable = options.costTable;
|
|
74
|
+
}
|
|
75
|
+
else {
|
|
76
|
+
this.costTable = { ...defaultCostTable_1.DEFAULT_COST_TABLE, ...(options.costTable ?? {}) };
|
|
77
|
+
}
|
|
78
|
+
// Embedding cache config
|
|
79
|
+
this.embeddingCacheEnabled = options.embeddingCache?.enabled ?? true;
|
|
80
|
+
this.embeddingCacheTtl = options.embeddingCache?.ttl ?? 86400;
|
|
49
81
|
this.telemetry = (0, telemetry_1.createTelemetry)({
|
|
50
82
|
prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
|
|
51
83
|
tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
|
|
52
84
|
registry: options.telemetry?.registry,
|
|
53
85
|
});
|
|
86
|
+
this.analyticsOpts = options.analytics;
|
|
87
|
+
this.usesDefaultCostTable = useDefault;
|
|
54
88
|
}
|
|
55
|
-
//
|
|
89
|
+
// -- Lifecycle --
|
|
56
90
|
async initialize() {
|
|
57
91
|
if (!this._initPromise) {
|
|
58
92
|
this._initPromise = this._doInitialize().catch((err) => {
|
|
@@ -65,7 +99,6 @@ class SemanticCache {
|
|
|
65
99
|
async flush() {
|
|
66
100
|
// Mark uninitialized immediately so concurrent check()/store() calls get
|
|
67
101
|
// a clear SemanticCacheUsageError instead of cryptic Valkey errors.
|
|
68
|
-
// Bump generation so any in-flight _doInitialize() won't overwrite this state.
|
|
69
102
|
this._initialized = false;
|
|
70
103
|
this._initPromise = null;
|
|
71
104
|
this._initGeneration++;
|
|
@@ -79,33 +112,59 @@ class SemanticCache {
|
|
|
79
112
|
throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
|
|
80
113
|
}
|
|
81
114
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
115
|
+
// Cluster-aware SCAN for entry keys and embed cache keys
|
|
116
|
+
const patterns = [
|
|
117
|
+
`${this.name}:entry:*`,
|
|
118
|
+
`${this.name}:embed:*`,
|
|
119
|
+
];
|
|
120
|
+
for (const pattern of patterns) {
|
|
121
|
+
await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
|
|
122
|
+
await nodeClient.del(keys);
|
|
123
|
+
});
|
|
124
|
+
}
|
|
90
125
|
await this.client.del(this.statsKey);
|
|
126
|
+
await this.client.del(this.similarityWindowKey);
|
|
127
|
+
this.analytics.capture('cache_flush');
|
|
128
|
+
}
|
|
129
|
+
/** Shut down the analytics client and cancel the stats timer. */
|
|
130
|
+
async shutdown() {
|
|
131
|
+
this.shutdownCalled = true;
|
|
132
|
+
if (this.statsTimer) {
|
|
133
|
+
clearInterval(this.statsTimer);
|
|
134
|
+
this.statsTimer = undefined;
|
|
135
|
+
}
|
|
136
|
+
await this.analytics.shutdown();
|
|
91
137
|
}
|
|
92
|
-
//
|
|
138
|
+
// -- Public operations --
|
|
93
139
|
async check(prompt, options) {
|
|
94
140
|
this.assertInitialized('check');
|
|
95
141
|
return this.traced('check', async (span) => {
|
|
96
142
|
const category = options?.category ?? '';
|
|
97
|
-
const k = options?.k ?? 1;
|
|
98
143
|
const threshold = options?.threshold ??
|
|
99
144
|
(category && this.categoryThresholds[category] !== undefined
|
|
100
145
|
? this.categoryThresholds[category]
|
|
101
146
|
: this.defaultThreshold);
|
|
102
|
-
|
|
147
|
+
// Resolve text and binary refs from prompt
|
|
148
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
149
|
+
// Stale model detection
|
|
150
|
+
const checkStale = (options?.staleAfterModelChange ?? false) && !!options?.currentModel;
|
|
151
|
+
// Rerank option
|
|
152
|
+
const rerankOpts = options?.rerank;
|
|
153
|
+
const k = rerankOpts ? rerankOpts.k : (options?.k ?? 1);
|
|
154
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
103
155
|
this.assertDimension(embedding);
|
|
104
|
-
//
|
|
105
|
-
|
|
156
|
+
// Build filter
|
|
157
|
+
const userFilter = options?.filter;
|
|
158
|
+
// AND semantics: each ref must be present — chain separate TAG clauses.
|
|
159
|
+
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
160
|
+
? (binaryRefs.length === 1
|
|
161
|
+
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
162
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
|
|
163
|
+
: null;
|
|
164
|
+
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
165
|
+
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
166
|
+
const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
106
167
|
const searchStart = performance.now();
|
|
107
|
-
const filter = options?.filter;
|
|
108
|
-
const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
109
168
|
let rawResult;
|
|
110
169
|
try {
|
|
111
170
|
rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
|
|
@@ -136,6 +195,9 @@ class SemanticCache {
|
|
|
136
195
|
}
|
|
137
196
|
// Miss (no usable score, or score exceeds threshold)
|
|
138
197
|
if (isNaN(score) || score > threshold) {
|
|
198
|
+
if (!isNaN(score)) {
|
|
199
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
200
|
+
}
|
|
139
201
|
await this.recordStat('misses');
|
|
140
202
|
this.telemetry.metrics.requestsTotal
|
|
141
203
|
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
@@ -151,42 +213,148 @@ class SemanticCache {
|
|
|
151
213
|
}
|
|
152
214
|
return result;
|
|
153
215
|
}
|
|
154
|
-
//
|
|
155
|
-
|
|
216
|
+
// Rerank: apply rerankFn to all candidates above threshold
|
|
217
|
+
let winnerParsedIndex = 0;
|
|
218
|
+
if (rerankOpts && parsed.length > 0) {
|
|
219
|
+
// Preserve the original parsed[] index alongside each candidate so we
|
|
220
|
+
// can map back even when NaN-scored entries are filtered out.
|
|
221
|
+
const indexedCandidates = parsed
|
|
222
|
+
.map((r, i) => ({ i, s: parseFloat(r.fields['__score'] ?? 'NaN') }))
|
|
223
|
+
.filter(({ s }) => !isNaN(s))
|
|
224
|
+
.map(({ i, s }) => ({
|
|
225
|
+
origIdx: i,
|
|
226
|
+
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
|
|
227
|
+
}));
|
|
228
|
+
const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
|
|
229
|
+
// Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
|
|
230
|
+
// treated as a miss rather than silently falling back to the top candidate.
|
|
231
|
+
if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
|
|
232
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
233
|
+
await this.recordStat('misses');
|
|
234
|
+
this.telemetry.metrics.requestsTotal
|
|
235
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
236
|
+
span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
|
|
237
|
+
return { hit: false, confidence: 'miss' };
|
|
238
|
+
}
|
|
239
|
+
// Map back to the original parsed[] index (not the candidates[] index)
|
|
240
|
+
winnerParsedIndex = indexedCandidates[picked].origIdx;
|
|
241
|
+
}
|
|
242
|
+
const winner = parsed[winnerParsedIndex] ?? parsed[0];
|
|
243
|
+
const winnerScore = parseFloat(winner.fields['__score'] ?? String(score));
|
|
244
|
+
// Stale model check: if winner's model differs from currentModel, evict and treat as miss
|
|
245
|
+
if (checkStale) {
|
|
246
|
+
const storedModel = winner.fields['model'] ?? '';
|
|
247
|
+
if (storedModel && storedModel !== options.currentModel) {
|
|
248
|
+
// Evict stale entry
|
|
249
|
+
try {
|
|
250
|
+
await this.client.del(winner.key);
|
|
251
|
+
}
|
|
252
|
+
catch { /* best effort */ }
|
|
253
|
+
await this.recordSimilarityWindow(winnerScore, 'miss', category);
|
|
254
|
+
this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
|
|
255
|
+
await this.recordStat('misses');
|
|
256
|
+
this.telemetry.metrics.requestsTotal
|
|
257
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
258
|
+
span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
|
|
259
|
+
return { hit: false, confidence: 'miss' };
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
// All checks passed — record as a genuine hit
|
|
263
|
+
await this.recordSimilarityWindow(winnerScore, 'hit', category);
|
|
264
|
+
const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
156
265
|
await this.recordStat('hits');
|
|
157
266
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
158
267
|
this.telemetry.metrics.requestsTotal
|
|
159
268
|
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
160
|
-
const matchedKey =
|
|
269
|
+
const matchedKey = winner.key;
|
|
161
270
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
162
271
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
163
272
|
}
|
|
273
|
+
// Cost saved
|
|
274
|
+
let costSaved;
|
|
275
|
+
const costMicrosStr = winner.fields['cost_micros'];
|
|
276
|
+
if (costMicrosStr) {
|
|
277
|
+
const costMicros = parseInt(costMicrosStr, 10);
|
|
278
|
+
if (!isNaN(costMicros) && costMicros > 0) {
|
|
279
|
+
costSaved = costMicros / 1_000_000;
|
|
280
|
+
// Atomically increment cost_saved_micros in stats
|
|
281
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
|
|
282
|
+
this.telemetry.metrics.costSavedTotal
|
|
283
|
+
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
// Content blocks
|
|
287
|
+
let contentBlocks;
|
|
288
|
+
const contentBlocksStr = winner.fields['content_blocks'];
|
|
289
|
+
if (contentBlocksStr) {
|
|
290
|
+
try {
|
|
291
|
+
contentBlocks = JSON.parse(contentBlocksStr);
|
|
292
|
+
}
|
|
293
|
+
catch { /* ignore parse errors */ }
|
|
294
|
+
}
|
|
164
295
|
span.setAttributes({
|
|
165
|
-
'cache.hit': true, 'cache.similarity':
|
|
296
|
+
'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
|
|
166
297
|
'cache.confidence': confidence, 'cache.matched_key': matchedKey,
|
|
167
298
|
'cache.category': categoryLabel, ...timingAttrs,
|
|
168
299
|
});
|
|
169
|
-
|
|
170
|
-
hit: true, response:
|
|
171
|
-
similarity:
|
|
300
|
+
const result = {
|
|
301
|
+
hit: true, response: winner.fields['response'],
|
|
302
|
+
similarity: winnerScore, confidence, matchedKey,
|
|
172
303
|
};
|
|
304
|
+
if (costSaved !== undefined)
|
|
305
|
+
result.costSaved = costSaved;
|
|
306
|
+
if (contentBlocks)
|
|
307
|
+
result.contentBlocks = contentBlocks;
|
|
308
|
+
return result;
|
|
173
309
|
});
|
|
174
310
|
}
|
|
175
311
|
async store(prompt, response, options) {
|
|
176
312
|
this.assertInitialized('store');
|
|
177
313
|
return this.traced('store', async (span) => {
|
|
178
|
-
const {
|
|
314
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
315
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
179
316
|
this.assertDimension(embedding);
|
|
180
317
|
const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
|
|
181
318
|
const category = options?.category ?? '';
|
|
182
319
|
const model = options?.model ?? '';
|
|
320
|
+
// Compute cost if tokens and model provided
|
|
321
|
+
let costMicros;
|
|
322
|
+
if (options?.model &&
|
|
323
|
+
options?.inputTokens !== undefined &&
|
|
324
|
+
options?.outputTokens !== undefined &&
|
|
325
|
+
this.costTable) {
|
|
326
|
+
const pricing = this.costTable[options.model];
|
|
327
|
+
if (pricing) {
|
|
328
|
+
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
329
|
+
options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
const hashFields = {
|
|
333
|
+
prompt: promptText,
|
|
334
|
+
response,
|
|
335
|
+
model,
|
|
336
|
+
category,
|
|
337
|
+
inserted_at: Date.now().toString(),
|
|
338
|
+
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
339
|
+
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
340
|
+
};
|
|
341
|
+
if (binaryRefs.length > 0) {
|
|
342
|
+
hashFields['binary_refs'] = binaryRefs.join(',');
|
|
343
|
+
}
|
|
344
|
+
if (costMicros !== undefined && costMicros > 0) {
|
|
345
|
+
hashFields['cost_micros'] = String(costMicros);
|
|
346
|
+
}
|
|
347
|
+
if (options?.temperature !== undefined) {
|
|
348
|
+
hashFields['temperature'] = String(options.temperature);
|
|
349
|
+
}
|
|
350
|
+
if (options?.topP !== undefined) {
|
|
351
|
+
hashFields['top_p'] = String(options.topP);
|
|
352
|
+
}
|
|
353
|
+
if (options?.seed !== undefined) {
|
|
354
|
+
hashFields['seed'] = String(options.seed);
|
|
355
|
+
}
|
|
183
356
|
try {
|
|
184
|
-
await this.client.hset(entryKey,
|
|
185
|
-
prompt, response, model, category,
|
|
186
|
-
inserted_at: Date.now().toString(),
|
|
187
|
-
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
188
|
-
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
189
|
-
});
|
|
357
|
+
await this.client.hset(entryKey, hashFields);
|
|
190
358
|
}
|
|
191
359
|
catch (err) {
|
|
192
360
|
throw new errors_1.ValkeyCommandError('HSET', err);
|
|
@@ -202,11 +370,195 @@ class SemanticCache {
|
|
|
202
370
|
return entryKey;
|
|
203
371
|
});
|
|
204
372
|
}
|
|
373
|
+
/**
|
|
374
|
+
* Store structured content blocks as the cached response.
|
|
375
|
+
* Populates both the response field (from TextBlock text) and content_blocks (full JSON).
|
|
376
|
+
*/
|
|
377
|
+
async storeMultipart(prompt, blocks, options) {
|
|
378
|
+
this.assertInitialized('storeMultipart');
|
|
379
|
+
return this.traced('storeMultipart', async (span) => {
|
|
380
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
381
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
382
|
+
this.assertDimension(embedding);
|
|
383
|
+
// Derive text response from blocks for backward compat
|
|
384
|
+
const textResponse = (0, utils_1.extractText)(blocks);
|
|
385
|
+
const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
|
|
386
|
+
const category = options?.category ?? '';
|
|
387
|
+
const model = options?.model ?? '';
|
|
388
|
+
let costMicros;
|
|
389
|
+
if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
|
|
390
|
+
const pricing = this.costTable[options.model];
|
|
391
|
+
if (pricing) {
|
|
392
|
+
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
393
|
+
options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
const hashFields = {
|
|
397
|
+
prompt: promptText,
|
|
398
|
+
response: textResponse,
|
|
399
|
+
model,
|
|
400
|
+
category,
|
|
401
|
+
inserted_at: Date.now().toString(),
|
|
402
|
+
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
403
|
+
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
404
|
+
content_blocks: JSON.stringify(blocks),
|
|
405
|
+
};
|
|
406
|
+
if (binaryRefs.length > 0) {
|
|
407
|
+
hashFields['binary_refs'] = binaryRefs.join(',');
|
|
408
|
+
}
|
|
409
|
+
if (costMicros !== undefined && costMicros > 0) {
|
|
410
|
+
hashFields['cost_micros'] = String(costMicros);
|
|
411
|
+
}
|
|
412
|
+
if (options?.temperature !== undefined)
|
|
413
|
+
hashFields['temperature'] = String(options.temperature);
|
|
414
|
+
if (options?.topP !== undefined)
|
|
415
|
+
hashFields['top_p'] = String(options.topP);
|
|
416
|
+
if (options?.seed !== undefined)
|
|
417
|
+
hashFields['seed'] = String(options.seed);
|
|
418
|
+
try {
|
|
419
|
+
await this.client.hset(entryKey, hashFields);
|
|
420
|
+
}
|
|
421
|
+
catch (err) {
|
|
422
|
+
throw new errors_1.ValkeyCommandError('HSET', err);
|
|
423
|
+
}
|
|
424
|
+
const ttl = options?.ttl ?? this.defaultTtl;
|
|
425
|
+
if (ttl !== undefined)
|
|
426
|
+
await this.client.expire(entryKey, ttl);
|
|
427
|
+
span.setAttributes({
|
|
428
|
+
'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
|
|
429
|
+
'cache.category': category || 'none', 'cache.model': model || 'none',
|
|
430
|
+
'embedding_latency_ms': embedSec * 1000,
|
|
431
|
+
});
|
|
432
|
+
return entryKey;
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
/**
|
|
436
|
+
* Check multiple prompts in parallel, using pipelined FT.SEARCH calls.
|
|
437
|
+
* Returns results in input order.
|
|
438
|
+
*/
|
|
439
|
+
async checkBatch(prompts, options) {
|
|
440
|
+
this.assertInitialized('checkBatch');
|
|
441
|
+
if (prompts.length === 0)
|
|
442
|
+
return [];
|
|
443
|
+
if (options?.rerank) {
|
|
444
|
+
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'rerank' option. Use check() for reranking individual prompts.");
|
|
445
|
+
}
|
|
446
|
+
if (options?.staleAfterModelChange) {
|
|
447
|
+
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
|
|
448
|
+
}
|
|
449
|
+
return this.traced('checkBatch', async (span) => {
|
|
450
|
+
// Resolve all prompts and embed in parallel
|
|
451
|
+
const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
|
|
452
|
+
const embeddings = await Promise.all(resolved.map(({ text }) => this.embed(text)));
|
|
453
|
+
const category = options?.category ?? '';
|
|
454
|
+
const threshold = options?.threshold ??
|
|
455
|
+
(category && this.categoryThresholds[category] !== undefined
|
|
456
|
+
? this.categoryThresholds[category]
|
|
457
|
+
: this.defaultThreshold);
|
|
458
|
+
const k = options?.k ?? 1;
|
|
459
|
+
const userFilter = options?.filter;
|
|
460
|
+
// Pipeline all FT.SEARCH calls
|
|
461
|
+
const pipeline = this.client.pipeline();
|
|
462
|
+
for (let i = 0; i < prompts.length; i++) {
|
|
463
|
+
const { binaryRefs } = resolved[i];
|
|
464
|
+
const { vector: embedding } = embeddings[i];
|
|
465
|
+
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
466
|
+
? (binaryRefs.length === 1
|
|
467
|
+
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
468
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
|
|
469
|
+
: null;
|
|
470
|
+
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
471
|
+
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
472
|
+
const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
473
|
+
pipeline.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
|
|
474
|
+
}
|
|
475
|
+
const pipelineResults = await pipeline.exec();
|
|
476
|
+
span.setAttributes({ 'cache.batch_size': prompts.length, 'cache.name': this.name });
|
|
477
|
+
const results = [];
|
|
478
|
+
const categoryLabel = category || 'none';
|
|
479
|
+
for (let i = 0; i < prompts.length; i++) {
|
|
480
|
+
const pipelineEntry = pipelineResults?.[i];
|
|
481
|
+
const err = pipelineEntry?.[0];
|
|
482
|
+
const rawResult = pipelineEntry?.[1];
|
|
483
|
+
if (err) {
|
|
484
|
+
await this.recordStat('misses');
|
|
485
|
+
this.telemetry.metrics.requestsTotal
|
|
486
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
487
|
+
results.push({ hit: false, confidence: 'miss' });
|
|
488
|
+
continue;
|
|
489
|
+
}
|
|
490
|
+
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
491
|
+
if (parsed.length === 0) {
|
|
492
|
+
await this.recordStat('misses');
|
|
493
|
+
this.telemetry.metrics.requestsTotal
|
|
494
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
495
|
+
results.push({ hit: false, confidence: 'miss' });
|
|
496
|
+
continue;
|
|
497
|
+
}
|
|
498
|
+
const scoreStr = parsed[0].fields['__score'];
|
|
499
|
+
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
500
|
+
if (isNaN(score) || score > threshold) {
|
|
501
|
+
if (!isNaN(score)) {
|
|
502
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
503
|
+
}
|
|
504
|
+
await this.recordStat('misses');
|
|
505
|
+
this.telemetry.metrics.requestsTotal
|
|
506
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
507
|
+
const result = { hit: false, confidence: 'miss' };
|
|
508
|
+
if (!isNaN(score)) {
|
|
509
|
+
result.similarity = score;
|
|
510
|
+
result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
|
|
511
|
+
}
|
|
512
|
+
results.push(result);
|
|
513
|
+
continue;
|
|
514
|
+
}
|
|
515
|
+
await this.recordSimilarityWindow(score, 'hit', category);
|
|
516
|
+
const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
517
|
+
await this.recordStat('hits');
|
|
518
|
+
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
519
|
+
this.telemetry.metrics.requestsTotal
|
|
520
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
521
|
+
const matchedKey = parsed[0].key;
|
|
522
|
+
if (this.defaultTtl !== undefined && matchedKey) {
|
|
523
|
+
await this.client.expire(matchedKey, this.defaultTtl);
|
|
524
|
+
}
|
|
525
|
+
let costSaved;
|
|
526
|
+
const costMicrosStr = parsed[0].fields['cost_micros'];
|
|
527
|
+
if (costMicrosStr) {
|
|
528
|
+
const costMicros = parseInt(costMicrosStr, 10);
|
|
529
|
+
if (!isNaN(costMicros) && costMicros > 0) {
|
|
530
|
+
costSaved = costMicros / 1_000_000;
|
|
531
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
|
|
532
|
+
this.telemetry.metrics.costSavedTotal
|
|
533
|
+
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
let contentBlocks;
|
|
537
|
+
const contentBlocksStr = parsed[0].fields['content_blocks'];
|
|
538
|
+
if (contentBlocksStr) {
|
|
539
|
+
try {
|
|
540
|
+
contentBlocks = JSON.parse(contentBlocksStr);
|
|
541
|
+
}
|
|
542
|
+
catch { /* ignore */ }
|
|
543
|
+
}
|
|
544
|
+
const result = {
|
|
545
|
+
hit: true, response: parsed[0].fields['response'],
|
|
546
|
+
similarity: score, confidence, matchedKey,
|
|
547
|
+
};
|
|
548
|
+
if (costSaved !== undefined)
|
|
549
|
+
result.costSaved = costSaved;
|
|
550
|
+
if (contentBlocks)
|
|
551
|
+
result.contentBlocks = contentBlocks;
|
|
552
|
+
results.push(result);
|
|
553
|
+
}
|
|
554
|
+
return results;
|
|
555
|
+
});
|
|
556
|
+
}
|
|
205
557
|
/**
|
|
206
558
|
* Deletes all entries matching a valkey-search filter expression.
|
|
207
559
|
*
|
|
208
560
|
* **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
|
|
209
|
-
* trusted, programmatically-constructed expressions
|
|
561
|
+
* trusted, programmatically-constructed expressions - never unsanitised
|
|
210
562
|
* user input.
|
|
211
563
|
*/
|
|
212
564
|
async invalidate(filter) {
|
|
@@ -242,13 +594,34 @@ class SemanticCache {
|
|
|
242
594
|
return { deleted: keys.length, truncated };
|
|
243
595
|
});
|
|
244
596
|
}
|
|
597
|
+
/** Delete all entries tagged with the given model name. */
|
|
598
|
+
async invalidateByModel(model) {
|
|
599
|
+
let total = 0;
|
|
600
|
+
let result;
|
|
601
|
+
do {
|
|
602
|
+
result = await this.invalidate(`@model:{${(0, utils_1.escapeTag)(model)}}`);
|
|
603
|
+
total += result.deleted;
|
|
604
|
+
} while (result.truncated);
|
|
605
|
+
return total;
|
|
606
|
+
}
|
|
607
|
+
/** Delete all entries tagged with the given category. */
|
|
608
|
+
async invalidateByCategory(category) {
|
|
609
|
+
let total = 0;
|
|
610
|
+
let result;
|
|
611
|
+
do {
|
|
612
|
+
result = await this.invalidate(`@category:{${(0, utils_1.escapeTag)(category)}}`);
|
|
613
|
+
total += result.deleted;
|
|
614
|
+
} while (result.truncated);
|
|
615
|
+
return total;
|
|
616
|
+
}
|
|
245
617
|
async stats() {
|
|
246
618
|
this.assertInitialized('stats');
|
|
247
619
|
const raw = await this.client.hgetall(this.statsKey);
|
|
248
|
-
const hits = parseInt(raw
|
|
249
|
-
const misses = parseInt(raw
|
|
250
|
-
const total = parseInt(raw
|
|
251
|
-
|
|
620
|
+
const hits = parseInt(raw?.hits ?? '0', 10);
|
|
621
|
+
const misses = parseInt(raw?.misses ?? '0', 10);
|
|
622
|
+
const total = parseInt(raw?.total ?? '0', 10);
|
|
623
|
+
const costSavedMicros = parseInt(raw?.cost_saved_micros ?? '0', 10);
|
|
624
|
+
return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total, costSavedMicros };
|
|
252
625
|
}
|
|
253
626
|
async indexInfo() {
|
|
254
627
|
this.assertInitialized('indexInfo');
|
|
@@ -271,27 +644,217 @@ class SemanticCache {
|
|
|
271
644
|
}
|
|
272
645
|
return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
|
|
273
646
|
}
|
|
274
|
-
|
|
647
|
+
/**
|
|
648
|
+
* Analyze the rolling similarity score window and recommend threshold adjustments.
|
|
649
|
+
*/
|
|
650
|
+
async thresholdEffectiveness(options) {
|
|
651
|
+
this.assertInitialized('thresholdEffectiveness');
|
|
652
|
+
const minSamples = options?.minSamples ?? 100;
|
|
653
|
+
const category = options?.category;
|
|
654
|
+
const threshold = category && this.categoryThresholds[category] !== undefined
|
|
655
|
+
? this.categoryThresholds[category]
|
|
656
|
+
: this.defaultThreshold;
|
|
657
|
+
// Read all window entries
|
|
658
|
+
let rawEntries;
|
|
659
|
+
try {
|
|
660
|
+
rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
|
|
661
|
+
}
|
|
662
|
+
catch {
|
|
663
|
+
rawEntries = [];
|
|
664
|
+
}
|
|
665
|
+
// Parse and optionally filter by category
|
|
666
|
+
const entries = [];
|
|
667
|
+
for (const raw of rawEntries) {
|
|
668
|
+
try {
|
|
669
|
+
const entry = JSON.parse(String(raw));
|
|
670
|
+
if (typeof entry.score === 'number' &&
|
|
671
|
+
(entry.result === 'hit' || entry.result === 'miss')) {
|
|
672
|
+
if (!category || entry.category === category) {
|
|
673
|
+
entries.push(entry);
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
catch { /* skip corrupt entries */ }
|
|
678
|
+
}
|
|
679
|
+
const sampleCount = entries.length;
|
|
680
|
+
const categoryLabel = category ?? 'all';
|
|
681
|
+
if (sampleCount < minSamples) {
|
|
682
|
+
return {
|
|
683
|
+
category: categoryLabel,
|
|
684
|
+
sampleCount,
|
|
685
|
+
currentThreshold: threshold,
|
|
686
|
+
hitRate: 0,
|
|
687
|
+
uncertainHitRate: 0,
|
|
688
|
+
nearMissRate: 0,
|
|
689
|
+
avgHitSimilarity: 0,
|
|
690
|
+
avgMissSimilarity: 0,
|
|
691
|
+
recommendation: 'insufficient_data',
|
|
692
|
+
reasoning: `Only ${sampleCount} samples collected; ${minSamples} required for a reliable recommendation.`,
|
|
693
|
+
};
|
|
694
|
+
}
|
|
695
|
+
const hits = entries.filter((e) => e.result === 'hit');
|
|
696
|
+
const misses = entries.filter((e) => e.result === 'miss');
|
|
697
|
+
const hitRate = hits.length / sampleCount;
|
|
698
|
+
const uncertainHits = hits.filter((e) => e.score >= threshold - this.uncertaintyBand);
|
|
699
|
+
const uncertainHitRate = hits.length > 0 ? uncertainHits.length / hits.length : 0;
|
|
700
|
+
// Near-misses are scores just ABOVE the threshold (genuine close misses).
|
|
701
|
+
// Scores below the threshold recorded as misses (rerank rejection, stale eviction)
|
|
702
|
+
// must be excluded — they produce negative avgNearMissDelta, causing
|
|
703
|
+
// recommendedThreshold = threshold + negative < threshold, contradicting "loosen".
|
|
704
|
+
const nearMisses = misses.filter((e) => e.score > threshold && e.score <= threshold + 0.03);
|
|
705
|
+
const nearMissRate = misses.length > 0 ? nearMisses.length / misses.length : 0;
|
|
706
|
+
const avgHitSimilarity = hits.length > 0 ? hits.reduce((s, e) => s + e.score, 0) / hits.length : 0;
|
|
707
|
+
const avgMissSimilarity = misses.length > 0 ? misses.reduce((s, e) => s + e.score, 0) / misses.length : 0;
|
|
708
|
+
// avgNearMissDelta: how far above the threshold near-misses are on average
|
|
709
|
+
const avgNearMissDelta = nearMisses.length > 0
|
|
710
|
+
? nearMisses.reduce((s, e) => s + (e.score - threshold), 0) / nearMisses.length
|
|
711
|
+
: 0;
|
|
712
|
+
let recommendation;
|
|
713
|
+
let recommendedThreshold;
|
|
714
|
+
let reasoning;
|
|
715
|
+
if (uncertainHitRate > 0.2) {
|
|
716
|
+
recommendation = 'tighten_threshold';
|
|
717
|
+
recommendedThreshold = Math.max(0, threshold - this.uncertaintyBand * 1.5);
|
|
718
|
+
reasoning = `${(uncertainHitRate * 100).toFixed(1)}% of hits are in the uncertainty band - tighten the threshold to reduce false positives.`;
|
|
719
|
+
}
|
|
720
|
+
else if (nearMissRate > 0.3 && avgNearMissDelta < 0.03) {
|
|
721
|
+
recommendation = 'loosen_threshold';
|
|
722
|
+
recommendedThreshold = threshold + avgNearMissDelta;
|
|
723
|
+
reasoning = `${(nearMissRate * 100).toFixed(1)}% of misses are very close to the threshold - consider loosening to capture more hits.`;
|
|
724
|
+
}
|
|
725
|
+
else {
|
|
726
|
+
recommendation = 'optimal';
|
|
727
|
+
reasoning = `Hit rate is ${(hitRate * 100).toFixed(1)}% with ${(uncertainHitRate * 100).toFixed(1)}% uncertain hits - threshold appears well-calibrated.`;
|
|
728
|
+
}
|
|
729
|
+
return {
|
|
730
|
+
category: categoryLabel,
|
|
731
|
+
sampleCount,
|
|
732
|
+
currentThreshold: threshold,
|
|
733
|
+
hitRate,
|
|
734
|
+
uncertainHitRate,
|
|
735
|
+
nearMissRate,
|
|
736
|
+
avgHitSimilarity,
|
|
737
|
+
avgMissSimilarity,
|
|
738
|
+
recommendation,
|
|
739
|
+
recommendedThreshold,
|
|
740
|
+
reasoning,
|
|
741
|
+
};
|
|
742
|
+
}
|
|
743
|
+
/**
|
|
744
|
+
* Returns threshold effectiveness results for every category seen in the
|
|
745
|
+
* rolling window, plus one aggregate result for all categories combined.
|
|
746
|
+
*/
|
|
747
|
+
async thresholdEffectivenessAll(options) {
|
|
748
|
+
this.assertInitialized('thresholdEffectivenessAll');
|
|
749
|
+
let rawEntries;
|
|
750
|
+
try {
|
|
751
|
+
rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
|
|
752
|
+
}
|
|
753
|
+
catch {
|
|
754
|
+
rawEntries = [];
|
|
755
|
+
}
|
|
756
|
+
// Collect unique categories
|
|
757
|
+
const categories = new Set();
|
|
758
|
+
for (const raw of rawEntries) {
|
|
759
|
+
try {
|
|
760
|
+
const entry = JSON.parse(raw);
|
|
761
|
+
if (entry.category)
|
|
762
|
+
categories.add(entry.category);
|
|
763
|
+
}
|
|
764
|
+
catch { /* skip */ }
|
|
765
|
+
}
|
|
766
|
+
const results = await Promise.all([
|
|
767
|
+
this.thresholdEffectiveness({ minSamples: options?.minSamples }),
|
|
768
|
+
...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
|
|
769
|
+
]);
|
|
770
|
+
return results;
|
|
771
|
+
}
|
|
772
|
+
// -- Internal helpers exposed to package adapters --
|
|
773
|
+
/** @internal Default similarity threshold. */
|
|
774
|
+
get _defaultThreshold() { return this.defaultThreshold; }
|
|
775
|
+
/**
|
|
776
|
+
* Execute a stable FT.SEARCH for use by adapters (e.g. LangGraph).
|
|
777
|
+
* SORTBY inserted_at ASC gives stable ordering across paginated calls.
|
|
778
|
+
* @internal
|
|
779
|
+
*/
|
|
780
|
+
async _searchEntries(filterExpr, limit, offset) {
|
|
781
|
+
return this.client.call('FT.SEARCH', this.indexName, filterExpr, 'SORTBY', 'inserted_at', 'ASC', 'LIMIT', String(offset), String(limit), 'DIALECT', '2');
|
|
782
|
+
}
|
|
783
|
+
/**
|
|
784
|
+
* Embed text for use by adapters (e.g. LangGraph semantic search).
|
|
785
|
+
* @internal
|
|
786
|
+
*/
|
|
787
|
+
async _embedText(text) {
|
|
788
|
+
return this.embed(text);
|
|
789
|
+
}
|
|
790
|
+
// -- Private helpers --
|
|
275
791
|
async _doInitialize() {
|
|
276
792
|
const gen = this._initGeneration;
|
|
277
793
|
return this.traced('initialize', async () => {
|
|
278
|
-
const dim = await this.ensureIndexAndGetDimension();
|
|
279
|
-
// If flush() ran while we were initializing, don't overwrite its state.
|
|
794
|
+
const { dim, hasBinaryRefs } = await this.ensureIndexAndGetDimension();
|
|
280
795
|
if (this._initGeneration !== gen)
|
|
281
796
|
return;
|
|
282
797
|
this._dimension = dim;
|
|
798
|
+
this._hasBinaryRefs = hasBinaryRefs;
|
|
283
799
|
this._initialized = true;
|
|
800
|
+
// Fire analytics init once (not on every flush+initialize cycle)
|
|
801
|
+
this.initAnalyticsSafe().catch(() => { });
|
|
284
802
|
});
|
|
285
803
|
}
|
|
804
|
+
async initAnalyticsSafe() {
|
|
805
|
+
if (this.analyticsInitiated)
|
|
806
|
+
return;
|
|
807
|
+
this.analyticsInitiated = true;
|
|
808
|
+
try {
|
|
809
|
+
const a = await (0, analytics_1.createAnalytics)(this.analyticsOpts);
|
|
810
|
+
if (this.shutdownCalled) {
|
|
811
|
+
await a.shutdown();
|
|
812
|
+
return;
|
|
813
|
+
}
|
|
814
|
+
this.analytics = a;
|
|
815
|
+
await a.init(this.client, this.name, {
|
|
816
|
+
defaultThreshold: this.defaultThreshold,
|
|
817
|
+
uncertaintyBand: this.uncertaintyBand,
|
|
818
|
+
defaultTtl: this.defaultTtl ?? null,
|
|
819
|
+
hasCostTable: !!this.costTable,
|
|
820
|
+
usesDefaultCostTable: this.usesDefaultCostTable,
|
|
821
|
+
embeddingCacheEnabled: this.embeddingCacheEnabled,
|
|
822
|
+
categoryThresholdCount: Object.keys(this.categoryThresholds).length,
|
|
823
|
+
dimension: this._dimension,
|
|
824
|
+
});
|
|
825
|
+
const intervalMs = this.analyticsOpts?.statsIntervalMs ?? 300_000;
|
|
826
|
+
if (!this.shutdownCalled && intervalMs > 0) {
|
|
827
|
+
this.statsTimer = setInterval(() => this.captureStatsSnapshot(), intervalMs);
|
|
828
|
+
this.statsTimer.unref();
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
catch {
|
|
832
|
+
// never throw from analytics
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
captureStatsSnapshot() {
|
|
836
|
+
this.stats()
|
|
837
|
+
.then((s) => {
|
|
838
|
+
this.analytics.capture('stats_snapshot', {
|
|
839
|
+
hits: s.hits,
|
|
840
|
+
misses: s.misses,
|
|
841
|
+
hit_rate: s.hitRate,
|
|
842
|
+
cost_saved_micros: s.costSavedMicros,
|
|
843
|
+
});
|
|
844
|
+
})
|
|
845
|
+
.catch(() => { });
|
|
846
|
+
}
|
|
286
847
|
async ensureIndexAndGetDimension() {
|
|
287
848
|
// Try reading an existing index
|
|
288
849
|
try {
|
|
289
850
|
const info = (await this.client.call('FT.INFO', this.indexName));
|
|
290
851
|
const dim = this.parseDimensionFromInfo(info);
|
|
852
|
+
const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
|
|
291
853
|
if (dim > 0)
|
|
292
|
-
return dim;
|
|
293
|
-
// Couldn't parse dimension from FT.INFO
|
|
294
|
-
|
|
854
|
+
return { dim, hasBinaryRefs };
|
|
855
|
+
// Couldn't parse dimension from FT.INFO - fall back to probe
|
|
856
|
+
const probeDim = (await this.embed('probe')).vector.length;
|
|
857
|
+
return { dim: probeDim, hasBinaryRefs };
|
|
295
858
|
}
|
|
296
859
|
catch (err) {
|
|
297
860
|
if (err instanceof errors_1.EmbeddingError)
|
|
@@ -300,18 +863,69 @@ class SemanticCache {
|
|
|
300
863
|
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
301
864
|
}
|
|
302
865
|
}
|
|
303
|
-
// Index doesn't exist
|
|
866
|
+
// Index doesn't exist - probe dimension and create it
|
|
304
867
|
const dim = (await this.embed('probe')).vector.length;
|
|
305
868
|
try {
|
|
306
|
-
await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
|
|
869
|
+
await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'binary_refs', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'temperature', 'NUMERIC', 'top_p', 'NUMERIC', 'seed', 'NUMERIC', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
|
|
307
870
|
}
|
|
308
871
|
catch (err) {
|
|
309
872
|
throw new errors_1.ValkeyCommandError('FT.CREATE', err);
|
|
310
873
|
}
|
|
311
|
-
return dim;
|
|
874
|
+
return { dim, hasBinaryRefs: true };
|
|
875
|
+
}
|
|
876
|
+
/** Check if the index schema has a binary_refs field. */
|
|
877
|
+
parseHasBinaryRefsFromInfo(info) {
|
|
878
|
+
for (let i = 0; i < info.length - 1; i += 2) {
|
|
879
|
+
const key = String(info[i]);
|
|
880
|
+
if (key !== 'attributes' && key !== 'fields')
|
|
881
|
+
continue;
|
|
882
|
+
const attributes = info[i + 1];
|
|
883
|
+
if (!Array.isArray(attributes))
|
|
884
|
+
continue;
|
|
885
|
+
for (const attr of attributes) {
|
|
886
|
+
if (!Array.isArray(attr))
|
|
887
|
+
continue;
|
|
888
|
+
for (let j = 0; j < attr.length - 1; j++) {
|
|
889
|
+
if (String(attr[j]) === 'identifier' && String(attr[j + 1]) === 'binary_refs') {
|
|
890
|
+
return true;
|
|
891
|
+
}
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
return false;
|
|
312
896
|
}
|
|
313
|
-
/**
|
|
897
|
+
/** Resolve a prompt (string or ContentBlock[]) into text + binary refs. */
|
|
898
|
+
resolvePrompt(prompt) {
|
|
899
|
+
if (typeof prompt === 'string') {
|
|
900
|
+
return { text: prompt, binaryRefs: [] };
|
|
901
|
+
}
|
|
902
|
+
const text = (0, utils_1.extractText)(prompt);
|
|
903
|
+
const binaryRefs = (0, utils_1.extractBinaryRefs)(prompt);
|
|
904
|
+
return { text, binaryRefs };
|
|
905
|
+
}
|
|
906
|
+
/** Wraps embedFn with error handling, duration tracking, and optional embedding cache. */
|
|
314
907
|
async embed(text) {
|
|
908
|
+
// Check embedding cache
|
|
909
|
+
if (this.embeddingCacheEnabled && text) {
|
|
910
|
+
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
911
|
+
const embedKey = `${this.embedKeyPrefix}${hash}`;
|
|
912
|
+
try {
|
|
913
|
+
const cached = await this.client.getBuffer(embedKey);
|
|
914
|
+
if (cached) {
|
|
915
|
+
this.telemetry.metrics.embeddingCacheTotal
|
|
916
|
+
.labels({ cache_name: this.name, result: 'hit' }).inc();
|
|
917
|
+
// Decode Float32 buffer
|
|
918
|
+
const vector = [];
|
|
919
|
+
for (let i = 0; i < cached.length; i += 4) {
|
|
920
|
+
vector.push(cached.readFloatLE(i));
|
|
921
|
+
}
|
|
922
|
+
return { vector, durationSec: 0 };
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
catch { /* ignore cache read errors */ }
|
|
926
|
+
this.telemetry.metrics.embeddingCacheTotal
|
|
927
|
+
.labels({ cache_name: this.name, result: 'miss' }).inc();
|
|
928
|
+
}
|
|
315
929
|
const start = performance.now();
|
|
316
930
|
let vector;
|
|
317
931
|
try {
|
|
@@ -324,12 +938,22 @@ class SemanticCache {
|
|
|
324
938
|
this.telemetry.metrics.embeddingDuration
|
|
325
939
|
.labels({ cache_name: this.name })
|
|
326
940
|
.observe(durationSec);
|
|
941
|
+
// Store in embedding cache
|
|
942
|
+
if (this.embeddingCacheEnabled && text) {
|
|
943
|
+
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
944
|
+
const embedKey = `${this.embedKeyPrefix}${hash}`;
|
|
945
|
+
try {
|
|
946
|
+
const buf = (0, utils_1.encodeFloat32)(vector);
|
|
947
|
+
await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
|
|
948
|
+
}
|
|
949
|
+
catch { /* ignore cache write errors */ }
|
|
950
|
+
}
|
|
327
951
|
return { vector, durationSec };
|
|
328
952
|
}
|
|
329
953
|
/**
|
|
330
954
|
* Wraps a method body in an OTel span with automatic status, end, and
|
|
331
955
|
* operation duration metric. The span is passed to fn so callers can
|
|
332
|
-
* set attributes
|
|
956
|
+
* set attributes - but callers must NOT call span.end() or span.setStatus(),
|
|
333
957
|
* as traced() handles both.
|
|
334
958
|
*/
|
|
335
959
|
async traced(operation, fn) {
|
|
@@ -359,6 +983,24 @@ class SemanticCache {
|
|
|
359
983
|
pipeline.hincrby(this.statsKey, field, 1);
|
|
360
984
|
await pipeline.exec();
|
|
361
985
|
}
|
|
986
|
+
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
987
|
+
async recordSimilarityWindow(score, result, category) {
|
|
988
|
+
const now = Date.now();
|
|
989
|
+
// Include a unique nonce so identical (score, result, category) tuples are
|
|
990
|
+
// each recorded as distinct ZADD members instead of overwriting each other.
|
|
991
|
+
const member = JSON.stringify({ score, result, category, _n: Math.random() });
|
|
992
|
+
const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
|
|
993
|
+
try {
|
|
994
|
+
const pipeline = this.client.pipeline();
|
|
995
|
+
pipeline.zadd(this.similarityWindowKey, now, member);
|
|
996
|
+
// Trim by time: remove entries older than 7 days
|
|
997
|
+
pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
|
|
998
|
+
// Trim by count: keep at most 10,000 most recent
|
|
999
|
+
pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
|
|
1000
|
+
await pipeline.exec();
|
|
1001
|
+
}
|
|
1002
|
+
catch { /* best effort - never fail on window writes */ }
|
|
1003
|
+
}
|
|
362
1004
|
assertInitialized(method) {
|
|
363
1005
|
if (!this._initialized) {
|
|
364
1006
|
throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);
|