@betterdb/semantic-cache 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -124
- package/dist/SemanticCache.d.ts +127 -7
- package/dist/SemanticCache.js +867 -48
- package/dist/adapters/ai.js +6 -1
- package/dist/adapters/anthropic.d.ts +32 -0
- package/dist/adapters/anthropic.js +94 -0
- package/dist/adapters/langchain.js +6 -1
- package/dist/adapters/langgraph.d.ts +104 -0
- package/dist/adapters/langgraph.js +271 -0
- package/dist/adapters/llamaindex.d.ts +32 -0
- package/dist/adapters/llamaindex.js +76 -0
- package/dist/adapters/openai-responses.d.ts +31 -0
- package/dist/adapters/openai-responses.js +112 -0
- package/dist/adapters/openai.d.ts +42 -0
- package/dist/adapters/openai.js +97 -0
- package/dist/analytics.d.ts +24 -0
- package/dist/analytics.js +116 -0
- package/dist/cluster.d.ts +10 -0
- package/dist/cluster.js +43 -0
- package/dist/defaultCostTable.d.ts +11 -0
- package/dist/defaultCostTable.js +1976 -0
- package/dist/discovery.d.ts +67 -0
- package/dist/discovery.js +140 -0
- package/dist/embed/bedrock.d.ts +32 -0
- package/dist/embed/bedrock.js +109 -0
- package/dist/embed/cohere.d.ts +34 -0
- package/dist/embed/cohere.js +37 -0
- package/dist/embed/ollama.d.ts +30 -0
- package/dist/embed/ollama.js +24 -0
- package/dist/embed/openai.d.ts +31 -0
- package/dist/embed/openai.js +66 -0
- package/dist/embed/voyage.d.ts +31 -0
- package/dist/embed/voyage.js +32 -0
- package/dist/index.d.ts +8 -1
- package/dist/index.js +13 -1
- package/dist/normalizer.d.ts +68 -0
- package/dist/normalizer.js +102 -0
- package/dist/telemetry.d.ts +5 -0
- package/dist/telemetry.js +30 -0
- package/dist/types.d.ts +128 -7
- package/dist/utils.d.ts +58 -0
- package/dist/utils.js +30 -0
- package/package.json +81 -6
package/dist/SemanticCache.js
CHANGED
|
@@ -2,11 +2,17 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SemanticCache = void 0;
|
|
4
4
|
const node_crypto_1 = require("node:crypto");
|
|
5
|
+
const node_crypto_2 = require("node:crypto");
|
|
5
6
|
const api_1 = require("@opentelemetry/api");
|
|
6
7
|
const errors_1 = require("./errors");
|
|
7
8
|
const telemetry_1 = require("./telemetry");
|
|
8
9
|
const utils_1 = require("./utils");
|
|
10
|
+
const defaultCostTable_1 = require("./defaultCostTable");
|
|
11
|
+
const cluster_1 = require("./cluster");
|
|
12
|
+
const analytics_1 = require("./analytics");
|
|
13
|
+
const discovery_1 = require("./discovery");
|
|
9
14
|
const INVALIDATE_BATCH_SIZE = 1000;
|
|
15
|
+
const PACKAGE_VERSION = require('../package.json').version;
|
|
10
16
|
function errMsg(err) {
|
|
11
17
|
return err instanceof Error ? err.message : String(err);
|
|
12
18
|
}
|
|
@@ -17,15 +23,34 @@ class SemanticCache {
|
|
|
17
23
|
indexName;
|
|
18
24
|
entryPrefix;
|
|
19
25
|
statsKey;
|
|
26
|
+
similarityWindowKey;
|
|
27
|
+
configKey;
|
|
20
28
|
defaultThreshold;
|
|
21
29
|
defaultTtl;
|
|
22
30
|
categoryThresholds;
|
|
23
31
|
uncertaintyBand;
|
|
24
32
|
telemetry;
|
|
33
|
+
costTable;
|
|
34
|
+
embeddingCacheEnabled;
|
|
35
|
+
embeddingCacheTtl;
|
|
36
|
+
embedKeyPrefix;
|
|
37
|
+
discoveryOptions;
|
|
38
|
+
_initialDefaultThreshold;
|
|
39
|
+
_initialCategoryThresholds;
|
|
40
|
+
configRefreshOptions;
|
|
41
|
+
configRefreshTimer;
|
|
42
|
+
discovery = null;
|
|
25
43
|
_initialized = false;
|
|
26
44
|
_dimension = 0;
|
|
45
|
+
_hasBinaryRefs = false;
|
|
27
46
|
_initPromise = null;
|
|
28
47
|
_initGeneration = 0;
|
|
48
|
+
analyticsOpts;
|
|
49
|
+
usesDefaultCostTable;
|
|
50
|
+
analytics = analytics_1.NOOP_ANALYTICS;
|
|
51
|
+
statsTimer;
|
|
52
|
+
shutdownCalled = false;
|
|
53
|
+
analyticsInitiated = false;
|
|
29
54
|
/**
|
|
30
55
|
* Creates a new SemanticCache instance.
|
|
31
56
|
*
|
|
@@ -42,17 +67,46 @@ class SemanticCache {
|
|
|
42
67
|
this.indexName = `${this.name}:idx`;
|
|
43
68
|
this.entryPrefix = `${this.name}:entry:`;
|
|
44
69
|
this.statsKey = `${this.name}:__stats`;
|
|
70
|
+
this.similarityWindowKey = `${this.name}:__similarity_window`;
|
|
71
|
+
this.configKey = `${this.name}:__config`;
|
|
72
|
+
this.embedKeyPrefix = `${this.name}:embed:`;
|
|
45
73
|
this.defaultThreshold = options.defaultThreshold ?? 0.1;
|
|
46
74
|
this.defaultTtl = options.defaultTtl;
|
|
47
75
|
this.categoryThresholds = options.categoryThresholds ?? {};
|
|
48
76
|
this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
|
|
77
|
+
// Build effective cost table
|
|
78
|
+
const useDefault = options.useDefaultCostTable ?? true;
|
|
79
|
+
if (!useDefault && !options.costTable) {
|
|
80
|
+
this.costTable = undefined;
|
|
81
|
+
}
|
|
82
|
+
else if (!useDefault) {
|
|
83
|
+
this.costTable = options.costTable;
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
this.costTable = { ...defaultCostTable_1.DEFAULT_COST_TABLE, ...(options.costTable ?? {}) };
|
|
87
|
+
}
|
|
88
|
+
// Embedding cache config
|
|
89
|
+
this.embeddingCacheEnabled = options.embeddingCache?.enabled ?? true;
|
|
90
|
+
this.embeddingCacheTtl = options.embeddingCache?.ttl ?? 86400;
|
|
49
91
|
this.telemetry = (0, telemetry_1.createTelemetry)({
|
|
50
92
|
prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
|
|
51
93
|
tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
|
|
52
94
|
registry: options.telemetry?.registry,
|
|
53
95
|
});
|
|
96
|
+
this.analyticsOpts = options.analytics;
|
|
97
|
+
this.usesDefaultCostTable = useDefault;
|
|
98
|
+
this.discoveryOptions = options.discovery ?? {};
|
|
99
|
+
// Capture constructor values as fallback when __config fields are absent
|
|
100
|
+
this._initialDefaultThreshold = this.defaultThreshold;
|
|
101
|
+
this._initialCategoryThresholds = { ...this.categoryThresholds };
|
|
102
|
+
// Refresh options
|
|
103
|
+
const refresh = options.configRefresh ?? {};
|
|
104
|
+
this.configRefreshOptions = {
|
|
105
|
+
enabled: refresh.enabled ?? true,
|
|
106
|
+
intervalMs: Math.max(1000, refresh.intervalMs ?? 30_000),
|
|
107
|
+
};
|
|
54
108
|
}
|
|
55
|
-
//
|
|
109
|
+
// -- Lifecycle --
|
|
56
110
|
async initialize() {
|
|
57
111
|
if (!this._initPromise) {
|
|
58
112
|
this._initPromise = this._doInitialize().catch((err) => {
|
|
@@ -65,10 +119,17 @@ class SemanticCache {
|
|
|
65
119
|
async flush() {
|
|
66
120
|
// Mark uninitialized immediately so concurrent check()/store() calls get
|
|
67
121
|
// a clear SemanticCacheUsageError instead of cryptic Valkey errors.
|
|
68
|
-
// Bump generation so any in-flight _doInitialize() won't overwrite this state.
|
|
69
122
|
this._initialized = false;
|
|
70
123
|
this._initPromise = null;
|
|
71
124
|
this._initGeneration++;
|
|
125
|
+
// Capture and null the discovery ref synchronously, before any await,
|
|
126
|
+
// so a concurrent _doInitialize() (started after _initGeneration++) can't
|
|
127
|
+
// race in and have its new manager overwritten by this flush.
|
|
128
|
+
const discoveryToStop = this.discovery;
|
|
129
|
+
this.discovery = null;
|
|
130
|
+
if (discoveryToStop) {
|
|
131
|
+
await discoveryToStop.stop({ deleteHeartbeat: true });
|
|
132
|
+
}
|
|
72
133
|
// Valkey Search 1.2 does not support the DD (Delete Documents) flag on
|
|
73
134
|
// FT.DROPINDEX. Drop the index first, then clean up keys separately.
|
|
74
135
|
try {
|
|
@@ -79,33 +140,86 @@ class SemanticCache {
|
|
|
79
140
|
throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
|
|
80
141
|
}
|
|
81
142
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
143
|
+
// Cluster-aware SCAN for entry keys and embed cache keys
|
|
144
|
+
const patterns = [
|
|
145
|
+
`${this.name}:entry:*`,
|
|
146
|
+
`${this.name}:embed:*`,
|
|
147
|
+
];
|
|
148
|
+
for (const pattern of patterns) {
|
|
149
|
+
await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
|
|
150
|
+
await nodeClient.del(keys);
|
|
151
|
+
});
|
|
152
|
+
}
|
|
90
153
|
await this.client.del(this.statsKey);
|
|
154
|
+
await this.client.del(this.similarityWindowKey);
|
|
155
|
+
this.analytics.capture('cache_flush');
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Shut down the analytics client, cancel the stats timer, and stop the
|
|
159
|
+
* discovery heartbeat. Safe to call multiple times.
|
|
160
|
+
*/
|
|
161
|
+
async shutdown() {
|
|
162
|
+
this.shutdownCalled = true;
|
|
163
|
+
if (this.configRefreshTimer) {
|
|
164
|
+
clearInterval(this.configRefreshTimer);
|
|
165
|
+
this.configRefreshTimer = undefined;
|
|
166
|
+
}
|
|
167
|
+
if (this.statsTimer) {
|
|
168
|
+
clearInterval(this.statsTimer);
|
|
169
|
+
this.statsTimer = undefined;
|
|
170
|
+
}
|
|
171
|
+
await this.analytics.shutdown();
|
|
172
|
+
await this.dispose();
|
|
91
173
|
}
|
|
92
|
-
|
|
174
|
+
/**
|
|
175
|
+
* Graceful shutdown of the discovery layer — stops the heartbeat and
|
|
176
|
+
* deletes this instance's heartbeat key so Monitor marks the cache offline
|
|
177
|
+
* immediately. Does NOT touch the registry hash, the FT index, or any
|
|
178
|
+
* entries. Safe to call multiple times.
|
|
179
|
+
*/
|
|
180
|
+
async dispose() {
|
|
181
|
+
if (this.configRefreshTimer) {
|
|
182
|
+
clearInterval(this.configRefreshTimer);
|
|
183
|
+
this.configRefreshTimer = undefined;
|
|
184
|
+
}
|
|
185
|
+
if (this._initPromise) {
|
|
186
|
+
await this._initPromise.catch(() => { });
|
|
187
|
+
}
|
|
188
|
+
if (this.discovery) {
|
|
189
|
+
await this.discovery.stop({ deleteHeartbeat: true });
|
|
190
|
+
this.discovery = null;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
// -- Public operations --
|
|
93
194
|
async check(prompt, options) {
|
|
94
195
|
this.assertInitialized('check');
|
|
95
196
|
return this.traced('check', async (span) => {
|
|
96
197
|
const category = options?.category ?? '';
|
|
97
|
-
const k = options?.k ?? 1;
|
|
98
198
|
const threshold = options?.threshold ??
|
|
99
199
|
(category && this.categoryThresholds[category] !== undefined
|
|
100
200
|
? this.categoryThresholds[category]
|
|
101
201
|
: this.defaultThreshold);
|
|
102
|
-
|
|
202
|
+
// Resolve text and binary refs from prompt
|
|
203
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
204
|
+
// Stale model detection
|
|
205
|
+
const checkStale = (options?.staleAfterModelChange ?? false) && !!options?.currentModel;
|
|
206
|
+
// Rerank option
|
|
207
|
+
const rerankOpts = options?.rerank;
|
|
208
|
+
const k = rerankOpts ? rerankOpts.k : (options?.k ?? 1);
|
|
209
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
103
210
|
this.assertDimension(embedding);
|
|
104
|
-
//
|
|
105
|
-
|
|
211
|
+
// Build filter
|
|
212
|
+
const userFilter = options?.filter;
|
|
213
|
+
// AND semantics: each ref must be present — chain separate TAG clauses.
|
|
214
|
+
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
215
|
+
? (binaryRefs.length === 1
|
|
216
|
+
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
217
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
|
|
218
|
+
: null;
|
|
219
|
+
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
220
|
+
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
221
|
+
const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
106
222
|
const searchStart = performance.now();
|
|
107
|
-
const filter = options?.filter;
|
|
108
|
-
const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
109
223
|
let rawResult;
|
|
110
224
|
try {
|
|
111
225
|
rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
|
|
@@ -136,6 +250,9 @@ class SemanticCache {
|
|
|
136
250
|
}
|
|
137
251
|
// Miss (no usable score, or score exceeds threshold)
|
|
138
252
|
if (isNaN(score) || score > threshold) {
|
|
253
|
+
if (!isNaN(score)) {
|
|
254
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
255
|
+
}
|
|
139
256
|
await this.recordStat('misses');
|
|
140
257
|
this.telemetry.metrics.requestsTotal
|
|
141
258
|
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
@@ -151,42 +268,148 @@ class SemanticCache {
|
|
|
151
268
|
}
|
|
152
269
|
return result;
|
|
153
270
|
}
|
|
154
|
-
//
|
|
155
|
-
|
|
271
|
+
// Rerank: apply rerankFn to all candidates above threshold
|
|
272
|
+
let winnerParsedIndex = 0;
|
|
273
|
+
if (rerankOpts && parsed.length > 0) {
|
|
274
|
+
// Preserve the original parsed[] index alongside each candidate so we
|
|
275
|
+
// can map back even when NaN-scored entries are filtered out.
|
|
276
|
+
const indexedCandidates = parsed
|
|
277
|
+
.map((r, i) => ({ i, s: parseFloat(r.fields['__score'] ?? 'NaN') }))
|
|
278
|
+
.filter(({ s }) => !isNaN(s))
|
|
279
|
+
.map(({ i, s }) => ({
|
|
280
|
+
origIdx: i,
|
|
281
|
+
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
|
|
282
|
+
}));
|
|
283
|
+
const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
|
|
284
|
+
// Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
|
|
285
|
+
// treated as a miss rather than silently falling back to the top candidate.
|
|
286
|
+
if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
|
|
287
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
288
|
+
await this.recordStat('misses');
|
|
289
|
+
this.telemetry.metrics.requestsTotal
|
|
290
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
291
|
+
span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
|
|
292
|
+
return { hit: false, confidence: 'miss' };
|
|
293
|
+
}
|
|
294
|
+
// Map back to the original parsed[] index (not the candidates[] index)
|
|
295
|
+
winnerParsedIndex = indexedCandidates[picked].origIdx;
|
|
296
|
+
}
|
|
297
|
+
const winner = parsed[winnerParsedIndex] ?? parsed[0];
|
|
298
|
+
const winnerScore = parseFloat(winner.fields['__score'] ?? String(score));
|
|
299
|
+
// Stale model check: if winner's model differs from currentModel, evict and treat as miss
|
|
300
|
+
if (checkStale) {
|
|
301
|
+
const storedModel = winner.fields['model'] ?? '';
|
|
302
|
+
if (storedModel && storedModel !== options.currentModel) {
|
|
303
|
+
// Evict stale entry
|
|
304
|
+
try {
|
|
305
|
+
await this.client.del(winner.key);
|
|
306
|
+
}
|
|
307
|
+
catch { /* best effort */ }
|
|
308
|
+
await this.recordSimilarityWindow(winnerScore, 'miss', category);
|
|
309
|
+
this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
|
|
310
|
+
await this.recordStat('misses');
|
|
311
|
+
this.telemetry.metrics.requestsTotal
|
|
312
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
313
|
+
span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
|
|
314
|
+
return { hit: false, confidence: 'miss' };
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// All checks passed — record as a genuine hit
|
|
318
|
+
await this.recordSimilarityWindow(winnerScore, 'hit', category);
|
|
319
|
+
const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
156
320
|
await this.recordStat('hits');
|
|
157
321
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
158
322
|
this.telemetry.metrics.requestsTotal
|
|
159
323
|
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
160
|
-
const matchedKey =
|
|
324
|
+
const matchedKey = winner.key;
|
|
161
325
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
162
326
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
163
327
|
}
|
|
328
|
+
// Cost saved
|
|
329
|
+
let costSaved;
|
|
330
|
+
const costMicrosStr = winner.fields['cost_micros'];
|
|
331
|
+
if (costMicrosStr) {
|
|
332
|
+
const costMicros = parseInt(costMicrosStr, 10);
|
|
333
|
+
if (!isNaN(costMicros) && costMicros > 0) {
|
|
334
|
+
costSaved = costMicros / 1_000_000;
|
|
335
|
+
// Atomically increment cost_saved_micros in stats
|
|
336
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
|
|
337
|
+
this.telemetry.metrics.costSavedTotal
|
|
338
|
+
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
// Content blocks
|
|
342
|
+
let contentBlocks;
|
|
343
|
+
const contentBlocksStr = winner.fields['content_blocks'];
|
|
344
|
+
if (contentBlocksStr) {
|
|
345
|
+
try {
|
|
346
|
+
contentBlocks = JSON.parse(contentBlocksStr);
|
|
347
|
+
}
|
|
348
|
+
catch { /* ignore parse errors */ }
|
|
349
|
+
}
|
|
164
350
|
span.setAttributes({
|
|
165
|
-
'cache.hit': true, 'cache.similarity':
|
|
351
|
+
'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
|
|
166
352
|
'cache.confidence': confidence, 'cache.matched_key': matchedKey,
|
|
167
353
|
'cache.category': categoryLabel, ...timingAttrs,
|
|
168
354
|
});
|
|
169
|
-
|
|
170
|
-
hit: true, response:
|
|
171
|
-
similarity:
|
|
355
|
+
const result = {
|
|
356
|
+
hit: true, response: winner.fields['response'],
|
|
357
|
+
similarity: winnerScore, confidence, matchedKey,
|
|
172
358
|
};
|
|
359
|
+
if (costSaved !== undefined)
|
|
360
|
+
result.costSaved = costSaved;
|
|
361
|
+
if (contentBlocks)
|
|
362
|
+
result.contentBlocks = contentBlocks;
|
|
363
|
+
return result;
|
|
173
364
|
});
|
|
174
365
|
}
|
|
175
366
|
async store(prompt, response, options) {
|
|
176
367
|
this.assertInitialized('store');
|
|
177
368
|
return this.traced('store', async (span) => {
|
|
178
|
-
const {
|
|
369
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
370
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
179
371
|
this.assertDimension(embedding);
|
|
180
372
|
const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
|
|
181
373
|
const category = options?.category ?? '';
|
|
182
374
|
const model = options?.model ?? '';
|
|
375
|
+
// Compute cost if tokens and model provided
|
|
376
|
+
let costMicros;
|
|
377
|
+
if (options?.model &&
|
|
378
|
+
options?.inputTokens !== undefined &&
|
|
379
|
+
options?.outputTokens !== undefined &&
|
|
380
|
+
this.costTable) {
|
|
381
|
+
const pricing = this.costTable[options.model];
|
|
382
|
+
if (pricing) {
|
|
383
|
+
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
384
|
+
options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
const hashFields = {
|
|
388
|
+
prompt: promptText,
|
|
389
|
+
response,
|
|
390
|
+
model,
|
|
391
|
+
category,
|
|
392
|
+
inserted_at: Date.now().toString(),
|
|
393
|
+
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
394
|
+
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
395
|
+
};
|
|
396
|
+
if (binaryRefs.length > 0) {
|
|
397
|
+
hashFields['binary_refs'] = binaryRefs.join(',');
|
|
398
|
+
}
|
|
399
|
+
if (costMicros !== undefined && costMicros > 0) {
|
|
400
|
+
hashFields['cost_micros'] = String(costMicros);
|
|
401
|
+
}
|
|
402
|
+
if (options?.temperature !== undefined) {
|
|
403
|
+
hashFields['temperature'] = String(options.temperature);
|
|
404
|
+
}
|
|
405
|
+
if (options?.topP !== undefined) {
|
|
406
|
+
hashFields['top_p'] = String(options.topP);
|
|
407
|
+
}
|
|
408
|
+
if (options?.seed !== undefined) {
|
|
409
|
+
hashFields['seed'] = String(options.seed);
|
|
410
|
+
}
|
|
183
411
|
try {
|
|
184
|
-
await this.client.hset(entryKey,
|
|
185
|
-
prompt, response, model, category,
|
|
186
|
-
inserted_at: Date.now().toString(),
|
|
187
|
-
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
188
|
-
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
189
|
-
});
|
|
412
|
+
await this.client.hset(entryKey, hashFields);
|
|
190
413
|
}
|
|
191
414
|
catch (err) {
|
|
192
415
|
throw new errors_1.ValkeyCommandError('HSET', err);
|
|
@@ -202,11 +425,195 @@ class SemanticCache {
|
|
|
202
425
|
return entryKey;
|
|
203
426
|
});
|
|
204
427
|
}
|
|
428
|
+
/**
|
|
429
|
+
* Store structured content blocks as the cached response.
|
|
430
|
+
* Populates both the response field (from TextBlock text) and content_blocks (full JSON).
|
|
431
|
+
*/
|
|
432
|
+
async storeMultipart(prompt, blocks, options) {
|
|
433
|
+
this.assertInitialized('storeMultipart');
|
|
434
|
+
return this.traced('storeMultipart', async (span) => {
|
|
435
|
+
const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
|
|
436
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
|
|
437
|
+
this.assertDimension(embedding);
|
|
438
|
+
// Derive text response from blocks for backward compat
|
|
439
|
+
const textResponse = (0, utils_1.extractText)(blocks);
|
|
440
|
+
const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
|
|
441
|
+
const category = options?.category ?? '';
|
|
442
|
+
const model = options?.model ?? '';
|
|
443
|
+
let costMicros;
|
|
444
|
+
if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
|
|
445
|
+
const pricing = this.costTable[options.model];
|
|
446
|
+
if (pricing) {
|
|
447
|
+
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
448
|
+
options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
const hashFields = {
|
|
452
|
+
prompt: promptText,
|
|
453
|
+
response: textResponse,
|
|
454
|
+
model,
|
|
455
|
+
category,
|
|
456
|
+
inserted_at: Date.now().toString(),
|
|
457
|
+
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
458
|
+
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
459
|
+
content_blocks: JSON.stringify(blocks),
|
|
460
|
+
};
|
|
461
|
+
if (binaryRefs.length > 0) {
|
|
462
|
+
hashFields['binary_refs'] = binaryRefs.join(',');
|
|
463
|
+
}
|
|
464
|
+
if (costMicros !== undefined && costMicros > 0) {
|
|
465
|
+
hashFields['cost_micros'] = String(costMicros);
|
|
466
|
+
}
|
|
467
|
+
if (options?.temperature !== undefined)
|
|
468
|
+
hashFields['temperature'] = String(options.temperature);
|
|
469
|
+
if (options?.topP !== undefined)
|
|
470
|
+
hashFields['top_p'] = String(options.topP);
|
|
471
|
+
if (options?.seed !== undefined)
|
|
472
|
+
hashFields['seed'] = String(options.seed);
|
|
473
|
+
try {
|
|
474
|
+
await this.client.hset(entryKey, hashFields);
|
|
475
|
+
}
|
|
476
|
+
catch (err) {
|
|
477
|
+
throw new errors_1.ValkeyCommandError('HSET', err);
|
|
478
|
+
}
|
|
479
|
+
const ttl = options?.ttl ?? this.defaultTtl;
|
|
480
|
+
if (ttl !== undefined)
|
|
481
|
+
await this.client.expire(entryKey, ttl);
|
|
482
|
+
span.setAttributes({
|
|
483
|
+
'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
|
|
484
|
+
'cache.category': category || 'none', 'cache.model': model || 'none',
|
|
485
|
+
'embedding_latency_ms': embedSec * 1000,
|
|
486
|
+
});
|
|
487
|
+
return entryKey;
|
|
488
|
+
});
|
|
489
|
+
}
|
|
490
|
+
/**
|
|
491
|
+
* Check multiple prompts in parallel, using pipelined FT.SEARCH calls.
|
|
492
|
+
* Returns results in input order.
|
|
493
|
+
*/
|
|
494
|
+
async checkBatch(prompts, options) {
|
|
495
|
+
this.assertInitialized('checkBatch');
|
|
496
|
+
if (prompts.length === 0)
|
|
497
|
+
return [];
|
|
498
|
+
if (options?.rerank) {
|
|
499
|
+
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'rerank' option. Use check() for reranking individual prompts.");
|
|
500
|
+
}
|
|
501
|
+
if (options?.staleAfterModelChange) {
|
|
502
|
+
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
|
|
503
|
+
}
|
|
504
|
+
return this.traced('checkBatch', async (span) => {
|
|
505
|
+
// Resolve all prompts and embed in parallel
|
|
506
|
+
const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
|
|
507
|
+
const embeddings = await Promise.all(resolved.map(({ text }) => this.embed(text)));
|
|
508
|
+
const category = options?.category ?? '';
|
|
509
|
+
const threshold = options?.threshold ??
|
|
510
|
+
(category && this.categoryThresholds[category] !== undefined
|
|
511
|
+
? this.categoryThresholds[category]
|
|
512
|
+
: this.defaultThreshold);
|
|
513
|
+
const k = options?.k ?? 1;
|
|
514
|
+
const userFilter = options?.filter;
|
|
515
|
+
// Pipeline all FT.SEARCH calls
|
|
516
|
+
const pipeline = this.client.pipeline();
|
|
517
|
+
for (let i = 0; i < prompts.length; i++) {
|
|
518
|
+
const { binaryRefs } = resolved[i];
|
|
519
|
+
const { vector: embedding } = embeddings[i];
|
|
520
|
+
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
521
|
+
? (binaryRefs.length === 1
|
|
522
|
+
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
523
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
|
|
524
|
+
: null;
|
|
525
|
+
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
526
|
+
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
527
|
+
const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
528
|
+
pipeline.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
|
|
529
|
+
}
|
|
530
|
+
const pipelineResults = await pipeline.exec();
|
|
531
|
+
span.setAttributes({ 'cache.batch_size': prompts.length, 'cache.name': this.name });
|
|
532
|
+
const results = [];
|
|
533
|
+
const categoryLabel = category || 'none';
|
|
534
|
+
for (let i = 0; i < prompts.length; i++) {
|
|
535
|
+
const pipelineEntry = pipelineResults?.[i];
|
|
536
|
+
const err = pipelineEntry?.[0];
|
|
537
|
+
const rawResult = pipelineEntry?.[1];
|
|
538
|
+
if (err) {
|
|
539
|
+
await this.recordStat('misses');
|
|
540
|
+
this.telemetry.metrics.requestsTotal
|
|
541
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
542
|
+
results.push({ hit: false, confidence: 'miss' });
|
|
543
|
+
continue;
|
|
544
|
+
}
|
|
545
|
+
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
546
|
+
if (parsed.length === 0) {
|
|
547
|
+
await this.recordStat('misses');
|
|
548
|
+
this.telemetry.metrics.requestsTotal
|
|
549
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
550
|
+
results.push({ hit: false, confidence: 'miss' });
|
|
551
|
+
continue;
|
|
552
|
+
}
|
|
553
|
+
const scoreStr = parsed[0].fields['__score'];
|
|
554
|
+
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
555
|
+
if (isNaN(score) || score > threshold) {
|
|
556
|
+
if (!isNaN(score)) {
|
|
557
|
+
await this.recordSimilarityWindow(score, 'miss', category);
|
|
558
|
+
}
|
|
559
|
+
await this.recordStat('misses');
|
|
560
|
+
this.telemetry.metrics.requestsTotal
|
|
561
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
562
|
+
const result = { hit: false, confidence: 'miss' };
|
|
563
|
+
if (!isNaN(score)) {
|
|
564
|
+
result.similarity = score;
|
|
565
|
+
result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
|
|
566
|
+
}
|
|
567
|
+
results.push(result);
|
|
568
|
+
continue;
|
|
569
|
+
}
|
|
570
|
+
await this.recordSimilarityWindow(score, 'hit', category);
|
|
571
|
+
const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
572
|
+
await this.recordStat('hits');
|
|
573
|
+
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
574
|
+
this.telemetry.metrics.requestsTotal
|
|
575
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
576
|
+
const matchedKey = parsed[0].key;
|
|
577
|
+
if (this.defaultTtl !== undefined && matchedKey) {
|
|
578
|
+
await this.client.expire(matchedKey, this.defaultTtl);
|
|
579
|
+
}
|
|
580
|
+
let costSaved;
|
|
581
|
+
const costMicrosStr = parsed[0].fields['cost_micros'];
|
|
582
|
+
if (costMicrosStr) {
|
|
583
|
+
const costMicros = parseInt(costMicrosStr, 10);
|
|
584
|
+
if (!isNaN(costMicros) && costMicros > 0) {
|
|
585
|
+
costSaved = costMicros / 1_000_000;
|
|
586
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
|
|
587
|
+
this.telemetry.metrics.costSavedTotal
|
|
588
|
+
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
let contentBlocks;
|
|
592
|
+
const contentBlocksStr = parsed[0].fields['content_blocks'];
|
|
593
|
+
if (contentBlocksStr) {
|
|
594
|
+
try {
|
|
595
|
+
contentBlocks = JSON.parse(contentBlocksStr);
|
|
596
|
+
}
|
|
597
|
+
catch { /* ignore */ }
|
|
598
|
+
}
|
|
599
|
+
const result = {
|
|
600
|
+
hit: true, response: parsed[0].fields['response'],
|
|
601
|
+
similarity: score, confidence, matchedKey,
|
|
602
|
+
};
|
|
603
|
+
if (costSaved !== undefined)
|
|
604
|
+
result.costSaved = costSaved;
|
|
605
|
+
if (contentBlocks)
|
|
606
|
+
result.contentBlocks = contentBlocks;
|
|
607
|
+
results.push(result);
|
|
608
|
+
}
|
|
609
|
+
return results;
|
|
610
|
+
});
|
|
611
|
+
}
|
|
205
612
|
/**
|
|
206
613
|
* Deletes all entries matching a valkey-search filter expression.
|
|
207
614
|
*
|
|
208
615
|
* **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
|
|
209
|
-
* trusted, programmatically-constructed expressions
|
|
616
|
+
* trusted, programmatically-constructed expressions - never unsanitised
|
|
210
617
|
* user input.
|
|
211
618
|
*/
|
|
212
619
|
async invalidate(filter) {
|
|
@@ -242,13 +649,34 @@ class SemanticCache {
|
|
|
242
649
|
return { deleted: keys.length, truncated };
|
|
243
650
|
});
|
|
244
651
|
}
|
|
652
|
+
/** Delete all entries tagged with the given model name. */
|
|
653
|
+
async invalidateByModel(model) {
|
|
654
|
+
let total = 0;
|
|
655
|
+
let result;
|
|
656
|
+
do {
|
|
657
|
+
result = await this.invalidate(`@model:{${(0, utils_1.escapeTag)(model)}}`);
|
|
658
|
+
total += result.deleted;
|
|
659
|
+
} while (result.truncated);
|
|
660
|
+
return total;
|
|
661
|
+
}
|
|
662
|
+
/** Delete all entries tagged with the given category. */
|
|
663
|
+
async invalidateByCategory(category) {
|
|
664
|
+
let total = 0;
|
|
665
|
+
let result;
|
|
666
|
+
do {
|
|
667
|
+
result = await this.invalidate(`@category:{${(0, utils_1.escapeTag)(category)}}`);
|
|
668
|
+
total += result.deleted;
|
|
669
|
+
} while (result.truncated);
|
|
670
|
+
return total;
|
|
671
|
+
}
|
|
245
672
|
async stats() {
|
|
246
673
|
this.assertInitialized('stats');
|
|
247
674
|
const raw = await this.client.hgetall(this.statsKey);
|
|
248
|
-
const hits = parseInt(raw
|
|
249
|
-
const misses = parseInt(raw
|
|
250
|
-
const total = parseInt(raw
|
|
251
|
-
|
|
675
|
+
const hits = parseInt(raw?.hits ?? '0', 10);
|
|
676
|
+
const misses = parseInt(raw?.misses ?? '0', 10);
|
|
677
|
+
const total = parseInt(raw?.total ?? '0', 10);
|
|
678
|
+
const costSavedMicros = parseInt(raw?.cost_saved_micros ?? '0', 10);
|
|
679
|
+
return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total, costSavedMicros };
|
|
252
680
|
}
|
|
253
681
|
async indexInfo() {
|
|
254
682
|
this.assertInitialized('indexInfo');
|
|
@@ -271,27 +699,339 @@ class SemanticCache {
|
|
|
271
699
|
}
|
|
272
700
|
return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
|
|
273
701
|
}
|
|
274
|
-
|
|
702
|
+
/**
|
|
703
|
+
* Analyze the rolling similarity score window and recommend threshold adjustments.
|
|
704
|
+
*/
|
|
705
|
+
async thresholdEffectiveness(options) {
|
|
706
|
+
this.assertInitialized('thresholdEffectiveness');
|
|
707
|
+
const minSamples = options?.minSamples ?? 100;
|
|
708
|
+
const category = options?.category;
|
|
709
|
+
const threshold = category && this.categoryThresholds[category] !== undefined
|
|
710
|
+
? this.categoryThresholds[category]
|
|
711
|
+
: this.defaultThreshold;
|
|
712
|
+
// Read all window entries
|
|
713
|
+
let rawEntries;
|
|
714
|
+
try {
|
|
715
|
+
rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
|
|
716
|
+
}
|
|
717
|
+
catch {
|
|
718
|
+
rawEntries = [];
|
|
719
|
+
}
|
|
720
|
+
// Parse and optionally filter by category
|
|
721
|
+
const entries = [];
|
|
722
|
+
for (const raw of rawEntries) {
|
|
723
|
+
try {
|
|
724
|
+
const entry = JSON.parse(String(raw));
|
|
725
|
+
if (typeof entry.score === 'number' &&
|
|
726
|
+
(entry.result === 'hit' || entry.result === 'miss')) {
|
|
727
|
+
if (!category || entry.category === category) {
|
|
728
|
+
entries.push(entry);
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
catch { /* skip corrupt entries */ }
|
|
733
|
+
}
|
|
734
|
+
const sampleCount = entries.length;
|
|
735
|
+
const categoryLabel = category ?? 'all';
|
|
736
|
+
if (sampleCount < minSamples) {
|
|
737
|
+
return {
|
|
738
|
+
category: categoryLabel,
|
|
739
|
+
sampleCount,
|
|
740
|
+
currentThreshold: threshold,
|
|
741
|
+
hitRate: 0,
|
|
742
|
+
uncertainHitRate: 0,
|
|
743
|
+
nearMissRate: 0,
|
|
744
|
+
avgHitSimilarity: 0,
|
|
745
|
+
avgMissSimilarity: 0,
|
|
746
|
+
recommendation: 'insufficient_data',
|
|
747
|
+
reasoning: `Only ${sampleCount} samples collected; ${minSamples} required for a reliable recommendation.`,
|
|
748
|
+
};
|
|
749
|
+
}
|
|
750
|
+
const hits = entries.filter((e) => e.result === 'hit');
|
|
751
|
+
const misses = entries.filter((e) => e.result === 'miss');
|
|
752
|
+
const hitRate = hits.length / sampleCount;
|
|
753
|
+
const uncertainHits = hits.filter((e) => e.score >= threshold - this.uncertaintyBand);
|
|
754
|
+
const uncertainHitRate = hits.length > 0 ? uncertainHits.length / hits.length : 0;
|
|
755
|
+
// Near-misses are scores just ABOVE the threshold (genuine close misses).
|
|
756
|
+
// Scores below the threshold recorded as misses (rerank rejection, stale eviction)
|
|
757
|
+
// must be excluded — they produce negative avgNearMissDelta, causing
|
|
758
|
+
// recommendedThreshold = threshold + negative < threshold, contradicting "loosen".
|
|
759
|
+
const nearMisses = misses.filter((e) => e.score > threshold && e.score <= threshold + 0.03);
|
|
760
|
+
const nearMissRate = misses.length > 0 ? nearMisses.length / misses.length : 0;
|
|
761
|
+
const avgHitSimilarity = hits.length > 0 ? hits.reduce((s, e) => s + e.score, 0) / hits.length : 0;
|
|
762
|
+
const avgMissSimilarity = misses.length > 0 ? misses.reduce((s, e) => s + e.score, 0) / misses.length : 0;
|
|
763
|
+
// avgNearMissDelta: how far above the threshold near-misses are on average
|
|
764
|
+
const avgNearMissDelta = nearMisses.length > 0
|
|
765
|
+
? nearMisses.reduce((s, e) => s + (e.score - threshold), 0) / nearMisses.length
|
|
766
|
+
: 0;
|
|
767
|
+
let recommendation;
|
|
768
|
+
let recommendedThreshold;
|
|
769
|
+
let reasoning;
|
|
770
|
+
if (uncertainHitRate > 0.2) {
|
|
771
|
+
recommendation = 'tighten_threshold';
|
|
772
|
+
recommendedThreshold = Math.max(0, threshold - this.uncertaintyBand * 1.5);
|
|
773
|
+
reasoning = `${(uncertainHitRate * 100).toFixed(1)}% of hits are in the uncertainty band - tighten the threshold to reduce false positives.`;
|
|
774
|
+
}
|
|
775
|
+
else if (nearMissRate > 0.3 && avgNearMissDelta < 0.03) {
|
|
776
|
+
recommendation = 'loosen_threshold';
|
|
777
|
+
recommendedThreshold = threshold + avgNearMissDelta;
|
|
778
|
+
reasoning = `${(nearMissRate * 100).toFixed(1)}% of misses are very close to the threshold - consider loosening to capture more hits.`;
|
|
779
|
+
}
|
|
780
|
+
else {
|
|
781
|
+
recommendation = 'optimal';
|
|
782
|
+
reasoning = `Hit rate is ${(hitRate * 100).toFixed(1)}% with ${(uncertainHitRate * 100).toFixed(1)}% uncertain hits - threshold appears well-calibrated.`;
|
|
783
|
+
}
|
|
784
|
+
return {
|
|
785
|
+
category: categoryLabel,
|
|
786
|
+
sampleCount,
|
|
787
|
+
currentThreshold: threshold,
|
|
788
|
+
hitRate,
|
|
789
|
+
uncertainHitRate,
|
|
790
|
+
nearMissRate,
|
|
791
|
+
avgHitSimilarity,
|
|
792
|
+
avgMissSimilarity,
|
|
793
|
+
recommendation,
|
|
794
|
+
recommendedThreshold,
|
|
795
|
+
reasoning,
|
|
796
|
+
};
|
|
797
|
+
}
|
|
798
|
+
/**
|
|
799
|
+
* Returns threshold effectiveness results for every category seen in the
|
|
800
|
+
* rolling window, plus one aggregate result for all categories combined.
|
|
801
|
+
*/
|
|
802
|
+
async thresholdEffectivenessAll(options) {
|
|
803
|
+
this.assertInitialized('thresholdEffectivenessAll');
|
|
804
|
+
let rawEntries;
|
|
805
|
+
try {
|
|
806
|
+
rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
|
|
807
|
+
}
|
|
808
|
+
catch {
|
|
809
|
+
rawEntries = [];
|
|
810
|
+
}
|
|
811
|
+
// Collect unique categories
|
|
812
|
+
const categories = new Set();
|
|
813
|
+
for (const raw of rawEntries) {
|
|
814
|
+
try {
|
|
815
|
+
const entry = JSON.parse(raw);
|
|
816
|
+
if (entry.category)
|
|
817
|
+
categories.add(entry.category);
|
|
818
|
+
}
|
|
819
|
+
catch { /* skip */ }
|
|
820
|
+
}
|
|
821
|
+
const results = await Promise.all([
|
|
822
|
+
this.thresholdEffectiveness({ minSamples: options?.minSamples }),
|
|
823
|
+
...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
|
|
824
|
+
]);
|
|
825
|
+
return results;
|
|
826
|
+
}
|
|
827
|
+
/**
|
|
828
|
+
* Refresh threshold config from Valkey. Returns true on a successful HGETALL,
|
|
829
|
+
* false if the call threw.
|
|
830
|
+
*
|
|
831
|
+
* Field semantics:
|
|
832
|
+
* - "threshold" -> updates defaultThreshold
|
|
833
|
+
* - "threshold:{category}" -> updates categoryThresholds[category]
|
|
834
|
+
* - "threshold:" (empty) -> ignored
|
|
835
|
+
* - non-numeric values -> ignored
|
|
836
|
+
* - out-of-range values -> ignored (must be 0 <= x <= 2)
|
|
837
|
+
*
|
|
838
|
+
* Categories present in memory but absent from the hash fall back to their
|
|
839
|
+
* constructor values (or are removed if no constructor override existed).
|
|
840
|
+
* The default threshold likewise falls back to its constructor value if
|
|
841
|
+
* `threshold` is absent from the hash.
|
|
842
|
+
*/
|
|
843
|
+
async refreshConfig() {
|
|
844
|
+
let raw = null;
|
|
845
|
+
try {
|
|
846
|
+
raw = await this.client.hgetall(this.configKey);
|
|
847
|
+
}
|
|
848
|
+
catch {
|
|
849
|
+
return false;
|
|
850
|
+
}
|
|
851
|
+
let nextDefault = this._initialDefaultThreshold;
|
|
852
|
+
const nextCategory = { ...this._initialCategoryThresholds };
|
|
853
|
+
if (raw) {
|
|
854
|
+
for (const [field, value] of Object.entries(raw)) {
|
|
855
|
+
const parsed = Number(value);
|
|
856
|
+
if (!Number.isFinite(parsed) || parsed < 0 || parsed > 2) {
|
|
857
|
+
continue;
|
|
858
|
+
}
|
|
859
|
+
if (field === 'threshold') {
|
|
860
|
+
nextDefault = parsed;
|
|
861
|
+
}
|
|
862
|
+
else if (field.startsWith('threshold:')) {
|
|
863
|
+
const category = field.slice('threshold:'.length);
|
|
864
|
+
if (category.length > 0) {
|
|
865
|
+
nextCategory[category] = parsed;
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
this.defaultThreshold = nextDefault;
|
|
871
|
+
this.categoryThresholds = nextCategory;
|
|
872
|
+
return true;
|
|
873
|
+
}
|
|
874
|
+
// -- Internal helpers exposed to package adapters --
|
|
875
|
+
/** @internal Default similarity threshold. */
|
|
876
|
+
get _defaultThreshold() { return this.defaultThreshold; }
|
|
877
|
+
/** @internal Test-only getter. */
|
|
878
|
+
get _categoryThresholds() {
|
|
879
|
+
return this.categoryThresholds;
|
|
880
|
+
}
|
|
881
|
+
/** @internal Test-only getter. */
|
|
882
|
+
get _configRefreshIntervalMs() {
|
|
883
|
+
return this.configRefreshOptions.intervalMs;
|
|
884
|
+
}
|
|
885
|
+
/**
|
|
886
|
+
* Execute a stable FT.SEARCH for use by adapters (e.g. LangGraph).
|
|
887
|
+
* SORTBY inserted_at ASC gives stable ordering across paginated calls.
|
|
888
|
+
* @internal
|
|
889
|
+
*/
|
|
890
|
+
async _searchEntries(filterExpr, limit, offset) {
|
|
891
|
+
return this.client.call('FT.SEARCH', this.indexName, filterExpr, 'SORTBY', 'inserted_at', 'ASC', 'LIMIT', String(offset), String(limit), 'DIALECT', '2');
|
|
892
|
+
}
|
|
893
|
+
/**
|
|
894
|
+
* Embed text for use by adapters (e.g. LangGraph semantic search).
|
|
895
|
+
* @internal
|
|
896
|
+
*/
|
|
897
|
+
async _embedText(text) {
|
|
898
|
+
return this.embed(text);
|
|
899
|
+
}
|
|
900
|
+
// -- Private helpers --
|
|
901
|
+
startConfigRefresh() {
|
|
902
|
+
if (!this.configRefreshOptions.enabled) {
|
|
903
|
+
return;
|
|
904
|
+
}
|
|
905
|
+
const tick = () => {
|
|
906
|
+
this.refreshConfig()
|
|
907
|
+
.then((ok) => {
|
|
908
|
+
if (!ok) {
|
|
909
|
+
this.telemetry.metrics.configRefreshFailed
|
|
910
|
+
.labels({ cache_name: this.name })
|
|
911
|
+
.inc();
|
|
912
|
+
}
|
|
913
|
+
})
|
|
914
|
+
.catch(() => {
|
|
915
|
+
this.telemetry.metrics.configRefreshFailed
|
|
916
|
+
.labels({ cache_name: this.name })
|
|
917
|
+
.inc();
|
|
918
|
+
});
|
|
919
|
+
};
|
|
920
|
+
// Synchronous first refresh: process started immediately after a proposal
|
|
921
|
+
// was applied picks up the change without waiting for the first tick.
|
|
922
|
+
tick();
|
|
923
|
+
this.configRefreshTimer = setInterval(tick, this.configRefreshOptions.intervalMs);
|
|
924
|
+
if (typeof this.configRefreshTimer.unref === 'function') {
|
|
925
|
+
this.configRefreshTimer.unref();
|
|
926
|
+
}
|
|
927
|
+
}
|
|
275
928
|
async _doInitialize() {
|
|
276
929
|
const gen = this._initGeneration;
|
|
277
930
|
return this.traced('initialize', async () => {
|
|
278
|
-
const dim = await this.ensureIndexAndGetDimension();
|
|
279
|
-
|
|
280
|
-
if (this._initGeneration !== gen)
|
|
931
|
+
const { dim, hasBinaryRefs } = await this.ensureIndexAndGetDimension();
|
|
932
|
+
if (this._initGeneration !== gen) {
|
|
281
933
|
return;
|
|
934
|
+
}
|
|
282
935
|
this._dimension = dim;
|
|
936
|
+
this._hasBinaryRefs = hasBinaryRefs;
|
|
937
|
+
// registerDiscovery() may throw SemanticCacheUsageError on a name
|
|
938
|
+
// collision. Mark the cache initialized only after discovery succeeds
|
|
939
|
+
// so a colliding caller cannot subsequently call check()/store()
|
|
940
|
+
// against another owner's keys.
|
|
941
|
+
const manager = await this.registerDiscovery();
|
|
942
|
+
if (this._initGeneration !== gen) {
|
|
943
|
+
if (manager) {
|
|
944
|
+
await manager.stop({ deleteHeartbeat: true });
|
|
945
|
+
}
|
|
946
|
+
return;
|
|
947
|
+
}
|
|
948
|
+
this.discovery = manager;
|
|
283
949
|
this._initialized = true;
|
|
950
|
+
this.startConfigRefresh();
|
|
951
|
+
// Fire analytics init once (not on every flush+initialize cycle)
|
|
952
|
+
this.initAnalyticsSafe().catch(() => { });
|
|
284
953
|
});
|
|
285
954
|
}
|
|
955
|
+
async registerDiscovery() {
|
|
956
|
+
if (this.discoveryOptions.enabled === false) {
|
|
957
|
+
return null;
|
|
958
|
+
}
|
|
959
|
+
const metadata = (0, discovery_1.buildSemanticMetadata)({
|
|
960
|
+
name: this.name,
|
|
961
|
+
version: PACKAGE_VERSION,
|
|
962
|
+
defaultThreshold: this.defaultThreshold,
|
|
963
|
+
categoryThresholds: this.categoryThresholds,
|
|
964
|
+
uncertaintyBand: this.uncertaintyBand,
|
|
965
|
+
includeCategories: this.discoveryOptions.includeCategories ?? true,
|
|
966
|
+
});
|
|
967
|
+
const manager = new discovery_1.DiscoveryManager({
|
|
968
|
+
client: this.client,
|
|
969
|
+
name: this.name,
|
|
970
|
+
metadata,
|
|
971
|
+
heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
|
|
972
|
+
onWriteFailed: () => {
|
|
973
|
+
this.telemetry.metrics.discoveryWriteFailed
|
|
974
|
+
.labels({ cache_name: this.name })
|
|
975
|
+
.inc();
|
|
976
|
+
},
|
|
977
|
+
});
|
|
978
|
+
await manager.register();
|
|
979
|
+
return manager;
|
|
980
|
+
}
|
|
981
|
+
async initAnalyticsSafe() {
|
|
982
|
+
if (this.analyticsInitiated)
|
|
983
|
+
return;
|
|
984
|
+
this.analyticsInitiated = true;
|
|
985
|
+
try {
|
|
986
|
+
const a = await (0, analytics_1.createAnalytics)(this.analyticsOpts);
|
|
987
|
+
if (this.shutdownCalled) {
|
|
988
|
+
await a.shutdown();
|
|
989
|
+
return;
|
|
990
|
+
}
|
|
991
|
+
this.analytics = a;
|
|
992
|
+
await a.init(this.client, this.name, {
|
|
993
|
+
defaultThreshold: this.defaultThreshold,
|
|
994
|
+
uncertaintyBand: this.uncertaintyBand,
|
|
995
|
+
defaultTtl: this.defaultTtl ?? null,
|
|
996
|
+
hasCostTable: !!this.costTable,
|
|
997
|
+
usesDefaultCostTable: this.usesDefaultCostTable,
|
|
998
|
+
embeddingCacheEnabled: this.embeddingCacheEnabled,
|
|
999
|
+
categoryThresholdCount: Object.keys(this.categoryThresholds).length,
|
|
1000
|
+
dimension: this._dimension,
|
|
1001
|
+
});
|
|
1002
|
+
const intervalMs = this.analyticsOpts?.statsIntervalMs ?? 300_000;
|
|
1003
|
+
if (!this.shutdownCalled && intervalMs > 0) {
|
|
1004
|
+
this.statsTimer = setInterval(() => this.captureStatsSnapshot(), intervalMs);
|
|
1005
|
+
this.statsTimer.unref();
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
catch {
|
|
1009
|
+
// never throw from analytics
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
captureStatsSnapshot() {
|
|
1013
|
+
this.stats()
|
|
1014
|
+
.then((s) => {
|
|
1015
|
+
this.analytics.capture('stats_snapshot', {
|
|
1016
|
+
hits: s.hits,
|
|
1017
|
+
misses: s.misses,
|
|
1018
|
+
hit_rate: s.hitRate,
|
|
1019
|
+
cost_saved_micros: s.costSavedMicros,
|
|
1020
|
+
});
|
|
1021
|
+
})
|
|
1022
|
+
.catch(() => { });
|
|
1023
|
+
}
|
|
286
1024
|
async ensureIndexAndGetDimension() {
|
|
287
1025
|
// Try reading an existing index
|
|
288
1026
|
try {
|
|
289
1027
|
const info = (await this.client.call('FT.INFO', this.indexName));
|
|
290
1028
|
const dim = this.parseDimensionFromInfo(info);
|
|
1029
|
+
const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
|
|
291
1030
|
if (dim > 0)
|
|
292
|
-
return dim;
|
|
293
|
-
// Couldn't parse dimension from FT.INFO
|
|
294
|
-
|
|
1031
|
+
return { dim, hasBinaryRefs };
|
|
1032
|
+
// Couldn't parse dimension from FT.INFO - fall back to probe
|
|
1033
|
+
const probeDim = (await this.embed('probe')).vector.length;
|
|
1034
|
+
return { dim: probeDim, hasBinaryRefs };
|
|
295
1035
|
}
|
|
296
1036
|
catch (err) {
|
|
297
1037
|
if (err instanceof errors_1.EmbeddingError)
|
|
@@ -300,18 +1040,69 @@ class SemanticCache {
|
|
|
300
1040
|
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
301
1041
|
}
|
|
302
1042
|
}
|
|
303
|
-
// Index doesn't exist
|
|
1043
|
+
// Index doesn't exist - probe dimension and create it
|
|
304
1044
|
const dim = (await this.embed('probe')).vector.length;
|
|
305
1045
|
try {
|
|
306
|
-
await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
|
|
1046
|
+
await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'binary_refs', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'temperature', 'NUMERIC', 'top_p', 'NUMERIC', 'seed', 'NUMERIC', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
|
|
307
1047
|
}
|
|
308
1048
|
catch (err) {
|
|
309
1049
|
throw new errors_1.ValkeyCommandError('FT.CREATE', err);
|
|
310
1050
|
}
|
|
311
|
-
return dim;
|
|
1051
|
+
return { dim, hasBinaryRefs: true };
|
|
1052
|
+
}
|
|
1053
|
+
/** Check if the index schema has a binary_refs field. */
|
|
1054
|
+
parseHasBinaryRefsFromInfo(info) {
|
|
1055
|
+
for (let i = 0; i < info.length - 1; i += 2) {
|
|
1056
|
+
const key = String(info[i]);
|
|
1057
|
+
if (key !== 'attributes' && key !== 'fields')
|
|
1058
|
+
continue;
|
|
1059
|
+
const attributes = info[i + 1];
|
|
1060
|
+
if (!Array.isArray(attributes))
|
|
1061
|
+
continue;
|
|
1062
|
+
for (const attr of attributes) {
|
|
1063
|
+
if (!Array.isArray(attr))
|
|
1064
|
+
continue;
|
|
1065
|
+
for (let j = 0; j < attr.length - 1; j++) {
|
|
1066
|
+
if (String(attr[j]) === 'identifier' && String(attr[j + 1]) === 'binary_refs') {
|
|
1067
|
+
return true;
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
return false;
|
|
312
1073
|
}
|
|
313
|
-
/**
|
|
1074
|
+
/** Resolve a prompt (string or ContentBlock[]) into text + binary refs. */
|
|
1075
|
+
resolvePrompt(prompt) {
|
|
1076
|
+
if (typeof prompt === 'string') {
|
|
1077
|
+
return { text: prompt, binaryRefs: [] };
|
|
1078
|
+
}
|
|
1079
|
+
const text = (0, utils_1.extractText)(prompt);
|
|
1080
|
+
const binaryRefs = (0, utils_1.extractBinaryRefs)(prompt);
|
|
1081
|
+
return { text, binaryRefs };
|
|
1082
|
+
}
|
|
1083
|
+
/** Wraps embedFn with error handling, duration tracking, and optional embedding cache. */
|
|
314
1084
|
async embed(text) {
|
|
1085
|
+
// Check embedding cache
|
|
1086
|
+
if (this.embeddingCacheEnabled && text) {
|
|
1087
|
+
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
1088
|
+
const embedKey = `${this.embedKeyPrefix}${hash}`;
|
|
1089
|
+
try {
|
|
1090
|
+
const cached = await this.client.getBuffer(embedKey);
|
|
1091
|
+
if (cached) {
|
|
1092
|
+
this.telemetry.metrics.embeddingCacheTotal
|
|
1093
|
+
.labels({ cache_name: this.name, result: 'hit' }).inc();
|
|
1094
|
+
// Decode Float32 buffer
|
|
1095
|
+
const vector = [];
|
|
1096
|
+
for (let i = 0; i < cached.length; i += 4) {
|
|
1097
|
+
vector.push(cached.readFloatLE(i));
|
|
1098
|
+
}
|
|
1099
|
+
return { vector, durationSec: 0 };
|
|
1100
|
+
}
|
|
1101
|
+
}
|
|
1102
|
+
catch { /* ignore cache read errors */ }
|
|
1103
|
+
this.telemetry.metrics.embeddingCacheTotal
|
|
1104
|
+
.labels({ cache_name: this.name, result: 'miss' }).inc();
|
|
1105
|
+
}
|
|
315
1106
|
const start = performance.now();
|
|
316
1107
|
let vector;
|
|
317
1108
|
try {
|
|
@@ -324,12 +1115,22 @@ class SemanticCache {
|
|
|
324
1115
|
this.telemetry.metrics.embeddingDuration
|
|
325
1116
|
.labels({ cache_name: this.name })
|
|
326
1117
|
.observe(durationSec);
|
|
1118
|
+
// Store in embedding cache
|
|
1119
|
+
if (this.embeddingCacheEnabled && text) {
|
|
1120
|
+
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
1121
|
+
const embedKey = `${this.embedKeyPrefix}${hash}`;
|
|
1122
|
+
try {
|
|
1123
|
+
const buf = (0, utils_1.encodeFloat32)(vector);
|
|
1124
|
+
await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
|
|
1125
|
+
}
|
|
1126
|
+
catch { /* ignore cache write errors */ }
|
|
1127
|
+
}
|
|
327
1128
|
return { vector, durationSec };
|
|
328
1129
|
}
|
|
329
1130
|
/**
|
|
330
1131
|
* Wraps a method body in an OTel span with automatic status, end, and
|
|
331
1132
|
* operation duration metric. The span is passed to fn so callers can
|
|
332
|
-
* set attributes
|
|
1133
|
+
* set attributes - but callers must NOT call span.end() or span.setStatus(),
|
|
333
1134
|
* as traced() handles both.
|
|
334
1135
|
*/
|
|
335
1136
|
async traced(operation, fn) {
|
|
@@ -359,6 +1160,24 @@ class SemanticCache {
|
|
|
359
1160
|
pipeline.hincrby(this.statsKey, field, 1);
|
|
360
1161
|
await pipeline.exec();
|
|
361
1162
|
}
|
|
1163
|
+
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
1164
|
+
async recordSimilarityWindow(score, result, category) {
|
|
1165
|
+
const now = Date.now();
|
|
1166
|
+
// Include a unique nonce so identical (score, result, category) tuples are
|
|
1167
|
+
// each recorded as distinct ZADD members instead of overwriting each other.
|
|
1168
|
+
const member = JSON.stringify({ score, result, category, _n: Math.random() });
|
|
1169
|
+
const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
|
|
1170
|
+
try {
|
|
1171
|
+
const pipeline = this.client.pipeline();
|
|
1172
|
+
pipeline.zadd(this.similarityWindowKey, now, member);
|
|
1173
|
+
// Trim by time: remove entries older than 7 days
|
|
1174
|
+
pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
|
|
1175
|
+
// Trim by count: keep at most 10,000 most recent
|
|
1176
|
+
pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
|
|
1177
|
+
await pipeline.exec();
|
|
1178
|
+
}
|
|
1179
|
+
catch { /* best effort - never fail on window writes */ }
|
|
1180
|
+
}
|
|
362
1181
|
assertInitialized(method) {
|
|
363
1182
|
if (!this._initialized) {
|
|
364
1183
|
throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);
|