@betterdb/semantic-cache 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -0
- package/dist/SemanticCache.d.ts +14 -2
- package/dist/SemanticCache.js +264 -160
- package/dist/index.d.ts +1 -0
- package/dist/index.js +3 -1
- package/dist/rerank.d.ts +31 -0
- package/dist/rerank.js +73 -0
- package/dist/types.d.ts +3 -0
- package/dist/utils.d.ts +1 -28
- package/dist/utils.js +5 -77
- package/package.json +12 -11
package/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2026-present BetterDB Inc.
|
|
2
|
+
|
|
3
|
+
Portions of this software are licensed as follows:
|
|
4
|
+
|
|
5
|
+
- All content residing under the "doc/" directory of this repository is licensed under the "Creative Commons: CC BY-SA 4.0 license".
|
|
6
|
+
|
|
7
|
+
- All content that resides under the "proprietary/" directory of this repository, if that directory exists, is licensed under the license defined in "proprietary/LICENSE".
|
|
8
|
+
|
|
9
|
+
- All third-party components incorporated into the BetterDB Software are licensed under the original license provided by the owner of the applicable component.
|
|
10
|
+
|
|
11
|
+
- Content outside of the above-mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
|
|
12
|
+
|
|
13
|
+
MIT License
|
|
14
|
+
|
|
15
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/dist/SemanticCache.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ export declare class SemanticCache {
|
|
|
8
8
|
private readonly entryPrefix;
|
|
9
9
|
private readonly statsKey;
|
|
10
10
|
private readonly similarityWindowKey;
|
|
11
|
+
private readonly missPendingKey;
|
|
11
12
|
private readonly configKey;
|
|
12
13
|
private defaultThreshold;
|
|
13
14
|
private readonly defaultTtl;
|
|
@@ -159,10 +160,21 @@ export declare class SemanticCache {
|
|
|
159
160
|
private recordStat;
|
|
160
161
|
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
161
162
|
private recordSimilarityWindow;
|
|
163
|
+
/**
|
|
164
|
+
* Track a miss so a subsequent store() can backfill its cost into the
|
|
165
|
+
* similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
|
|
166
|
+
* zset — entries beyond that are pruned on every record and backfill.
|
|
167
|
+
*/
|
|
168
|
+
private recordMissPending;
|
|
169
|
+
/**
|
|
170
|
+
* After a successful store(), find the oldest pending miss for the same
|
|
171
|
+
* query and update its similarity-window record with the now-known cost.
|
|
172
|
+
* Best-effort — silently no-op if no pending miss exists or the bookkeeping
|
|
173
|
+
* entry has already been pruned.
|
|
174
|
+
*/
|
|
175
|
+
private applyCostToPendingMiss;
|
|
162
176
|
private assertInitialized;
|
|
163
177
|
private assertDimension;
|
|
164
|
-
private isIndexNotFoundError;
|
|
165
|
-
private parseDimensionFromInfo;
|
|
166
178
|
}
|
|
167
179
|
export interface ThresholdEffectivenessResult {
|
|
168
180
|
category: string;
|
package/dist/SemanticCache.js
CHANGED
|
@@ -6,6 +6,7 @@ const node_crypto_2 = require("node:crypto");
|
|
|
6
6
|
const api_1 = require("@opentelemetry/api");
|
|
7
7
|
const errors_1 = require("./errors");
|
|
8
8
|
const telemetry_1 = require("./telemetry");
|
|
9
|
+
const valkey_search_kit_1 = require("@betterdb/valkey-search-kit");
|
|
9
10
|
const utils_1 = require("./utils");
|
|
10
11
|
const defaultCostTable_1 = require("./defaultCostTable");
|
|
11
12
|
const cluster_1 = require("./cluster");
|
|
@@ -16,6 +17,19 @@ const PACKAGE_VERSION = require('../package.json').version;
|
|
|
16
17
|
/**
 * Turn an arbitrary thrown value into a printable message.
 *
 * @param {unknown} err - The caught value; may or may not be an Error.
 * @returns {string} `err.message` for Error instances, otherwise `String(err)`.
 */
function errMsg(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
|
|
20
|
+
/**
 * Parse the stored `cost_micros` hash field of a cache entry.
 *
 * The value is validated strictly: only an optional leading `+` followed by
 * decimal digits is accepted (surrounding whitespace is ignored). A lenient
 * `parseInt` would silently turn malformed values such as "12abc", "1.9" or
 * "1e+21" into a bogus prefix (12, 1, 1), which would then be added to the
 * cost-saved counters.
 *
 * @param {string | number | null | undefined} raw - Raw field value, if any.
 * @returns {number | null} A non-negative safe integer number of micros, or
 *   `null` when the field is absent, malformed, negative, or too large to be
 *   represented exactly.
 */
function parseHitCostMicros(raw) {
  if (raw === undefined || raw === null) {
    return null;
  }
  const text = String(raw).trim();
  // Digits only: rejects signs other than '+', decimals, exponents and
  // trailing garbage, so negative values never get through either.
  if (!/^\+?\d+$/.test(text)) {
    return null;
  }
  const micros = Number(text);
  // Values beyond the safe-integer range lose precision and must not be
  // forwarded to integer-only counters.
  return Number.isSafeInteger(micros) ? micros : null;
}
|
|
30
|
+
/**
 * Derive a short identifier from a prompt.
 *
 * @param {string} prompt - Prompt text to fingerprint.
 * @returns {string} The first 16 hex characters of the prompt's SHA-256 digest.
 */
function correlationIdFor(prompt) {
  const hasher = (0, node_crypto_2.createHash)('sha256');
  hasher.update(prompt);
  const hexDigest = hasher.digest('hex');
  return hexDigest.slice(0, 16);
}
|
|
19
33
|
class SemanticCache {
|
|
20
34
|
client;
|
|
21
35
|
embedFn;
|
|
@@ -24,6 +38,7 @@ class SemanticCache {
|
|
|
24
38
|
entryPrefix;
|
|
25
39
|
statsKey;
|
|
26
40
|
similarityWindowKey;
|
|
41
|
+
missPendingKey;
|
|
27
42
|
configKey;
|
|
28
43
|
defaultThreshold;
|
|
29
44
|
defaultTtl;
|
|
@@ -68,6 +83,7 @@ class SemanticCache {
|
|
|
68
83
|
this.entryPrefix = `${this.name}:entry:`;
|
|
69
84
|
this.statsKey = `${this.name}:__stats`;
|
|
70
85
|
this.similarityWindowKey = `${this.name}:__similarity_window`;
|
|
86
|
+
this.missPendingKey = `${this.name}:__miss_pending`;
|
|
71
87
|
this.configKey = `${this.name}:__config`;
|
|
72
88
|
this.embedKeyPrefix = `${this.name}:embed:`;
|
|
73
89
|
this.defaultThreshold = options.defaultThreshold ?? 0.1;
|
|
@@ -136,15 +152,12 @@ class SemanticCache {
|
|
|
136
152
|
await this.client.call('FT.DROPINDEX', this.indexName);
|
|
137
153
|
}
|
|
138
154
|
catch (err) {
|
|
139
|
-
if (!
|
|
155
|
+
if (!(0, valkey_search_kit_1.isIndexNotFoundError)(err)) {
|
|
140
156
|
throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
|
|
141
157
|
}
|
|
142
158
|
}
|
|
143
159
|
// Cluster-aware SCAN for entry keys and embed cache keys
|
|
144
|
-
const patterns = [
|
|
145
|
-
`${this.name}:entry:*`,
|
|
146
|
-
`${this.name}:embed:*`,
|
|
147
|
-
];
|
|
160
|
+
const patterns = [`${this.name}:entry:*`, `${this.name}:embed:*`];
|
|
148
161
|
for (const pattern of patterns) {
|
|
149
162
|
await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
|
|
150
163
|
await nodeClient.del(keys);
|
|
@@ -152,6 +165,7 @@ class SemanticCache {
|
|
|
152
165
|
}
|
|
153
166
|
await this.client.del(this.statsKey);
|
|
154
167
|
await this.client.del(this.similarityWindowKey);
|
|
168
|
+
await this.client.del(this.missPendingKey);
|
|
155
169
|
this.analytics.capture('cache_flush');
|
|
156
170
|
}
|
|
157
171
|
/**
|
|
@@ -212,9 +226,9 @@ class SemanticCache {
|
|
|
212
226
|
const userFilter = options?.filter;
|
|
213
227
|
// AND semantics: each ref must be present — chain separate TAG clauses.
|
|
214
228
|
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
215
|
-
?
|
|
229
|
+
? binaryRefs.length === 1
|
|
216
230
|
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
217
|
-
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
231
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
218
232
|
: null;
|
|
219
233
|
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
220
234
|
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
@@ -230,15 +244,18 @@ class SemanticCache {
|
|
|
230
244
|
const searchMs = performance.now() - searchStart;
|
|
231
245
|
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
232
246
|
const categoryLabel = category || 'none';
|
|
233
|
-
const timingAttrs = {
|
|
247
|
+
const timingAttrs = { embedding_latency_ms: embedSec * 1000, search_latency_ms: searchMs };
|
|
234
248
|
// No candidates at all
|
|
235
249
|
if (parsed.length === 0) {
|
|
236
250
|
await this.recordStat('misses');
|
|
237
251
|
this.telemetry.metrics.requestsTotal
|
|
238
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
252
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
253
|
+
.inc();
|
|
239
254
|
span.setAttributes({
|
|
240
|
-
'cache.hit': false,
|
|
241
|
-
'cache.
|
|
255
|
+
'cache.hit': false,
|
|
256
|
+
'cache.name': this.name,
|
|
257
|
+
'cache.category': categoryLabel,
|
|
258
|
+
...timingAttrs,
|
|
242
259
|
});
|
|
243
260
|
return { hit: false, confidence: 'miss' };
|
|
244
261
|
}
|
|
@@ -246,19 +263,24 @@ class SemanticCache {
|
|
|
246
263
|
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
247
264
|
if (!isNaN(score)) {
|
|
248
265
|
this.telemetry.metrics.similarityScore
|
|
249
|
-
.labels({ cache_name: this.name, category: categoryLabel })
|
|
266
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
267
|
+
.observe(score);
|
|
250
268
|
}
|
|
251
269
|
// Miss (no usable score, or score exceeds threshold)
|
|
252
270
|
if (isNaN(score) || score > threshold) {
|
|
253
271
|
if (!isNaN(score)) {
|
|
254
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
272
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
273
|
+
await this.recordMissPending(promptText, missMember);
|
|
255
274
|
}
|
|
256
275
|
await this.recordStat('misses');
|
|
257
276
|
this.telemetry.metrics.requestsTotal
|
|
258
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
277
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
278
|
+
.inc();
|
|
259
279
|
span.setAttributes({
|
|
260
|
-
'cache.hit': false,
|
|
261
|
-
'cache.
|
|
280
|
+
'cache.hit': false,
|
|
281
|
+
'cache.name': this.name,
|
|
282
|
+
'cache.category': categoryLabel,
|
|
283
|
+
...timingAttrs,
|
|
262
284
|
...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
|
|
263
285
|
});
|
|
264
286
|
const result = { hit: false, confidence: 'miss' };
|
|
@@ -278,17 +300,23 @@ class SemanticCache {
|
|
|
278
300
|
.filter(({ s }) => !isNaN(s))
|
|
279
301
|
.map(({ i, s }) => ({
|
|
280
302
|
origIdx: i,
|
|
281
|
-
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
|
|
303
|
+
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s, prompt: parsed[i].fields['prompt'] ?? '' },
|
|
282
304
|
}));
|
|
283
305
|
const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
|
|
284
306
|
// Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
|
|
285
307
|
// treated as a miss rather than silently falling back to the top candidate.
|
|
286
308
|
if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
|
|
287
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
309
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
310
|
+
await this.recordMissPending(promptText, missMember);
|
|
288
311
|
await this.recordStat('misses');
|
|
289
312
|
this.telemetry.metrics.requestsTotal
|
|
290
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
291
|
-
|
|
313
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
314
|
+
.inc();
|
|
315
|
+
span.setAttributes({
|
|
316
|
+
'cache.hit': false,
|
|
317
|
+
'cache.name': this.name,
|
|
318
|
+
'cache.reranked': true,
|
|
319
|
+
});
|
|
292
320
|
return { hit: false, confidence: 'miss' };
|
|
293
321
|
}
|
|
294
322
|
// Map back to the original parsed[] index (not the candidates[] index)
|
|
@@ -304,12 +332,16 @@ class SemanticCache {
|
|
|
304
332
|
try {
|
|
305
333
|
await this.client.del(winner.key);
|
|
306
334
|
}
|
|
307
|
-
catch {
|
|
308
|
-
|
|
335
|
+
catch {
|
|
336
|
+
/* best effort */
|
|
337
|
+
}
|
|
338
|
+
const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
|
|
339
|
+
await this.recordMissPending(promptText, missMember);
|
|
309
340
|
this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
|
|
310
341
|
await this.recordStat('misses');
|
|
311
342
|
this.telemetry.metrics.requestsTotal
|
|
312
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
343
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
344
|
+
.inc();
|
|
313
345
|
span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
|
|
314
346
|
return { hit: false, confidence: 'miss' };
|
|
315
347
|
}
|
|
@@ -330,6 +362,8 @@ class SemanticCache {
|
|
|
330
362
|
similarity: winnerScore,
|
|
331
363
|
threshold,
|
|
332
364
|
category: category || undefined,
|
|
365
|
+
// Reserved for consumer judge functions; not consumed by the built-in judge path.
|
|
366
|
+
cachedPrompt: winner.fields['prompt'] ?? '',
|
|
333
367
|
}), timeoutMs);
|
|
334
368
|
decision = accepted ? 'accept' : 'reject';
|
|
335
369
|
}
|
|
@@ -363,7 +397,8 @@ class SemanticCache {
|
|
|
363
397
|
}
|
|
364
398
|
else {
|
|
365
399
|
// reject / error_reject / timeout_reject → treat as miss
|
|
366
|
-
await this.recordSimilarityWindow(winnerScore, 'miss', category);
|
|
400
|
+
const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
|
|
401
|
+
await this.recordMissPending(promptText, missMember);
|
|
367
402
|
await this.recordStat('misses');
|
|
368
403
|
this.telemetry.metrics.requestsTotal
|
|
369
404
|
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
@@ -387,27 +422,26 @@ class SemanticCache {
|
|
|
387
422
|
}
|
|
388
423
|
}
|
|
389
424
|
// --- End judge ---
|
|
425
|
+
const hitCostMicros = parseHitCostMicros(winner.fields['cost_micros']);
|
|
390
426
|
// Record as genuine hit (moved here from before the judge block)
|
|
391
|
-
await this.recordSimilarityWindow(winnerScore, 'hit', category);
|
|
427
|
+
await this.recordSimilarityWindow(winnerScore, 'hit', category, hitCostMicros);
|
|
392
428
|
await this.recordStat('hits');
|
|
393
429
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
394
430
|
this.telemetry.metrics.requestsTotal
|
|
395
|
-
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
431
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
432
|
+
.inc();
|
|
396
433
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
397
434
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
398
435
|
}
|
|
399
436
|
// Cost saved
|
|
400
437
|
let costSaved;
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
this.telemetry.metrics.costSavedTotal
|
|
409
|
-
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
410
|
-
}
|
|
438
|
+
if (hitCostMicros !== null) {
|
|
439
|
+
costSaved = hitCostMicros / 1_000_000;
|
|
440
|
+
// Atomically increment cost_saved_micros in stats
|
|
441
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
|
|
442
|
+
this.telemetry.metrics.costSavedTotal
|
|
443
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
444
|
+
.inc(costSaved);
|
|
411
445
|
}
|
|
412
446
|
// Content blocks
|
|
413
447
|
let contentBlocks;
|
|
@@ -416,16 +450,25 @@ class SemanticCache {
|
|
|
416
450
|
try {
|
|
417
451
|
contentBlocks = JSON.parse(contentBlocksStr);
|
|
418
452
|
}
|
|
419
|
-
catch {
|
|
453
|
+
catch {
|
|
454
|
+
/* ignore parse errors */
|
|
455
|
+
}
|
|
420
456
|
}
|
|
421
457
|
span.setAttributes({
|
|
422
|
-
'cache.hit': true,
|
|
423
|
-
'cache.
|
|
424
|
-
'cache.
|
|
458
|
+
'cache.hit': true,
|
|
459
|
+
'cache.similarity': winnerScore,
|
|
460
|
+
'cache.threshold': threshold,
|
|
461
|
+
'cache.confidence': confidence,
|
|
462
|
+
'cache.matched_key': matchedKey,
|
|
463
|
+
'cache.category': categoryLabel,
|
|
464
|
+
...timingAttrs,
|
|
425
465
|
});
|
|
426
466
|
const result = {
|
|
427
|
-
hit: true,
|
|
428
|
-
|
|
467
|
+
hit: true,
|
|
468
|
+
response: winner.fields['response'],
|
|
469
|
+
similarity: winnerScore,
|
|
470
|
+
confidence,
|
|
471
|
+
matchedKey,
|
|
429
472
|
};
|
|
430
473
|
if (costSaved !== undefined)
|
|
431
474
|
result.costSaved = costSaved;
|
|
@@ -451,8 +494,9 @@ class SemanticCache {
|
|
|
451
494
|
this.costTable) {
|
|
452
495
|
const pricing = this.costTable[options.model];
|
|
453
496
|
if (pricing) {
|
|
454
|
-
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
455
|
-
options.outputTokens * pricing.outputPer1k / 1000) *
|
|
497
|
+
costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
|
|
498
|
+
(options.outputTokens * pricing.outputPer1k) / 1000) *
|
|
499
|
+
1_000_000);
|
|
456
500
|
}
|
|
457
501
|
}
|
|
458
502
|
const hashFields = {
|
|
@@ -489,10 +533,16 @@ class SemanticCache {
|
|
|
489
533
|
if (ttl !== undefined)
|
|
490
534
|
await this.client.expire(entryKey, ttl);
|
|
491
535
|
span.setAttributes({
|
|
492
|
-
'cache.name': this.name,
|
|
493
|
-
'cache.
|
|
494
|
-
'
|
|
536
|
+
'cache.name': this.name,
|
|
537
|
+
'cache.key': entryKey,
|
|
538
|
+
'cache.ttl': ttl ?? -1,
|
|
539
|
+
'cache.category': category || 'none',
|
|
540
|
+
'cache.model': model || 'none',
|
|
541
|
+
embedding_latency_ms: embedSec * 1000,
|
|
495
542
|
});
|
|
543
|
+
if (costMicros !== undefined && costMicros >= 0) {
|
|
544
|
+
await this.applyCostToPendingMiss(promptText, costMicros);
|
|
545
|
+
}
|
|
496
546
|
return entryKey;
|
|
497
547
|
});
|
|
498
548
|
}
|
|
@@ -512,11 +562,15 @@ class SemanticCache {
|
|
|
512
562
|
const category = options?.category ?? '';
|
|
513
563
|
const model = options?.model ?? '';
|
|
514
564
|
let costMicros;
|
|
515
|
-
if (options?.model &&
|
|
565
|
+
if (options?.model &&
|
|
566
|
+
options?.inputTokens !== undefined &&
|
|
567
|
+
options?.outputTokens !== undefined &&
|
|
568
|
+
this.costTable) {
|
|
516
569
|
const pricing = this.costTable[options.model];
|
|
517
570
|
if (pricing) {
|
|
518
|
-
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
519
|
-
options.outputTokens * pricing.outputPer1k / 1000) *
|
|
571
|
+
costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
|
|
572
|
+
(options.outputTokens * pricing.outputPer1k) / 1000) *
|
|
573
|
+
1_000_000);
|
|
520
574
|
}
|
|
521
575
|
}
|
|
522
576
|
const hashFields = {
|
|
@@ -535,8 +589,9 @@ class SemanticCache {
|
|
|
535
589
|
if (costMicros !== undefined && costMicros > 0) {
|
|
536
590
|
hashFields['cost_micros'] = String(costMicros);
|
|
537
591
|
}
|
|
538
|
-
if (options?.temperature !== undefined)
|
|
592
|
+
if (options?.temperature !== undefined) {
|
|
539
593
|
hashFields['temperature'] = String(options.temperature);
|
|
594
|
+
}
|
|
540
595
|
if (options?.topP !== undefined)
|
|
541
596
|
hashFields['top_p'] = String(options.topP);
|
|
542
597
|
if (options?.seed !== undefined)
|
|
@@ -551,10 +606,16 @@ class SemanticCache {
|
|
|
551
606
|
if (ttl !== undefined)
|
|
552
607
|
await this.client.expire(entryKey, ttl);
|
|
553
608
|
span.setAttributes({
|
|
554
|
-
'cache.name': this.name,
|
|
555
|
-
'cache.
|
|
556
|
-
'
|
|
609
|
+
'cache.name': this.name,
|
|
610
|
+
'cache.key': entryKey,
|
|
611
|
+
'cache.ttl': ttl ?? -1,
|
|
612
|
+
'cache.category': category || 'none',
|
|
613
|
+
'cache.model': model || 'none',
|
|
614
|
+
embedding_latency_ms: embedSec * 1000,
|
|
557
615
|
});
|
|
616
|
+
if (costMicros !== undefined && costMicros >= 0) {
|
|
617
|
+
await this.applyCostToPendingMiss(promptText, costMicros);
|
|
618
|
+
}
|
|
558
619
|
return entryKey;
|
|
559
620
|
});
|
|
560
621
|
}
|
|
@@ -592,9 +653,9 @@ class SemanticCache {
|
|
|
592
653
|
const { binaryRefs } = resolved[i];
|
|
593
654
|
const { vector: embedding } = embeddings[i];
|
|
594
655
|
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
595
|
-
?
|
|
656
|
+
? binaryRefs.length === 1
|
|
596
657
|
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
597
|
-
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
658
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
598
659
|
: null;
|
|
599
660
|
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
600
661
|
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
@@ -612,7 +673,8 @@ class SemanticCache {
|
|
|
612
673
|
if (err) {
|
|
613
674
|
await this.recordStat('misses');
|
|
614
675
|
this.telemetry.metrics.requestsTotal
|
|
615
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
676
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
677
|
+
.inc();
|
|
616
678
|
results.push({ hit: false, confidence: 'miss' });
|
|
617
679
|
continue;
|
|
618
680
|
}
|
|
@@ -620,7 +682,8 @@ class SemanticCache {
|
|
|
620
682
|
if (parsed.length === 0) {
|
|
621
683
|
await this.recordStat('misses');
|
|
622
684
|
this.telemetry.metrics.requestsTotal
|
|
623
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
685
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
686
|
+
.inc();
|
|
624
687
|
results.push({ hit: false, confidence: 'miss' });
|
|
625
688
|
continue;
|
|
626
689
|
}
|
|
@@ -628,11 +691,13 @@ class SemanticCache {
|
|
|
628
691
|
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
629
692
|
if (isNaN(score) || score > threshold) {
|
|
630
693
|
if (!isNaN(score)) {
|
|
631
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
694
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
695
|
+
await this.recordMissPending(resolved[i].text, missMember);
|
|
632
696
|
}
|
|
633
697
|
await this.recordStat('misses');
|
|
634
698
|
this.telemetry.metrics.requestsTotal
|
|
635
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
699
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
700
|
+
.inc();
|
|
636
701
|
const result = { hit: false, confidence: 'miss' };
|
|
637
702
|
if (!isNaN(score)) {
|
|
638
703
|
result.similarity = score;
|
|
@@ -641,26 +706,25 @@ class SemanticCache {
|
|
|
641
706
|
results.push(result);
|
|
642
707
|
continue;
|
|
643
708
|
}
|
|
644
|
-
|
|
709
|
+
const hitCostMicros = parseHitCostMicros(parsed[0].fields['cost_micros']);
|
|
710
|
+
await this.recordSimilarityWindow(score, 'hit', category, hitCostMicros);
|
|
645
711
|
const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
646
712
|
await this.recordStat('hits');
|
|
647
713
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
648
714
|
this.telemetry.metrics.requestsTotal
|
|
649
|
-
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
715
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
716
|
+
.inc();
|
|
650
717
|
const matchedKey = parsed[0].key;
|
|
651
718
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
652
719
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
653
720
|
}
|
|
654
721
|
let costSaved;
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
this.telemetry.metrics.costSavedTotal
|
|
662
|
-
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
663
|
-
}
|
|
722
|
+
if (hitCostMicros !== null) {
|
|
723
|
+
costSaved = hitCostMicros / 1_000_000;
|
|
724
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
|
|
725
|
+
this.telemetry.metrics.costSavedTotal
|
|
726
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
727
|
+
.inc(costSaved);
|
|
664
728
|
}
|
|
665
729
|
let contentBlocks;
|
|
666
730
|
const contentBlocksStr = parsed[0].fields['content_blocks'];
|
|
@@ -668,11 +732,16 @@ class SemanticCache {
|
|
|
668
732
|
try {
|
|
669
733
|
contentBlocks = JSON.parse(contentBlocksStr);
|
|
670
734
|
}
|
|
671
|
-
catch {
|
|
735
|
+
catch {
|
|
736
|
+
/* ignore */
|
|
737
|
+
}
|
|
672
738
|
}
|
|
673
739
|
const result = {
|
|
674
|
-
hit: true,
|
|
675
|
-
|
|
740
|
+
hit: true,
|
|
741
|
+
response: parsed[0].fields['response'],
|
|
742
|
+
similarity: score,
|
|
743
|
+
confidence,
|
|
744
|
+
matchedKey,
|
|
676
745
|
};
|
|
677
746
|
if (costSaved !== undefined)
|
|
678
747
|
result.costSaved = costSaved;
|
|
@@ -703,8 +772,10 @@ class SemanticCache {
|
|
|
703
772
|
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
704
773
|
if (parsed.length === 0) {
|
|
705
774
|
span.setAttributes({
|
|
706
|
-
'cache.name': this.name,
|
|
707
|
-
'cache.
|
|
775
|
+
'cache.name': this.name,
|
|
776
|
+
'cache.filter': filter,
|
|
777
|
+
'cache.deleted_count': 0,
|
|
778
|
+
'cache.truncated': false,
|
|
708
779
|
});
|
|
709
780
|
return { deleted: 0, truncated: false };
|
|
710
781
|
}
|
|
@@ -717,8 +788,10 @@ class SemanticCache {
|
|
|
717
788
|
throw new errors_1.ValkeyCommandError('DEL', err);
|
|
718
789
|
}
|
|
719
790
|
span.setAttributes({
|
|
720
|
-
'cache.name': this.name,
|
|
721
|
-
'cache.
|
|
791
|
+
'cache.name': this.name,
|
|
792
|
+
'cache.filter': filter,
|
|
793
|
+
'cache.deleted_count': keys.length,
|
|
794
|
+
'cache.truncated': truncated,
|
|
722
795
|
});
|
|
723
796
|
return { deleted: keys.length, truncated };
|
|
724
797
|
});
|
|
@@ -761,16 +834,7 @@ class SemanticCache {
|
|
|
761
834
|
catch (err) {
|
|
762
835
|
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
763
836
|
}
|
|
764
|
-
const
|
|
765
|
-
let numDocs = 0;
|
|
766
|
-
let indexingState = 'unknown';
|
|
767
|
-
for (let i = 0; i < info.length - 1; i += 2) {
|
|
768
|
-
const key = String(info[i]);
|
|
769
|
-
if (key === 'num_docs')
|
|
770
|
-
numDocs = parseInt(String(info[i + 1]), 10) || 0;
|
|
771
|
-
else if (key === 'indexing')
|
|
772
|
-
indexingState = String(info[i + 1]);
|
|
773
|
-
}
|
|
837
|
+
const { numDocs, indexingState } = (0, valkey_search_kit_1.parseFtInfoStats)(raw);
|
|
774
838
|
return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
|
|
775
839
|
}
|
|
776
840
|
/**
|
|
@@ -803,7 +867,9 @@ class SemanticCache {
|
|
|
803
867
|
}
|
|
804
868
|
}
|
|
805
869
|
}
|
|
806
|
-
catch {
|
|
870
|
+
catch {
|
|
871
|
+
/* skip corrupt entries */
|
|
872
|
+
}
|
|
807
873
|
}
|
|
808
874
|
const sampleCount = entries.length;
|
|
809
875
|
const categoryLabel = category ?? 'all';
|
|
@@ -890,11 +956,15 @@ class SemanticCache {
|
|
|
890
956
|
if (entry.category)
|
|
891
957
|
categories.add(entry.category);
|
|
892
958
|
}
|
|
893
|
-
catch {
|
|
959
|
+
catch {
|
|
960
|
+
/* skip */
|
|
961
|
+
}
|
|
894
962
|
}
|
|
895
963
|
const results = await Promise.all([
|
|
896
964
|
this.thresholdEffectiveness({ minSamples: options?.minSamples }),
|
|
897
|
-
...[...categories]
|
|
965
|
+
...[...categories]
|
|
966
|
+
.filter(Boolean)
|
|
967
|
+
.map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
|
|
898
968
|
]);
|
|
899
969
|
return results;
|
|
900
970
|
}
|
|
@@ -947,7 +1017,9 @@ class SemanticCache {
|
|
|
947
1017
|
}
|
|
948
1018
|
// -- Internal helpers exposed to package adapters --
|
|
949
1019
|
/** @internal Default similarity threshold. */
|
|
950
|
-
get _defaultThreshold() {
|
|
1020
|
+
get _defaultThreshold() {
|
|
1021
|
+
return this.defaultThreshold;
|
|
1022
|
+
}
|
|
951
1023
|
/** @internal Test-only getter. */
|
|
952
1024
|
get _categoryThresholds() {
|
|
953
1025
|
return this.categoryThresholds;
|
|
@@ -980,15 +1052,11 @@ class SemanticCache {
|
|
|
980
1052
|
this.refreshConfig()
|
|
981
1053
|
.then((ok) => {
|
|
982
1054
|
if (!ok) {
|
|
983
|
-
this.telemetry.metrics.configRefreshFailed
|
|
984
|
-
.labels({ cache_name: this.name })
|
|
985
|
-
.inc();
|
|
1055
|
+
this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
|
|
986
1056
|
}
|
|
987
1057
|
})
|
|
988
1058
|
.catch(() => {
|
|
989
|
-
this.telemetry.metrics.configRefreshFailed
|
|
990
|
-
.labels({ cache_name: this.name })
|
|
991
|
-
.inc();
|
|
1059
|
+
this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
|
|
992
1060
|
});
|
|
993
1061
|
};
|
|
994
1062
|
// Synchronous first refresh: process started immediately after a proposal
|
|
@@ -1044,9 +1112,7 @@ class SemanticCache {
|
|
|
1044
1112
|
metadata,
|
|
1045
1113
|
heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
|
|
1046
1114
|
onWriteFailed: () => {
|
|
1047
|
-
this.telemetry.metrics.discoveryWriteFailed
|
|
1048
|
-
.labels({ cache_name: this.name })
|
|
1049
|
-
.inc();
|
|
1115
|
+
this.telemetry.metrics.discoveryWriteFailed.labels({ cache_name: this.name }).inc();
|
|
1050
1116
|
},
|
|
1051
1117
|
});
|
|
1052
1118
|
await manager.register();
|
|
@@ -1099,7 +1165,7 @@ class SemanticCache {
|
|
|
1099
1165
|
// Try reading an existing index
|
|
1100
1166
|
try {
|
|
1101
1167
|
const info = (await this.client.call('FT.INFO', this.indexName));
|
|
1102
|
-
const dim =
|
|
1168
|
+
const dim = (0, valkey_search_kit_1.parseDimensionFromInfo)(info);
|
|
1103
1169
|
const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
|
|
1104
1170
|
if (dim > 0)
|
|
1105
1171
|
return { dim, hasBinaryRefs };
|
|
@@ -1110,7 +1176,7 @@ class SemanticCache {
|
|
|
1110
1176
|
catch (err) {
|
|
1111
1177
|
if (err instanceof errors_1.EmbeddingError)
|
|
1112
1178
|
throw err;
|
|
1113
|
-
if (!
|
|
1179
|
+
if (!(0, valkey_search_kit_1.isIndexNotFoundError)(err)) {
|
|
1114
1180
|
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
1115
1181
|
}
|
|
1116
1182
|
}
|
|
@@ -1164,7 +1230,8 @@ class SemanticCache {
|
|
|
1164
1230
|
const cached = await this.client.getBuffer(embedKey);
|
|
1165
1231
|
if (cached) {
|
|
1166
1232
|
this.telemetry.metrics.embeddingCacheTotal
|
|
1167
|
-
.labels({ cache_name: this.name, result: 'hit' })
|
|
1233
|
+
.labels({ cache_name: this.name, result: 'hit' })
|
|
1234
|
+
.inc();
|
|
1168
1235
|
// Decode Float32 buffer
|
|
1169
1236
|
const vector = [];
|
|
1170
1237
|
for (let i = 0; i < cached.length; i += 4) {
|
|
@@ -1173,9 +1240,12 @@ class SemanticCache {
|
|
|
1173
1240
|
return { vector, durationSec: 0 };
|
|
1174
1241
|
}
|
|
1175
1242
|
}
|
|
1176
|
-
catch {
|
|
1243
|
+
catch {
|
|
1244
|
+
/* ignore cache read errors */
|
|
1245
|
+
}
|
|
1177
1246
|
this.telemetry.metrics.embeddingCacheTotal
|
|
1178
|
-
.labels({ cache_name: this.name, result: 'miss' })
|
|
1247
|
+
.labels({ cache_name: this.name, result: 'miss' })
|
|
1248
|
+
.inc();
|
|
1179
1249
|
}
|
|
1180
1250
|
const start = performance.now();
|
|
1181
1251
|
let vector;
|
|
@@ -1186,9 +1256,7 @@ class SemanticCache {
|
|
|
1186
1256
|
throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
|
|
1187
1257
|
}
|
|
1188
1258
|
const durationSec = (performance.now() - start) / 1000;
|
|
1189
|
-
this.telemetry.metrics.embeddingDuration
|
|
1190
|
-
.labels({ cache_name: this.name })
|
|
1191
|
-
.observe(durationSec);
|
|
1259
|
+
this.telemetry.metrics.embeddingDuration.labels({ cache_name: this.name }).observe(durationSec);
|
|
1192
1260
|
// Store in embedding cache
|
|
1193
1261
|
if (this.embeddingCacheEnabled && text) {
|
|
1194
1262
|
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
@@ -1197,7 +1265,9 @@ class SemanticCache {
|
|
|
1197
1265
|
const buf = (0, utils_1.encodeFloat32)(vector);
|
|
1198
1266
|
await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
|
|
1199
1267
|
}
|
|
1200
|
-
catch {
|
|
1268
|
+
catch {
|
|
1269
|
+
/* ignore cache write errors */
|
|
1270
|
+
}
|
|
1201
1271
|
}
|
|
1202
1272
|
return { vector, durationSec };
|
|
1203
1273
|
}
|
|
@@ -1235,22 +1305,99 @@ class SemanticCache {
|
|
|
1235
1305
|
await pipeline.exec();
|
|
1236
1306
|
}
|
|
1237
1307
|
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
1238
|
-
async recordSimilarityWindow(score, result, category) {
|
|
1308
|
+
async recordSimilarityWindow(score, result, category, costSavedMicros) {
|
|
1239
1309
|
const now = Date.now();
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1310
|
+
const member = JSON.stringify({
|
|
1311
|
+
score,
|
|
1312
|
+
result,
|
|
1313
|
+
category,
|
|
1314
|
+
_n: Math.random(),
|
|
1315
|
+
cost_saved_micros: costSavedMicros,
|
|
1316
|
+
});
|
|
1243
1317
|
const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
|
|
1244
1318
|
try {
|
|
1245
1319
|
const pipeline = this.client.pipeline();
|
|
1246
1320
|
pipeline.zadd(this.similarityWindowKey, now, member);
|
|
1247
|
-
// Trim by time: remove entries older than 7 days
|
|
1248
1321
|
pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
|
|
1249
|
-
// Trim by count: keep at most 10,000 most recent
|
|
1250
1322
|
pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
|
|
1251
1323
|
await pipeline.exec();
|
|
1252
1324
|
}
|
|
1253
|
-
catch {
|
|
1325
|
+
catch {
|
|
1326
|
+
/* best effort - never fail on window writes */
|
|
1327
|
+
}
|
|
1328
|
+
return member;
|
|
1329
|
+
}
|
|
1330
|
+
/**
|
|
1331
|
+
* Track a miss so a subsequent store() can backfill its cost into the
|
|
1332
|
+
* similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
|
|
1333
|
+
* zset — entries beyond that are pruned on every record and backfill.
|
|
1334
|
+
*/
|
|
1335
|
+
async recordMissPending(prompt, similarityMember) {
|
|
1336
|
+
const correlationId = correlationIdFor(prompt);
|
|
1337
|
+
const now = Date.now();
|
|
1338
|
+
const fiveMinutesAgo = now - 5 * 60 * 1000;
|
|
1339
|
+
const entry = JSON.stringify({ correlationId, similarityMember });
|
|
1340
|
+
try {
|
|
1341
|
+
await this.client.zadd(this.missPendingKey, now, entry);
|
|
1342
|
+
await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
|
|
1343
|
+
}
|
|
1344
|
+
catch {
|
|
1345
|
+
/* best effort */
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
/**
|
|
1349
|
+
* After a successful store(), find the oldest pending miss for the same
|
|
1350
|
+
* query and update its similarity-window record with the now-known cost.
|
|
1351
|
+
* Best-effort — silently no-op if no pending miss exists or the bookkeeping
|
|
1352
|
+
* entry has already been pruned.
|
|
1353
|
+
*/
|
|
1354
|
+
async applyCostToPendingMiss(prompt, costMicros) {
|
|
1355
|
+
const correlationId = correlationIdFor(prompt);
|
|
1356
|
+
const fiveMinutesAgo = Date.now() - 5 * 60 * 1000;
|
|
1357
|
+
try {
|
|
1358
|
+
await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
|
|
1359
|
+
const raw = (await this.client.zrange(this.missPendingKey, '0', '-1', 'WITHSCORES'));
|
|
1360
|
+
let matchedEntry = null;
|
|
1361
|
+
let matchedSimilarityMember = null;
|
|
1362
|
+
for (let i = 0; i < raw.length; i += 2) {
|
|
1363
|
+
const entryStr = raw[i];
|
|
1364
|
+
try {
|
|
1365
|
+
const parsed = JSON.parse(entryStr);
|
|
1366
|
+
if (parsed.correlationId === correlationId) {
|
|
1367
|
+
matchedEntry = entryStr;
|
|
1368
|
+
matchedSimilarityMember = parsed.similarityMember;
|
|
1369
|
+
break;
|
|
1370
|
+
}
|
|
1371
|
+
}
|
|
1372
|
+
catch {
|
|
1373
|
+
/* skip malformed */
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
if (matchedEntry === null || matchedSimilarityMember === null) {
|
|
1377
|
+
return;
|
|
1378
|
+
}
|
|
1379
|
+
const rawScore = await this.client.zscore(this.similarityWindowKey, matchedSimilarityMember);
|
|
1380
|
+
if (rawScore === null) {
|
|
1381
|
+
await this.client.zrem(this.missPendingKey, matchedEntry);
|
|
1382
|
+
return;
|
|
1383
|
+
}
|
|
1384
|
+
const similarityScore = Number(rawScore);
|
|
1385
|
+
if (!Number.isFinite(similarityScore)) {
|
|
1386
|
+
await this.client.zrem(this.missPendingKey, matchedEntry);
|
|
1387
|
+
return;
|
|
1388
|
+
}
|
|
1389
|
+
const parsedMember = JSON.parse(matchedSimilarityMember);
|
|
1390
|
+
parsedMember.cost_saved_micros = costMicros;
|
|
1391
|
+
const updatedMember = JSON.stringify(parsedMember);
|
|
1392
|
+
const updatePipeline = this.client.pipeline();
|
|
1393
|
+
updatePipeline.zrem(this.similarityWindowKey, matchedSimilarityMember);
|
|
1394
|
+
updatePipeline.zadd(this.similarityWindowKey, similarityScore, updatedMember);
|
|
1395
|
+
updatePipeline.zrem(this.missPendingKey, matchedEntry);
|
|
1396
|
+
await updatePipeline.exec();
|
|
1397
|
+
}
|
|
1398
|
+
catch {
|
|
1399
|
+
/* never fail store() because of bookkeeping */
|
|
1400
|
+
}
|
|
1254
1401
|
}
|
|
1255
1402
|
assertInitialized(method) {
|
|
1256
1403
|
if (!this._initialized) {
|
|
@@ -1262,49 +1409,6 @@ class SemanticCache {
|
|
|
1262
1409
|
throw new errors_1.SemanticCacheUsageError(`Embedding dimension mismatch: index expects ${this._dimension}, embedFn returned ${embedding.length}. Call flush() then initialize() to rebuild.`);
|
|
1263
1410
|
}
|
|
1264
1411
|
}
|
|
1265
|
-
isIndexNotFoundError(err) {
|
|
1266
|
-
const msg = err instanceof Error ? err.message.toLowerCase() : '';
|
|
1267
|
-
return (msg.includes('unknown index name') ||
|
|
1268
|
-
msg.includes('no such index') ||
|
|
1269
|
-
msg.includes('not found'));
|
|
1270
|
-
}
|
|
1271
|
-
parseDimensionFromInfo(info) {
|
|
1272
|
-
for (let i = 0; i < info.length - 1; i += 2) {
|
|
1273
|
-
const key = String(info[i]);
|
|
1274
|
-
if (key !== 'attributes' && key !== 'fields')
|
|
1275
|
-
continue;
|
|
1276
|
-
const attributes = info[i + 1];
|
|
1277
|
-
if (!Array.isArray(attributes))
|
|
1278
|
-
continue;
|
|
1279
|
-
for (const attr of attributes) {
|
|
1280
|
-
if (!Array.isArray(attr))
|
|
1281
|
-
continue;
|
|
1282
|
-
let isVector = false;
|
|
1283
|
-
let dim = 0;
|
|
1284
|
-
for (let j = 0; j < attr.length - 1; j++) {
|
|
1285
|
-
const attrKey = String(attr[j]);
|
|
1286
|
-
if (attrKey === 'type' && String(attr[j + 1]) === 'VECTOR')
|
|
1287
|
-
isVector = true;
|
|
1288
|
-
if (attrKey.toLowerCase() === 'dim')
|
|
1289
|
-
dim = parseInt(String(attr[j + 1]), 10) || 0;
|
|
1290
|
-
// Valkey Search 1.2 nests dimension inside an 'index' sub-array
|
|
1291
|
-
if (attrKey === 'index' && Array.isArray(attr[j + 1])) {
|
|
1292
|
-
const indexArr = attr[j + 1];
|
|
1293
|
-
for (let k = 0; k < indexArr.length - 1; k++) {
|
|
1294
|
-
if (String(indexArr[k]) === 'dimensions') {
|
|
1295
|
-
const d = parseInt(String(indexArr[k + 1]), 10) || 0;
|
|
1296
|
-
if (d > 0)
|
|
1297
|
-
dim = d;
|
|
1298
|
-
}
|
|
1299
|
-
}
|
|
1300
|
-
}
|
|
1301
|
-
}
|
|
1302
|
-
if (isVector && dim > 0)
|
|
1303
|
-
return dim;
|
|
1304
|
-
}
|
|
1305
|
-
}
|
|
1306
|
-
return 0;
|
|
1307
|
-
}
|
|
1308
1412
|
}
|
|
1309
1413
|
exports.SemanticCache = SemanticCache;
|
|
1310
1414
|
// --- Judge helpers ---
|
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export { SemanticCache } from './SemanticCache';
|
|
|
2
2
|
export type { ThresholdEffectivenessResult } from './SemanticCache';
|
|
3
3
|
export { DEFAULT_COST_TABLE } from './defaultCostTable';
|
|
4
4
|
export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
|
|
5
|
+
export { createKeywordOverlapRerank } from './rerank';
|
|
5
6
|
export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
|
|
6
7
|
export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
|
|
7
8
|
export { escapeTag } from './utils';
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
|
|
3
|
+
exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.createKeywordOverlapRerank = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
|
|
4
4
|
var SemanticCache_1 = require("./SemanticCache");
|
|
5
5
|
Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
|
|
6
6
|
var defaultCostTable_1 = require("./defaultCostTable");
|
|
7
7
|
Object.defineProperty(exports, "DEFAULT_COST_TABLE", { enumerable: true, get: function () { return defaultCostTable_1.DEFAULT_COST_TABLE; } });
|
|
8
|
+
var rerank_1 = require("./rerank");
|
|
9
|
+
Object.defineProperty(exports, "createKeywordOverlapRerank", { enumerable: true, get: function () { return rerank_1.createKeywordOverlapRerank; } });
|
|
8
10
|
var errors_1 = require("./errors");
|
|
9
11
|
Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
|
|
10
12
|
Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
|
package/dist/rerank.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in rerank factories for @betterdb/semantic-cache.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Built-in keyword-overlap reranker.
|
|
6
|
+
*
|
|
7
|
+
* Blends cosine similarity with word overlap and returns the index of the
|
|
8
|
+
* best candidate.
|
|
9
|
+
*
|
|
10
|
+
* @param compare
|
|
11
|
+
* `"prompt"` – overlap of the incoming query against each candidate's stored
|
|
12
|
+
* prompt. Equivalence signal. Catches entity mismatches
|
|
13
|
+
* (e.g. "weather in Paris" vs "weather in Berlin"). Default.
|
|
14
|
+
* `"response"` – overlap of the incoming query against each candidate's cached
|
|
15
|
+
* response. Relevance signal.
|
|
16
|
+
*
|
|
17
|
+
* @param cosineWeight
|
|
18
|
+
* Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
|
|
19
|
+
* Default: 0.7 (overlap 0.3).
|
|
20
|
+
*
|
|
21
|
+
* Candidate objects carry: `similarity` (cosine distance, lower = more similar),
|
|
22
|
+
* `response` (string), and `prompt` (string, stored prompt).
|
|
23
|
+
*/
|
|
24
|
+
export declare function createKeywordOverlapRerank(options?: {
|
|
25
|
+
compare?: 'prompt' | 'response';
|
|
26
|
+
cosineWeight?: number;
|
|
27
|
+
}): (query: string, candidates: Array<{
|
|
28
|
+
response: string;
|
|
29
|
+
similarity: number;
|
|
30
|
+
prompt: string;
|
|
31
|
+
}>) => Promise<number>;
|
package/dist/rerank.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Built-in rerank factories for @betterdb/semantic-cache.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.createKeywordOverlapRerank = createKeywordOverlapRerank;
|
|
7
|
+
/**
|
|
8
|
+
* Tokenize: lowercase, split on whitespace, strip surrounding punctuation.
|
|
9
|
+
* Deterministic and dependency-free.
|
|
10
|
+
* IDF weighting would attach here at the token-weighting step.
|
|
11
|
+
*/
|
|
12
|
+
function tokenize(text) {
|
|
13
|
+
const out = new Set();
|
|
14
|
+
for (const raw of text.toLowerCase().split(/\s+/)) {
|
|
15
|
+
const tok = raw.replace(/^[.,!?;:"'()\[\]{}<>]+|[.,!?;:"'()\[\]{}<>]+$/g, '');
|
|
16
|
+
if (tok)
|
|
17
|
+
out.add(tok);
|
|
18
|
+
}
|
|
19
|
+
return out;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Built-in keyword-overlap reranker.
|
|
23
|
+
*
|
|
24
|
+
* Blends cosine similarity with word overlap and returns the index of the
|
|
25
|
+
* best candidate.
|
|
26
|
+
*
|
|
27
|
+
* @param compare
|
|
28
|
+
* `"prompt"` – overlap of the incoming query against each candidate's stored
|
|
29
|
+
* prompt. Equivalence signal. Catches entity mismatches
|
|
30
|
+
* (e.g. "weather in Paris" vs "weather in Berlin"). Default.
|
|
31
|
+
* `"response"` – overlap of the incoming query against each candidate's cached
|
|
32
|
+
* response. Relevance signal.
|
|
33
|
+
*
|
|
34
|
+
* @param cosineWeight
|
|
35
|
+
* Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
|
|
36
|
+
* Default: 0.7 (overlap 0.3).
|
|
37
|
+
*
|
|
38
|
+
* Candidate objects carry: `similarity` (cosine distance, lower = more similar),
|
|
39
|
+
* `response` (string), and `prompt` (string, stored prompt).
|
|
40
|
+
*/
|
|
41
|
+
function createKeywordOverlapRerank(options) {
|
|
42
|
+
const compare = options?.compare ?? 'prompt';
|
|
43
|
+
const cosineWeight = options?.cosineWeight ?? 0.7;
|
|
44
|
+
if (cosineWeight < 0 || cosineWeight > 1) {
|
|
45
|
+
throw new Error('cosineWeight must be in [0, 1]');
|
|
46
|
+
}
|
|
47
|
+
const overlapWeight = 1.0 - cosineWeight;
|
|
48
|
+
return async (query, candidates) => {
|
|
49
|
+
const queryTokens = tokenize(query);
|
|
50
|
+
let bestIdx = 0;
|
|
51
|
+
let bestScore = -Infinity;
|
|
52
|
+
for (let i = 0; i < candidates.length; i++) {
|
|
53
|
+
const text = candidates[i][compare] ?? '';
|
|
54
|
+
const candTokens = tokenize(text);
|
|
55
|
+
let overlap = 0;
|
|
56
|
+
if (queryTokens.size > 0) {
|
|
57
|
+
let intersection = 0;
|
|
58
|
+
for (const t of queryTokens) {
|
|
59
|
+
if (candTokens.has(t))
|
|
60
|
+
intersection++;
|
|
61
|
+
}
|
|
62
|
+
overlap = intersection / queryTokens.size;
|
|
63
|
+
}
|
|
64
|
+
const cosineSim = 1.0 - candidates[i].similarity;
|
|
65
|
+
const score = cosineWeight * cosineSim + overlapWeight * overlap;
|
|
66
|
+
if (score > bestScore) {
|
|
67
|
+
bestScore = score;
|
|
68
|
+
bestIdx = i;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return bestIdx;
|
|
72
|
+
};
|
|
73
|
+
}
|
package/dist/types.d.ts
CHANGED
|
@@ -127,6 +127,7 @@ export interface RerankOptions {
|
|
|
127
127
|
rerankFn: (query: string, candidates: Array<{
|
|
128
128
|
response: string;
|
|
129
129
|
similarity: number;
|
|
130
|
+
prompt: string;
|
|
130
131
|
}>) => Promise<number>;
|
|
131
132
|
}
|
|
132
133
|
/**
|
|
@@ -162,6 +163,8 @@ export interface JudgeOptions {
|
|
|
162
163
|
similarity: number;
|
|
163
164
|
threshold: number;
|
|
164
165
|
category: string | undefined;
|
|
166
|
+
/** The stored prompt text for the matched entry. */
|
|
167
|
+
cachedPrompt: string;
|
|
165
168
|
}) => Promise<boolean>;
|
|
166
169
|
/**
|
|
167
170
|
* Behavior when judgeFn throws or exceeds timeoutMs.
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
/** SHA-256 hex digest of a string. */
|
|
2
2
|
export declare function sha256(text: string): string;
|
|
3
|
-
|
|
4
|
-
* Spaces are included because Valkey Search treats unescaped spaces as term
|
|
5
|
-
* separators (OR semantics), which would broaden the filter unintentionally.
|
|
6
|
-
*/
|
|
7
|
-
export declare function escapeTag(value: string): string;
|
|
3
|
+
export { escapeTag, encodeFloat32, parseFtSearchResponse } from '@betterdb/valkey-search-kit';
|
|
8
4
|
export type ContentBlock = TextBlock | BinaryBlock | ToolCallBlock | ToolResultBlock | ReasoningBlock;
|
|
9
5
|
export interface TextBlock {
|
|
10
6
|
type: 'text';
|
|
@@ -58,26 +54,3 @@ export declare function extractText(blocks: ContentBlock[]): string;
|
|
|
58
54
|
* Used for the binary_refs TAG field on cache entries.
|
|
59
55
|
*/
|
|
60
56
|
export declare function extractBinaryRefs(blocks: ContentBlock[]): string[];
|
|
61
|
-
/**
|
|
62
|
-
* Encode number[] as a little-endian Float32 Buffer.
|
|
63
|
-
* Used to store embeddings as binary HSET field values.
|
|
64
|
-
*/
|
|
65
|
-
export declare function encodeFloat32(vec: number[]): Buffer;
|
|
66
|
-
/**
|
|
67
|
-
* Parse a raw FT.SEARCH response from iovalkey's client.call().
|
|
68
|
-
*
|
|
69
|
-
* iovalkey returns FT.SEARCH results in the following shape:
|
|
70
|
-
* [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
|
|
71
|
-
*
|
|
72
|
-
* - totalCount is a string (e.g. "2")
|
|
73
|
-
* - Each key is a string
|
|
74
|
-
* - Each field list is a flat string array: [fieldName, value, fieldName, value, ...]
|
|
75
|
-
*
|
|
76
|
-
* Returns an array of { key: string, fields: Record<string, string> }.
|
|
77
|
-
* Returns [] if totalCount is "0" or the response is empty/malformed.
|
|
78
|
-
* Never throws — on any parse error, returns [].
|
|
79
|
-
*/
|
|
80
|
-
export declare function parseFtSearchResponse(raw: unknown): Array<{
|
|
81
|
-
key: string;
|
|
82
|
-
fields: Record<string, string>;
|
|
83
|
-
}>;
|
package/dist/utils.js
CHANGED
|
@@ -1,23 +1,18 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parseFtSearchResponse = exports.encodeFloat32 = exports.escapeTag = void 0;
|
|
3
4
|
exports.sha256 = sha256;
|
|
4
|
-
exports.escapeTag = escapeTag;
|
|
5
5
|
exports.extractText = extractText;
|
|
6
6
|
exports.extractBinaryRefs = extractBinaryRefs;
|
|
7
|
-
exports.encodeFloat32 = encodeFloat32;
|
|
8
|
-
exports.parseFtSearchResponse = parseFtSearchResponse;
|
|
9
7
|
const node_crypto_1 = require("node:crypto");
|
|
10
8
|
/** SHA-256 hex digest of a string. */
|
|
11
9
|
function sha256(text) {
|
|
12
10
|
return (0, node_crypto_1.createHash)('sha256').update(text).digest('hex');
|
|
13
11
|
}
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
function escapeTag(value) {
|
|
19
|
-
return value.replace(/[,.<>{}[\]"':;!@#$%^&*()\-+=~|/\\ ]/g, '\\$&');
|
|
20
|
-
}
|
|
12
|
+
var valkey_search_kit_1 = require("@betterdb/valkey-search-kit");
|
|
13
|
+
Object.defineProperty(exports, "escapeTag", { enumerable: true, get: function () { return valkey_search_kit_1.escapeTag; } });
|
|
14
|
+
Object.defineProperty(exports, "encodeFloat32", { enumerable: true, get: function () { return valkey_search_kit_1.encodeFloat32; } });
|
|
15
|
+
Object.defineProperty(exports, "parseFtSearchResponse", { enumerable: true, get: function () { return valkey_search_kit_1.parseFtSearchResponse; } });
|
|
21
16
|
/**
|
|
22
17
|
* Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
|
|
23
18
|
* Used to derive the embedding text from a multi-modal prompt.
|
|
@@ -38,70 +33,3 @@ function extractBinaryRefs(blocks) {
|
|
|
38
33
|
.map((b) => b.ref)
|
|
39
34
|
.sort();
|
|
40
35
|
}
|
|
41
|
-
/**
|
|
42
|
-
* Encode number[] as a little-endian Float32 Buffer.
|
|
43
|
-
* Used to store embeddings as binary HSET field values.
|
|
44
|
-
*/
|
|
45
|
-
function encodeFloat32(vec) {
|
|
46
|
-
const buf = Buffer.alloc(vec.length * 4);
|
|
47
|
-
for (let i = 0; i < vec.length; i++) {
|
|
48
|
-
buf.writeFloatLE(vec[i], i * 4);
|
|
49
|
-
}
|
|
50
|
-
return buf;
|
|
51
|
-
}
|
|
52
|
-
/**
|
|
53
|
-
* Parse a raw FT.SEARCH response from iovalkey's client.call().
|
|
54
|
-
*
|
|
55
|
-
* iovalkey returns FT.SEARCH results in the following shape:
|
|
56
|
-
* [totalCount, key1, [field1, val1, field2, val2, ...], key2, [...], ...]
|
|
57
|
-
*
|
|
58
|
-
* - totalCount is a string (e.g. "2")
|
|
59
|
-
* - Each key is a string
|
|
60
|
-
* - Each field list is a flat string array: [fieldName, value, fieldName, value, ...]
|
|
61
|
-
*
|
|
62
|
-
* Returns an array of { key: string, fields: Record<string, string> }.
|
|
63
|
-
* Returns [] if totalCount is "0" or the response is empty/malformed.
|
|
64
|
-
* Never throws — on any parse error, returns [].
|
|
65
|
-
*/
|
|
66
|
-
function parseFtSearchResponse(raw) {
|
|
67
|
-
try {
|
|
68
|
-
if (!Array.isArray(raw) || raw.length < 1) {
|
|
69
|
-
return [];
|
|
70
|
-
}
|
|
71
|
-
const totalCount = typeof raw[0] === 'string' ? parseInt(raw[0], 10) : Number(raw[0]);
|
|
72
|
-
if (!totalCount || totalCount <= 0) {
|
|
73
|
-
return [];
|
|
74
|
-
}
|
|
75
|
-
const results = [];
|
|
76
|
-
let i = 1;
|
|
77
|
-
while (i < raw.length) {
|
|
78
|
-
const key = raw[i];
|
|
79
|
-
if (typeof key !== 'string') {
|
|
80
|
-
i++;
|
|
81
|
-
continue;
|
|
82
|
-
}
|
|
83
|
-
const fieldList = raw[i + 1];
|
|
84
|
-
const fields = {};
|
|
85
|
-
if (Array.isArray(fieldList)) {
|
|
86
|
-
const len = fieldList.length - (fieldList.length % 2);
|
|
87
|
-
for (let j = 0; j < len; j += 2) {
|
|
88
|
-
const fieldName = String(fieldList[j]);
|
|
89
|
-
const fieldValue = String(fieldList[j + 1]);
|
|
90
|
-
fields[fieldName] = fieldValue;
|
|
91
|
-
}
|
|
92
|
-
i += 2;
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
// No field list follows the key (e.g. RETURN 0 mode)
|
|
96
|
-
results.push({ key, fields });
|
|
97
|
-
i++;
|
|
98
|
-
continue;
|
|
99
|
-
}
|
|
100
|
-
results.push({ key, fields });
|
|
101
|
-
}
|
|
102
|
-
return results;
|
|
103
|
-
}
|
|
104
|
-
catch {
|
|
105
|
-
return [];
|
|
106
|
-
}
|
|
107
|
-
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@betterdb/semantic-cache",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"valkey",
|
|
@@ -93,18 +93,11 @@
|
|
|
93
93
|
"dist",
|
|
94
94
|
"README.md"
|
|
95
95
|
],
|
|
96
|
-
"scripts": {
|
|
97
|
-
"build": "tsc && node scripts/inject-telemetry-defaults.mjs",
|
|
98
|
-
"typecheck": "tsc --noEmit",
|
|
99
|
-
"test": "vitest run",
|
|
100
|
-
"test:watch": "vitest",
|
|
101
|
-
"clean": "rm -rf dist",
|
|
102
|
-
"update:pricing": "node scripts/update-model-prices.mjs"
|
|
103
|
-
},
|
|
104
96
|
"dependencies": {
|
|
105
97
|
"@opentelemetry/api": "^1.9.0",
|
|
106
98
|
"posthog-node": ">=4.0.0",
|
|
107
|
-
"prom-client": "^15.1.3"
|
|
99
|
+
"prom-client": "^15.1.3",
|
|
100
|
+
"@betterdb/valkey-search-kit": "0.1.0"
|
|
108
101
|
},
|
|
109
102
|
"engines": {
|
|
110
103
|
"node": ">=20.0.0"
|
|
@@ -137,5 +130,13 @@
|
|
|
137
130
|
"openai": {
|
|
138
131
|
"optional": true
|
|
139
132
|
}
|
|
133
|
+
},
|
|
134
|
+
"scripts": {
|
|
135
|
+
"build": "tsc && node scripts/inject-telemetry-defaults.mjs",
|
|
136
|
+
"typecheck": "tsc --noEmit",
|
|
137
|
+
"test": "vitest run",
|
|
138
|
+
"test:watch": "vitest",
|
|
139
|
+
"clean": "rm -rf dist",
|
|
140
|
+
"update:pricing": "node scripts/update-model-prices.mjs"
|
|
140
141
|
}
|
|
141
|
-
}
|
|
142
|
+
}
|