@betterdb/semantic-cache 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/SemanticCache.d.ts +14 -0
- package/dist/SemanticCache.js +259 -104
- package/dist/index.d.ts +1 -0
- package/dist/index.js +3 -1
- package/dist/rerank.d.ts +31 -0
- package/dist/rerank.js +73 -0
- package/dist/types.d.ts +3 -0
- package/package.json +1 -1
package/dist/SemanticCache.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ export declare class SemanticCache {
|
|
|
8
8
|
private readonly entryPrefix;
|
|
9
9
|
private readonly statsKey;
|
|
10
10
|
private readonly similarityWindowKey;
|
|
11
|
+
private readonly missPendingKey;
|
|
11
12
|
private readonly configKey;
|
|
12
13
|
private defaultThreshold;
|
|
13
14
|
private readonly defaultTtl;
|
|
@@ -159,6 +160,19 @@ export declare class SemanticCache {
|
|
|
159
160
|
private recordStat;
|
|
160
161
|
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
161
162
|
private recordSimilarityWindow;
|
|
163
|
+
/**
|
|
164
|
+
* Track a miss so a subsequent store() can backfill its cost into the
|
|
165
|
+
* similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
|
|
166
|
+
* zset — entries beyond that are pruned on every record and backfill.
|
|
167
|
+
*/
|
|
168
|
+
private recordMissPending;
|
|
169
|
+
/**
|
|
170
|
+
* After a successful store(), find the oldest pending miss for the same
|
|
171
|
+
* query and update its similarity-window record with the now-known cost.
|
|
172
|
+
* Best-effort — silently no-op if no pending miss exists or the bookkeeping
|
|
173
|
+
* entry has already been pruned.
|
|
174
|
+
*/
|
|
175
|
+
private applyCostToPendingMiss;
|
|
162
176
|
private assertInitialized;
|
|
163
177
|
private assertDimension;
|
|
164
178
|
private isIndexNotFoundError;
|
package/dist/SemanticCache.js
CHANGED
|
@@ -16,6 +16,19 @@ const PACKAGE_VERSION = require('../package.json').version;
|
|
|
16
16
|
function errMsg(err) {
|
|
17
17
|
return err instanceof Error ? err.message : String(err);
|
|
18
18
|
}
|
|
19
|
+
function parseHitCostMicros(raw) {
|
|
20
|
+
if (raw === undefined || raw === null) {
|
|
21
|
+
return null;
|
|
22
|
+
}
|
|
23
|
+
const n = parseInt(raw, 10);
|
|
24
|
+
if (!Number.isFinite(n) || n < 0) {
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
27
|
+
return n;
|
|
28
|
+
}
|
|
29
|
+
function correlationIdFor(prompt) {
|
|
30
|
+
return (0, node_crypto_2.createHash)('sha256').update(prompt).digest('hex').slice(0, 16);
|
|
31
|
+
}
|
|
19
32
|
class SemanticCache {
|
|
20
33
|
client;
|
|
21
34
|
embedFn;
|
|
@@ -24,6 +37,7 @@ class SemanticCache {
|
|
|
24
37
|
entryPrefix;
|
|
25
38
|
statsKey;
|
|
26
39
|
similarityWindowKey;
|
|
40
|
+
missPendingKey;
|
|
27
41
|
configKey;
|
|
28
42
|
defaultThreshold;
|
|
29
43
|
defaultTtl;
|
|
@@ -68,6 +82,7 @@ class SemanticCache {
|
|
|
68
82
|
this.entryPrefix = `${this.name}:entry:`;
|
|
69
83
|
this.statsKey = `${this.name}:__stats`;
|
|
70
84
|
this.similarityWindowKey = `${this.name}:__similarity_window`;
|
|
85
|
+
this.missPendingKey = `${this.name}:__miss_pending`;
|
|
71
86
|
this.configKey = `${this.name}:__config`;
|
|
72
87
|
this.embedKeyPrefix = `${this.name}:embed:`;
|
|
73
88
|
this.defaultThreshold = options.defaultThreshold ?? 0.1;
|
|
@@ -141,10 +156,7 @@ class SemanticCache {
|
|
|
141
156
|
}
|
|
142
157
|
}
|
|
143
158
|
// Cluster-aware SCAN for entry keys and embed cache keys
|
|
144
|
-
const patterns = [
|
|
145
|
-
`${this.name}:entry:*`,
|
|
146
|
-
`${this.name}:embed:*`,
|
|
147
|
-
];
|
|
159
|
+
const patterns = [`${this.name}:entry:*`, `${this.name}:embed:*`];
|
|
148
160
|
for (const pattern of patterns) {
|
|
149
161
|
await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
|
|
150
162
|
await nodeClient.del(keys);
|
|
@@ -152,6 +164,7 @@ class SemanticCache {
|
|
|
152
164
|
}
|
|
153
165
|
await this.client.del(this.statsKey);
|
|
154
166
|
await this.client.del(this.similarityWindowKey);
|
|
167
|
+
await this.client.del(this.missPendingKey);
|
|
155
168
|
this.analytics.capture('cache_flush');
|
|
156
169
|
}
|
|
157
170
|
/**
|
|
@@ -212,9 +225,9 @@ class SemanticCache {
|
|
|
212
225
|
const userFilter = options?.filter;
|
|
213
226
|
// AND semantics: each ref must be present — chain separate TAG clauses.
|
|
214
227
|
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
215
|
-
?
|
|
228
|
+
? binaryRefs.length === 1
|
|
216
229
|
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
217
|
-
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
230
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
218
231
|
: null;
|
|
219
232
|
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
220
233
|
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
@@ -230,15 +243,18 @@ class SemanticCache {
|
|
|
230
243
|
const searchMs = performance.now() - searchStart;
|
|
231
244
|
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
232
245
|
const categoryLabel = category || 'none';
|
|
233
|
-
const timingAttrs = {
|
|
246
|
+
const timingAttrs = { embedding_latency_ms: embedSec * 1000, search_latency_ms: searchMs };
|
|
234
247
|
// No candidates at all
|
|
235
248
|
if (parsed.length === 0) {
|
|
236
249
|
await this.recordStat('misses');
|
|
237
250
|
this.telemetry.metrics.requestsTotal
|
|
238
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
251
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
252
|
+
.inc();
|
|
239
253
|
span.setAttributes({
|
|
240
|
-
'cache.hit': false,
|
|
241
|
-
'cache.
|
|
254
|
+
'cache.hit': false,
|
|
255
|
+
'cache.name': this.name,
|
|
256
|
+
'cache.category': categoryLabel,
|
|
257
|
+
...timingAttrs,
|
|
242
258
|
});
|
|
243
259
|
return { hit: false, confidence: 'miss' };
|
|
244
260
|
}
|
|
@@ -246,19 +262,24 @@ class SemanticCache {
|
|
|
246
262
|
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
247
263
|
if (!isNaN(score)) {
|
|
248
264
|
this.telemetry.metrics.similarityScore
|
|
249
|
-
.labels({ cache_name: this.name, category: categoryLabel })
|
|
265
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
266
|
+
.observe(score);
|
|
250
267
|
}
|
|
251
268
|
// Miss (no usable score, or score exceeds threshold)
|
|
252
269
|
if (isNaN(score) || score > threshold) {
|
|
253
270
|
if (!isNaN(score)) {
|
|
254
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
271
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
272
|
+
await this.recordMissPending(promptText, missMember);
|
|
255
273
|
}
|
|
256
274
|
await this.recordStat('misses');
|
|
257
275
|
this.telemetry.metrics.requestsTotal
|
|
258
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
276
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
277
|
+
.inc();
|
|
259
278
|
span.setAttributes({
|
|
260
|
-
'cache.hit': false,
|
|
261
|
-
'cache.
|
|
279
|
+
'cache.hit': false,
|
|
280
|
+
'cache.name': this.name,
|
|
281
|
+
'cache.category': categoryLabel,
|
|
282
|
+
...timingAttrs,
|
|
262
283
|
...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
|
|
263
284
|
});
|
|
264
285
|
const result = { hit: false, confidence: 'miss' };
|
|
@@ -278,17 +299,23 @@ class SemanticCache {
|
|
|
278
299
|
.filter(({ s }) => !isNaN(s))
|
|
279
300
|
.map(({ i, s }) => ({
|
|
280
301
|
origIdx: i,
|
|
281
|
-
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
|
|
302
|
+
candidate: { response: parsed[i].fields['response'] ?? '', similarity: s, prompt: parsed[i].fields['prompt'] ?? '' },
|
|
282
303
|
}));
|
|
283
304
|
const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
|
|
284
305
|
// Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
|
|
285
306
|
// treated as a miss rather than silently falling back to the top candidate.
|
|
286
307
|
if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
|
|
287
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
308
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
309
|
+
await this.recordMissPending(promptText, missMember);
|
|
288
310
|
await this.recordStat('misses');
|
|
289
311
|
this.telemetry.metrics.requestsTotal
|
|
290
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
291
|
-
|
|
312
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
313
|
+
.inc();
|
|
314
|
+
span.setAttributes({
|
|
315
|
+
'cache.hit': false,
|
|
316
|
+
'cache.name': this.name,
|
|
317
|
+
'cache.reranked': true,
|
|
318
|
+
});
|
|
292
319
|
return { hit: false, confidence: 'miss' };
|
|
293
320
|
}
|
|
294
321
|
// Map back to the original parsed[] index (not the candidates[] index)
|
|
@@ -304,12 +331,16 @@ class SemanticCache {
|
|
|
304
331
|
try {
|
|
305
332
|
await this.client.del(winner.key);
|
|
306
333
|
}
|
|
307
|
-
catch {
|
|
308
|
-
|
|
334
|
+
catch {
|
|
335
|
+
/* best effort */
|
|
336
|
+
}
|
|
337
|
+
const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
|
|
338
|
+
await this.recordMissPending(promptText, missMember);
|
|
309
339
|
this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
|
|
310
340
|
await this.recordStat('misses');
|
|
311
341
|
this.telemetry.metrics.requestsTotal
|
|
312
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
342
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
343
|
+
.inc();
|
|
313
344
|
span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
|
|
314
345
|
return { hit: false, confidence: 'miss' };
|
|
315
346
|
}
|
|
@@ -330,6 +361,8 @@ class SemanticCache {
|
|
|
330
361
|
similarity: winnerScore,
|
|
331
362
|
threshold,
|
|
332
363
|
category: category || undefined,
|
|
364
|
+
// Reserved for consumer judge functions; not consumed by the built-in judge path.
|
|
365
|
+
cachedPrompt: winner.fields['prompt'] ?? '',
|
|
333
366
|
}), timeoutMs);
|
|
334
367
|
decision = accepted ? 'accept' : 'reject';
|
|
335
368
|
}
|
|
@@ -363,7 +396,8 @@ class SemanticCache {
|
|
|
363
396
|
}
|
|
364
397
|
else {
|
|
365
398
|
// reject / error_reject / timeout_reject → treat as miss
|
|
366
|
-
await this.recordSimilarityWindow(winnerScore, 'miss', category);
|
|
399
|
+
const missMember = await this.recordSimilarityWindow(winnerScore, 'miss', category, null);
|
|
400
|
+
await this.recordMissPending(promptText, missMember);
|
|
367
401
|
await this.recordStat('misses');
|
|
368
402
|
this.telemetry.metrics.requestsTotal
|
|
369
403
|
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
@@ -387,27 +421,26 @@ class SemanticCache {
|
|
|
387
421
|
}
|
|
388
422
|
}
|
|
389
423
|
// --- End judge ---
|
|
424
|
+
const hitCostMicros = parseHitCostMicros(winner.fields['cost_micros']);
|
|
390
425
|
// Record as genuine hit (moved here from before the judge block)
|
|
391
|
-
await this.recordSimilarityWindow(winnerScore, 'hit', category);
|
|
426
|
+
await this.recordSimilarityWindow(winnerScore, 'hit', category, hitCostMicros);
|
|
392
427
|
await this.recordStat('hits');
|
|
393
428
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
394
429
|
this.telemetry.metrics.requestsTotal
|
|
395
|
-
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
430
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
431
|
+
.inc();
|
|
396
432
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
397
433
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
398
434
|
}
|
|
399
435
|
// Cost saved
|
|
400
436
|
let costSaved;
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
this.telemetry.metrics.costSavedTotal
|
|
409
|
-
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
410
|
-
}
|
|
437
|
+
if (hitCostMicros !== null) {
|
|
438
|
+
costSaved = hitCostMicros / 1_000_000;
|
|
439
|
+
// Atomically increment cost_saved_micros in stats
|
|
440
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
|
|
441
|
+
this.telemetry.metrics.costSavedTotal
|
|
442
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
443
|
+
.inc(costSaved);
|
|
411
444
|
}
|
|
412
445
|
// Content blocks
|
|
413
446
|
let contentBlocks;
|
|
@@ -416,16 +449,25 @@ class SemanticCache {
|
|
|
416
449
|
try {
|
|
417
450
|
contentBlocks = JSON.parse(contentBlocksStr);
|
|
418
451
|
}
|
|
419
|
-
catch {
|
|
452
|
+
catch {
|
|
453
|
+
/* ignore parse errors */
|
|
454
|
+
}
|
|
420
455
|
}
|
|
421
456
|
span.setAttributes({
|
|
422
|
-
'cache.hit': true,
|
|
423
|
-
'cache.
|
|
424
|
-
'cache.
|
|
457
|
+
'cache.hit': true,
|
|
458
|
+
'cache.similarity': winnerScore,
|
|
459
|
+
'cache.threshold': threshold,
|
|
460
|
+
'cache.confidence': confidence,
|
|
461
|
+
'cache.matched_key': matchedKey,
|
|
462
|
+
'cache.category': categoryLabel,
|
|
463
|
+
...timingAttrs,
|
|
425
464
|
});
|
|
426
465
|
const result = {
|
|
427
|
-
hit: true,
|
|
428
|
-
|
|
466
|
+
hit: true,
|
|
467
|
+
response: winner.fields['response'],
|
|
468
|
+
similarity: winnerScore,
|
|
469
|
+
confidence,
|
|
470
|
+
matchedKey,
|
|
429
471
|
};
|
|
430
472
|
if (costSaved !== undefined)
|
|
431
473
|
result.costSaved = costSaved;
|
|
@@ -451,8 +493,9 @@ class SemanticCache {
|
|
|
451
493
|
this.costTable) {
|
|
452
494
|
const pricing = this.costTable[options.model];
|
|
453
495
|
if (pricing) {
|
|
454
|
-
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
455
|
-
options.outputTokens * pricing.outputPer1k / 1000) *
|
|
496
|
+
costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
|
|
497
|
+
(options.outputTokens * pricing.outputPer1k) / 1000) *
|
|
498
|
+
1_000_000);
|
|
456
499
|
}
|
|
457
500
|
}
|
|
458
501
|
const hashFields = {
|
|
@@ -489,10 +532,16 @@ class SemanticCache {
|
|
|
489
532
|
if (ttl !== undefined)
|
|
490
533
|
await this.client.expire(entryKey, ttl);
|
|
491
534
|
span.setAttributes({
|
|
492
|
-
'cache.name': this.name,
|
|
493
|
-
'cache.
|
|
494
|
-
'
|
|
535
|
+
'cache.name': this.name,
|
|
536
|
+
'cache.key': entryKey,
|
|
537
|
+
'cache.ttl': ttl ?? -1,
|
|
538
|
+
'cache.category': category || 'none',
|
|
539
|
+
'cache.model': model || 'none',
|
|
540
|
+
embedding_latency_ms: embedSec * 1000,
|
|
495
541
|
});
|
|
542
|
+
if (costMicros !== undefined && costMicros >= 0) {
|
|
543
|
+
await this.applyCostToPendingMiss(promptText, costMicros);
|
|
544
|
+
}
|
|
496
545
|
return entryKey;
|
|
497
546
|
});
|
|
498
547
|
}
|
|
@@ -512,11 +561,15 @@ class SemanticCache {
|
|
|
512
561
|
const category = options?.category ?? '';
|
|
513
562
|
const model = options?.model ?? '';
|
|
514
563
|
let costMicros;
|
|
515
|
-
if (options?.model &&
|
|
564
|
+
if (options?.model &&
|
|
565
|
+
options?.inputTokens !== undefined &&
|
|
566
|
+
options?.outputTokens !== undefined &&
|
|
567
|
+
this.costTable) {
|
|
516
568
|
const pricing = this.costTable[options.model];
|
|
517
569
|
if (pricing) {
|
|
518
|
-
costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
|
|
519
|
-
options.outputTokens * pricing.outputPer1k / 1000) *
|
|
570
|
+
costMicros = Math.round(((options.inputTokens * pricing.inputPer1k) / 1000 +
|
|
571
|
+
(options.outputTokens * pricing.outputPer1k) / 1000) *
|
|
572
|
+
1_000_000);
|
|
520
573
|
}
|
|
521
574
|
}
|
|
522
575
|
const hashFields = {
|
|
@@ -535,8 +588,9 @@ class SemanticCache {
|
|
|
535
588
|
if (costMicros !== undefined && costMicros > 0) {
|
|
536
589
|
hashFields['cost_micros'] = String(costMicros);
|
|
537
590
|
}
|
|
538
|
-
if (options?.temperature !== undefined)
|
|
591
|
+
if (options?.temperature !== undefined) {
|
|
539
592
|
hashFields['temperature'] = String(options.temperature);
|
|
593
|
+
}
|
|
540
594
|
if (options?.topP !== undefined)
|
|
541
595
|
hashFields['top_p'] = String(options.topP);
|
|
542
596
|
if (options?.seed !== undefined)
|
|
@@ -551,10 +605,16 @@ class SemanticCache {
|
|
|
551
605
|
if (ttl !== undefined)
|
|
552
606
|
await this.client.expire(entryKey, ttl);
|
|
553
607
|
span.setAttributes({
|
|
554
|
-
'cache.name': this.name,
|
|
555
|
-
'cache.
|
|
556
|
-
'
|
|
608
|
+
'cache.name': this.name,
|
|
609
|
+
'cache.key': entryKey,
|
|
610
|
+
'cache.ttl': ttl ?? -1,
|
|
611
|
+
'cache.category': category || 'none',
|
|
612
|
+
'cache.model': model || 'none',
|
|
613
|
+
embedding_latency_ms: embedSec * 1000,
|
|
557
614
|
});
|
|
615
|
+
if (costMicros !== undefined && costMicros >= 0) {
|
|
616
|
+
await this.applyCostToPendingMiss(promptText, costMicros);
|
|
617
|
+
}
|
|
558
618
|
return entryKey;
|
|
559
619
|
});
|
|
560
620
|
}
|
|
@@ -592,9 +652,9 @@ class SemanticCache {
|
|
|
592
652
|
const { binaryRefs } = resolved[i];
|
|
593
653
|
const { vector: embedding } = embeddings[i];
|
|
594
654
|
const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
|
|
595
|
-
?
|
|
655
|
+
? binaryRefs.length === 1
|
|
596
656
|
? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
|
|
597
|
-
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
657
|
+
: binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' ')
|
|
598
658
|
: null;
|
|
599
659
|
const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
|
|
600
660
|
const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
|
|
@@ -612,7 +672,8 @@ class SemanticCache {
|
|
|
612
672
|
if (err) {
|
|
613
673
|
await this.recordStat('misses');
|
|
614
674
|
this.telemetry.metrics.requestsTotal
|
|
615
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
675
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
676
|
+
.inc();
|
|
616
677
|
results.push({ hit: false, confidence: 'miss' });
|
|
617
678
|
continue;
|
|
618
679
|
}
|
|
@@ -620,7 +681,8 @@ class SemanticCache {
|
|
|
620
681
|
if (parsed.length === 0) {
|
|
621
682
|
await this.recordStat('misses');
|
|
622
683
|
this.telemetry.metrics.requestsTotal
|
|
623
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
684
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
685
|
+
.inc();
|
|
624
686
|
results.push({ hit: false, confidence: 'miss' });
|
|
625
687
|
continue;
|
|
626
688
|
}
|
|
@@ -628,11 +690,13 @@ class SemanticCache {
|
|
|
628
690
|
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
629
691
|
if (isNaN(score) || score > threshold) {
|
|
630
692
|
if (!isNaN(score)) {
|
|
631
|
-
await this.recordSimilarityWindow(score, 'miss', category);
|
|
693
|
+
const missMember = await this.recordSimilarityWindow(score, 'miss', category, null);
|
|
694
|
+
await this.recordMissPending(resolved[i].text, missMember);
|
|
632
695
|
}
|
|
633
696
|
await this.recordStat('misses');
|
|
634
697
|
this.telemetry.metrics.requestsTotal
|
|
635
|
-
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
698
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
699
|
+
.inc();
|
|
636
700
|
const result = { hit: false, confidence: 'miss' };
|
|
637
701
|
if (!isNaN(score)) {
|
|
638
702
|
result.similarity = score;
|
|
@@ -641,26 +705,25 @@ class SemanticCache {
|
|
|
641
705
|
results.push(result);
|
|
642
706
|
continue;
|
|
643
707
|
}
|
|
644
|
-
|
|
708
|
+
const hitCostMicros = parseHitCostMicros(parsed[0].fields['cost_micros']);
|
|
709
|
+
await this.recordSimilarityWindow(score, 'hit', category, hitCostMicros);
|
|
645
710
|
const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
646
711
|
await this.recordStat('hits');
|
|
647
712
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
648
713
|
this.telemetry.metrics.requestsTotal
|
|
649
|
-
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
714
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel })
|
|
715
|
+
.inc();
|
|
650
716
|
const matchedKey = parsed[0].key;
|
|
651
717
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
652
718
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
653
719
|
}
|
|
654
720
|
let costSaved;
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
this.telemetry.metrics.costSavedTotal
|
|
662
|
-
.labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
|
|
663
|
-
}
|
|
721
|
+
if (hitCostMicros !== null) {
|
|
722
|
+
costSaved = hitCostMicros / 1_000_000;
|
|
723
|
+
await this.client.hincrby(this.statsKey, 'cost_saved_micros', hitCostMicros);
|
|
724
|
+
this.telemetry.metrics.costSavedTotal
|
|
725
|
+
.labels({ cache_name: this.name, category: categoryLabel })
|
|
726
|
+
.inc(costSaved);
|
|
664
727
|
}
|
|
665
728
|
let contentBlocks;
|
|
666
729
|
const contentBlocksStr = parsed[0].fields['content_blocks'];
|
|
@@ -668,11 +731,16 @@ class SemanticCache {
|
|
|
668
731
|
try {
|
|
669
732
|
contentBlocks = JSON.parse(contentBlocksStr);
|
|
670
733
|
}
|
|
671
|
-
catch {
|
|
734
|
+
catch {
|
|
735
|
+
/* ignore */
|
|
736
|
+
}
|
|
672
737
|
}
|
|
673
738
|
const result = {
|
|
674
|
-
hit: true,
|
|
675
|
-
|
|
739
|
+
hit: true,
|
|
740
|
+
response: parsed[0].fields['response'],
|
|
741
|
+
similarity: score,
|
|
742
|
+
confidence,
|
|
743
|
+
matchedKey,
|
|
676
744
|
};
|
|
677
745
|
if (costSaved !== undefined)
|
|
678
746
|
result.costSaved = costSaved;
|
|
@@ -703,8 +771,10 @@ class SemanticCache {
|
|
|
703
771
|
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
704
772
|
if (parsed.length === 0) {
|
|
705
773
|
span.setAttributes({
|
|
706
|
-
'cache.name': this.name,
|
|
707
|
-
'cache.
|
|
774
|
+
'cache.name': this.name,
|
|
775
|
+
'cache.filter': filter,
|
|
776
|
+
'cache.deleted_count': 0,
|
|
777
|
+
'cache.truncated': false,
|
|
708
778
|
});
|
|
709
779
|
return { deleted: 0, truncated: false };
|
|
710
780
|
}
|
|
@@ -717,8 +787,10 @@ class SemanticCache {
|
|
|
717
787
|
throw new errors_1.ValkeyCommandError('DEL', err);
|
|
718
788
|
}
|
|
719
789
|
span.setAttributes({
|
|
720
|
-
'cache.name': this.name,
|
|
721
|
-
'cache.
|
|
790
|
+
'cache.name': this.name,
|
|
791
|
+
'cache.filter': filter,
|
|
792
|
+
'cache.deleted_count': keys.length,
|
|
793
|
+
'cache.truncated': truncated,
|
|
722
794
|
});
|
|
723
795
|
return { deleted: keys.length, truncated };
|
|
724
796
|
});
|
|
@@ -803,7 +875,9 @@ class SemanticCache {
|
|
|
803
875
|
}
|
|
804
876
|
}
|
|
805
877
|
}
|
|
806
|
-
catch {
|
|
878
|
+
catch {
|
|
879
|
+
/* skip corrupt entries */
|
|
880
|
+
}
|
|
807
881
|
}
|
|
808
882
|
const sampleCount = entries.length;
|
|
809
883
|
const categoryLabel = category ?? 'all';
|
|
@@ -890,11 +964,15 @@ class SemanticCache {
|
|
|
890
964
|
if (entry.category)
|
|
891
965
|
categories.add(entry.category);
|
|
892
966
|
}
|
|
893
|
-
catch {
|
|
967
|
+
catch {
|
|
968
|
+
/* skip */
|
|
969
|
+
}
|
|
894
970
|
}
|
|
895
971
|
const results = await Promise.all([
|
|
896
972
|
this.thresholdEffectiveness({ minSamples: options?.minSamples }),
|
|
897
|
-
...[...categories]
|
|
973
|
+
...[...categories]
|
|
974
|
+
.filter(Boolean)
|
|
975
|
+
.map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
|
|
898
976
|
]);
|
|
899
977
|
return results;
|
|
900
978
|
}
|
|
@@ -947,7 +1025,9 @@ class SemanticCache {
|
|
|
947
1025
|
}
|
|
948
1026
|
// -- Internal helpers exposed to package adapters --
|
|
949
1027
|
/** @internal Default similarity threshold. */
|
|
950
|
-
get _defaultThreshold() {
|
|
1028
|
+
get _defaultThreshold() {
|
|
1029
|
+
return this.defaultThreshold;
|
|
1030
|
+
}
|
|
951
1031
|
/** @internal Test-only getter. */
|
|
952
1032
|
get _categoryThresholds() {
|
|
953
1033
|
return this.categoryThresholds;
|
|
@@ -980,15 +1060,11 @@ class SemanticCache {
|
|
|
980
1060
|
this.refreshConfig()
|
|
981
1061
|
.then((ok) => {
|
|
982
1062
|
if (!ok) {
|
|
983
|
-
this.telemetry.metrics.configRefreshFailed
|
|
984
|
-
.labels({ cache_name: this.name })
|
|
985
|
-
.inc();
|
|
1063
|
+
this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
|
|
986
1064
|
}
|
|
987
1065
|
})
|
|
988
1066
|
.catch(() => {
|
|
989
|
-
this.telemetry.metrics.configRefreshFailed
|
|
990
|
-
.labels({ cache_name: this.name })
|
|
991
|
-
.inc();
|
|
1067
|
+
this.telemetry.metrics.configRefreshFailed.labels({ cache_name: this.name }).inc();
|
|
992
1068
|
});
|
|
993
1069
|
};
|
|
994
1070
|
// Synchronous first refresh: process started immediately after a proposal
|
|
@@ -1044,9 +1120,7 @@ class SemanticCache {
|
|
|
1044
1120
|
metadata,
|
|
1045
1121
|
heartbeatIntervalMs: this.discoveryOptions.heartbeatIntervalMs,
|
|
1046
1122
|
onWriteFailed: () => {
|
|
1047
|
-
this.telemetry.metrics.discoveryWriteFailed
|
|
1048
|
-
.labels({ cache_name: this.name })
|
|
1049
|
-
.inc();
|
|
1123
|
+
this.telemetry.metrics.discoveryWriteFailed.labels({ cache_name: this.name }).inc();
|
|
1050
1124
|
},
|
|
1051
1125
|
});
|
|
1052
1126
|
await manager.register();
|
|
@@ -1164,7 +1238,8 @@ class SemanticCache {
|
|
|
1164
1238
|
const cached = await this.client.getBuffer(embedKey);
|
|
1165
1239
|
if (cached) {
|
|
1166
1240
|
this.telemetry.metrics.embeddingCacheTotal
|
|
1167
|
-
.labels({ cache_name: this.name, result: 'hit' })
|
|
1241
|
+
.labels({ cache_name: this.name, result: 'hit' })
|
|
1242
|
+
.inc();
|
|
1168
1243
|
// Decode Float32 buffer
|
|
1169
1244
|
const vector = [];
|
|
1170
1245
|
for (let i = 0; i < cached.length; i += 4) {
|
|
@@ -1173,9 +1248,12 @@ class SemanticCache {
|
|
|
1173
1248
|
return { vector, durationSec: 0 };
|
|
1174
1249
|
}
|
|
1175
1250
|
}
|
|
1176
|
-
catch {
|
|
1251
|
+
catch {
|
|
1252
|
+
/* ignore cache read errors */
|
|
1253
|
+
}
|
|
1177
1254
|
this.telemetry.metrics.embeddingCacheTotal
|
|
1178
|
-
.labels({ cache_name: this.name, result: 'miss' })
|
|
1255
|
+
.labels({ cache_name: this.name, result: 'miss' })
|
|
1256
|
+
.inc();
|
|
1179
1257
|
}
|
|
1180
1258
|
const start = performance.now();
|
|
1181
1259
|
let vector;
|
|
@@ -1186,9 +1264,7 @@ class SemanticCache {
|
|
|
1186
1264
|
throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
|
|
1187
1265
|
}
|
|
1188
1266
|
const durationSec = (performance.now() - start) / 1000;
|
|
1189
|
-
this.telemetry.metrics.embeddingDuration
|
|
1190
|
-
.labels({ cache_name: this.name })
|
|
1191
|
-
.observe(durationSec);
|
|
1267
|
+
this.telemetry.metrics.embeddingDuration.labels({ cache_name: this.name }).observe(durationSec);
|
|
1192
1268
|
// Store in embedding cache
|
|
1193
1269
|
if (this.embeddingCacheEnabled && text) {
|
|
1194
1270
|
const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
|
|
@@ -1197,7 +1273,9 @@ class SemanticCache {
|
|
|
1197
1273
|
const buf = (0, utils_1.encodeFloat32)(vector);
|
|
1198
1274
|
await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
|
|
1199
1275
|
}
|
|
1200
|
-
catch {
|
|
1276
|
+
catch {
|
|
1277
|
+
/* ignore cache write errors */
|
|
1278
|
+
}
|
|
1201
1279
|
}
|
|
1202
1280
|
return { vector, durationSec };
|
|
1203
1281
|
}
|
|
@@ -1235,22 +1313,99 @@ class SemanticCache {
|
|
|
1235
1313
|
await pipeline.exec();
|
|
1236
1314
|
}
|
|
1237
1315
|
/** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
|
|
1238
|
-
async recordSimilarityWindow(score, result, category) {
|
|
1316
|
+
async recordSimilarityWindow(score, result, category, costSavedMicros) {
|
|
1239
1317
|
const now = Date.now();
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1318
|
+
const member = JSON.stringify({
|
|
1319
|
+
score,
|
|
1320
|
+
result,
|
|
1321
|
+
category,
|
|
1322
|
+
_n: Math.random(),
|
|
1323
|
+
cost_saved_micros: costSavedMicros,
|
|
1324
|
+
});
|
|
1243
1325
|
const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
|
|
1244
1326
|
try {
|
|
1245
1327
|
const pipeline = this.client.pipeline();
|
|
1246
1328
|
pipeline.zadd(this.similarityWindowKey, now, member);
|
|
1247
|
-
// Trim by time: remove entries older than 7 days
|
|
1248
1329
|
pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
|
|
1249
|
-
// Trim by count: keep at most 10,000 most recent
|
|
1250
1330
|
pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
|
|
1251
1331
|
await pipeline.exec();
|
|
1252
1332
|
}
|
|
1253
|
-
catch {
|
|
1333
|
+
catch {
|
|
1334
|
+
/* best effort - never fail on window writes */
|
|
1335
|
+
}
|
|
1336
|
+
return member;
|
|
1337
|
+
}
|
|
1338
|
+
/**
|
|
1339
|
+
* Track a miss so a subsequent store() can backfill its cost into the
|
|
1340
|
+
* similarity-window record. Bounded by a 5-minute TTL on the bookkeeping
|
|
1341
|
+
* zset — entries beyond that are pruned on every record and backfill.
|
|
1342
|
+
*/
|
|
1343
|
+
async recordMissPending(prompt, similarityMember) {
|
|
1344
|
+
const correlationId = correlationIdFor(prompt);
|
|
1345
|
+
const now = Date.now();
|
|
1346
|
+
const fiveMinutesAgo = now - 5 * 60 * 1000;
|
|
1347
|
+
const entry = JSON.stringify({ correlationId, similarityMember });
|
|
1348
|
+
try {
|
|
1349
|
+
await this.client.zadd(this.missPendingKey, now, entry);
|
|
1350
|
+
await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
|
|
1351
|
+
}
|
|
1352
|
+
catch {
|
|
1353
|
+
/* best effort */
|
|
1354
|
+
}
|
|
1355
|
+
}
|
|
1356
|
+
/**
|
|
1357
|
+
* After a successful store(), find the oldest pending miss for the same
|
|
1358
|
+
* query and update its similarity-window record with the now-known cost.
|
|
1359
|
+
* Best-effort — silently no-op if no pending miss exists or the bookkeeping
|
|
1360
|
+
* entry has already been pruned.
|
|
1361
|
+
*/
|
|
1362
|
+
async applyCostToPendingMiss(prompt, costMicros) {
|
|
1363
|
+
const correlationId = correlationIdFor(prompt);
|
|
1364
|
+
const fiveMinutesAgo = Date.now() - 5 * 60 * 1000;
|
|
1365
|
+
try {
|
|
1366
|
+
await this.client.zremrangebyscore(this.missPendingKey, '-inf', `(${fiveMinutesAgo}`);
|
|
1367
|
+
const raw = (await this.client.zrange(this.missPendingKey, '0', '-1', 'WITHSCORES'));
|
|
1368
|
+
let matchedEntry = null;
|
|
1369
|
+
let matchedSimilarityMember = null;
|
|
1370
|
+
for (let i = 0; i < raw.length; i += 2) {
|
|
1371
|
+
const entryStr = raw[i];
|
|
1372
|
+
try {
|
|
1373
|
+
const parsed = JSON.parse(entryStr);
|
|
1374
|
+
if (parsed.correlationId === correlationId) {
|
|
1375
|
+
matchedEntry = entryStr;
|
|
1376
|
+
matchedSimilarityMember = parsed.similarityMember;
|
|
1377
|
+
break;
|
|
1378
|
+
}
|
|
1379
|
+
}
|
|
1380
|
+
catch {
|
|
1381
|
+
/* skip malformed */
|
|
1382
|
+
}
|
|
1383
|
+
}
|
|
1384
|
+
if (matchedEntry === null || matchedSimilarityMember === null) {
|
|
1385
|
+
return;
|
|
1386
|
+
}
|
|
1387
|
+
const rawScore = await this.client.zscore(this.similarityWindowKey, matchedSimilarityMember);
|
|
1388
|
+
if (rawScore === null) {
|
|
1389
|
+
await this.client.zrem(this.missPendingKey, matchedEntry);
|
|
1390
|
+
return;
|
|
1391
|
+
}
|
|
1392
|
+
const similarityScore = Number(rawScore);
|
|
1393
|
+
if (!Number.isFinite(similarityScore)) {
|
|
1394
|
+
await this.client.zrem(this.missPendingKey, matchedEntry);
|
|
1395
|
+
return;
|
|
1396
|
+
}
|
|
1397
|
+
const parsedMember = JSON.parse(matchedSimilarityMember);
|
|
1398
|
+
parsedMember.cost_saved_micros = costMicros;
|
|
1399
|
+
const updatedMember = JSON.stringify(parsedMember);
|
|
1400
|
+
const updatePipeline = this.client.pipeline();
|
|
1401
|
+
updatePipeline.zrem(this.similarityWindowKey, matchedSimilarityMember);
|
|
1402
|
+
updatePipeline.zadd(this.similarityWindowKey, similarityScore, updatedMember);
|
|
1403
|
+
updatePipeline.zrem(this.missPendingKey, matchedEntry);
|
|
1404
|
+
await updatePipeline.exec();
|
|
1405
|
+
}
|
|
1406
|
+
catch {
|
|
1407
|
+
/* never fail store() because of bookkeeping */
|
|
1408
|
+
}
|
|
1254
1409
|
}
|
|
1255
1410
|
assertInitialized(method) {
|
|
1256
1411
|
if (!this._initialized) {
|
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export { SemanticCache } from './SemanticCache';
|
|
|
2
2
|
export type { ThresholdEffectivenessResult } from './SemanticCache';
|
|
3
3
|
export { DEFAULT_COST_TABLE } from './defaultCostTable';
|
|
4
4
|
export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
|
|
5
|
+
export { createKeywordOverlapRerank } from './rerank';
|
|
5
6
|
export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
|
|
6
7
|
export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
|
|
7
8
|
export { escapeTag } from './utils';
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
|
|
3
|
+
exports.defaultNormalizer = exports.composeNormalizer = exports.passthrough = exports.fetchAndHash = exports.hashUrl = exports.hashBytes = exports.hashBase64 = exports.escapeTag = exports.ValkeyCommandError = exports.EmbeddingError = exports.SemanticCacheUsageError = exports.createKeywordOverlapRerank = exports.DEFAULT_COST_TABLE = exports.SemanticCache = void 0;
|
|
4
4
|
var SemanticCache_1 = require("./SemanticCache");
|
|
5
5
|
Object.defineProperty(exports, "SemanticCache", { enumerable: true, get: function () { return SemanticCache_1.SemanticCache; } });
|
|
6
6
|
var defaultCostTable_1 = require("./defaultCostTable");
|
|
7
7
|
Object.defineProperty(exports, "DEFAULT_COST_TABLE", { enumerable: true, get: function () { return defaultCostTable_1.DEFAULT_COST_TABLE; } });
|
|
8
|
+
var rerank_1 = require("./rerank");
|
|
9
|
+
Object.defineProperty(exports, "createKeywordOverlapRerank", { enumerable: true, get: function () { return rerank_1.createKeywordOverlapRerank; } });
|
|
8
10
|
var errors_1 = require("./errors");
|
|
9
11
|
Object.defineProperty(exports, "SemanticCacheUsageError", { enumerable: true, get: function () { return errors_1.SemanticCacheUsageError; } });
|
|
10
12
|
Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return errors_1.EmbeddingError; } });
|
package/dist/rerank.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in rerank factories for @betterdb/semantic-cache.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Built-in keyword-overlap reranker.
|
|
6
|
+
*
|
|
7
|
+
* Blends cosine similarity with word overlap and returns the index of the
|
|
8
|
+
* best candidate.
|
|
9
|
+
*
|
|
10
|
+
* @param options.compare
|
|
11
|
+
* `"prompt"` – overlap of the incoming query against each candidate's stored
|
|
12
|
+
* prompt. Equivalence signal. Catches entity mismatches
|
|
13
|
+
* (e.g. "weather in Paris" vs "weather in Berlin"). Default.
|
|
14
|
+
* `"response"` – overlap of the incoming query against each candidate's cached
|
|
15
|
+
* response. Relevance signal.
|
|
16
|
+
*
|
|
17
|
+
* @param options.cosineWeight
|
|
18
|
+
* Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
|
|
19
|
+
* Default: 0.7 (overlap 0.3).
|
|
20
|
+
*
|
|
21
|
+
* Candidate objects carry: `similarity` (cosine distance, lower = more similar),
|
|
22
|
+
* `response` (string), and `prompt` (string, stored prompt).
|
|
23
|
+
*/
|
|
24
|
+
export declare function createKeywordOverlapRerank(options?: {
|
|
25
|
+
compare?: 'prompt' | 'response';
|
|
26
|
+
cosineWeight?: number;
|
|
27
|
+
}): (query: string, candidates: Array<{
|
|
28
|
+
response: string;
|
|
29
|
+
similarity: number;
|
|
30
|
+
prompt: string;
|
|
31
|
+
}>) => Promise<number>;
|
package/dist/rerank.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Built-in rerank factories for @betterdb/semantic-cache.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.createKeywordOverlapRerank = createKeywordOverlapRerank;
|
|
7
|
+
/**
 * Punctuation stripped from the start and end of each whitespace-delimited
 * token. Hoisted so the pattern is compiled once rather than per token.
 */
const SURROUNDING_PUNCTUATION = /^[.,!?;:"'()\[\]{}<>]+|[.,!?;:"'()\[\]{}<>]+$/g;
/**
 * Tokenize: lowercase, split on whitespace, strip surrounding punctuation.
 * Deterministic and dependency-free.
 * IDF weighting would attach here at the token-weighting step.
 *
 * @param text String to tokenize.
 * @returns Set of distinct, non-empty tokens.
 */
function tokenize(text) {
    const out = new Set();
    for (const raw of text.toLowerCase().split(/\s+/)) {
        const tok = raw.replace(SURROUNDING_PUNCTUATION, '');
        if (tok)
            out.add(tok);
    }
    return out;
}
/**
 * Built-in keyword-overlap reranker.
 *
 * Blends cosine similarity with word overlap and returns the index of the
 * best candidate. Overlap is the fraction of distinct query tokens that also
 * occur in the compared candidate text. Ties keep the earliest candidate;
 * index 0 is returned when `candidates` is empty or no score is comparable.
 *
 * @param options.compare
 *   `"prompt"`   – overlap of the incoming query against each candidate's stored
 *                  prompt. Equivalence signal. Catches entity mismatches
 *                  (e.g. "weather in Paris" vs "weather in Berlin"). Default.
 *   `"response"` – overlap of the incoming query against each candidate's cached
 *                  response. Relevance signal.
 *
 * @param options.cosineWeight
 *   Weight on cosine similarity in [0, 1]. Overlap weight is `1 - cosineWeight`.
 *   Default: 0.7 (overlap 0.3).
 *
 * Candidate objects carry: `similarity` (cosine distance, lower = more similar),
 * `response` (string), and `prompt` (string, stored prompt). A missing or
 * non-string compared field counts as empty text (overlap 0).
 *
 * @returns Async rerank function `(query, candidates) => index`.
 * @throws Error if `cosineWeight` is NaN or outside [0, 1].
 */
function createKeywordOverlapRerank(options) {
    const compare = options?.compare ?? 'prompt';
    const cosineWeight = options?.cosineWeight ?? 0.7;
    // Negated in-range test: NaN fails every comparison, so `< 0 || > 1`
    // would let it through and turn every score into NaN.
    if (!(cosineWeight >= 0 && cosineWeight <= 1)) {
        throw new Error('cosineWeight must be in [0, 1]');
    }
    const overlapWeight = 1.0 - cosineWeight;
    return async (query, candidates) => {
        const queryTokens = tokenize(query);
        let bestIdx = 0;
        let bestScore = -Infinity;
        for (let i = 0; i < candidates.length; i++) {
            const rawText = candidates[i][compare];
            // Guard the tokenizer: only strings have toLowerCase().
            const candTokens = tokenize(typeof rawText === 'string' ? rawText : '');
            let overlap = 0;
            if (queryTokens.size > 0) {
                let intersection = 0;
                for (const t of queryTokens) {
                    if (candTokens.has(t))
                        intersection++;
                }
                overlap = intersection / queryTokens.size;
            }
            // `similarity` is a distance; convert to a similarity before blending.
            const cosineSim = 1.0 - candidates[i].similarity;
            const score = cosineWeight * cosineSim + overlapWeight * overlap;
            if (score > bestScore) {
                bestScore = score;
                bestIdx = i;
            }
        }
        return bestIdx;
    };
}
|
package/dist/types.d.ts
CHANGED
|
@@ -127,6 +127,7 @@ export interface RerankOptions {
|
|
|
127
127
|
rerankFn: (query: string, candidates: Array<{
|
|
128
128
|
response: string;
|
|
129
129
|
similarity: number;
|
|
130
|
+
prompt: string;
|
|
130
131
|
}>) => Promise<number>;
|
|
131
132
|
}
|
|
132
133
|
/**
|
|
@@ -162,6 +163,8 @@ export interface JudgeOptions {
|
|
|
162
163
|
similarity: number;
|
|
163
164
|
threshold: number;
|
|
164
165
|
category: string | undefined;
|
|
166
|
+
/** The stored prompt text for the matched entry. */
|
|
167
|
+
cachedPrompt: string;
|
|
165
168
|
}) => Promise<boolean>;
|
|
166
169
|
/**
|
|
167
170
|
* Behavior when judgeFn throws or exceeds timeoutMs.
|
package/package.json
CHANGED