@betterdb/semantic-cache 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/SemanticCache.js +91 -3
- package/dist/index.d.ts +1 -1
- package/dist/telemetry.d.ts +2 -0
- package/dist/telemetry.js +13 -0
- package/dist/types.d.ts +69 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -96,6 +96,54 @@ A lookup is a **hit** when `score <= threshold`. The default threshold is `0.1`.
|
|
|
96
96
|
| Conversational / RAG | `0.15` | Paraphrases hit as `high` confidence |
|
|
97
97
|
| Broad search / recall | `0.20` | High hit rate, review uncertain hits |
|
|
98
98
|
|
|
99
|
+
## LLM-as-judge
|
|
100
|
+
|
|
101
|
+
When a hit lands in the uncertainty band (`threshold - uncertaintyBand <= score <= threshold`), you can supply a `judgeFn` to adjudicate automatically instead of handling `confidence: 'uncertain'` yourself.
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
const result = await cache.check(userPrompt, {
|
|
105
|
+
judge: {
|
|
106
|
+
judgeFn: async ({ prompt, response, similarity, threshold, category }) => {
|
|
107
|
+
// Return true to accept (confidence → 'high')
|
|
108
|
+
// Return false to reject (treated as miss with nearestMiss)
|
|
109
|
+
const verdict = await openai.chat.completions.create({
|
|
110
|
+
model: 'gpt-5-mini',
|
|
111
|
+
messages: [
|
|
112
|
+
{ role: 'system', content: 'Reply YES or NO only.' },
|
|
113
|
+
{ role: 'user', content: `Does this cached response correctly answer the prompt?\nPrompt: ${prompt}\nResponse: ${response}` },
|
|
114
|
+
],
|
|
115
|
+
});
|
|
116
|
+
return verdict.choices[0].message.content?.startsWith('YES') ?? false;
|
|
117
|
+
},
|
|
118
|
+
onError: 'accept', // fail-open on judge errors (default)
|
|
119
|
+
timeoutMs: 2000, // per-call timeout (default)
|
|
120
|
+
},
|
|
121
|
+
});
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**When the judge is invoked:** only for `confidence === 'uncertain'` hits. High-confidence hits, misses, and the zero-candidates case bypass the judge entirely.
|
|
125
|
+
|
|
126
|
+
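**Accept path:** `result.hit === true`, `result.confidence === 'high'`. If `judgeFn` throws or times out while `onError` is `'accept'`, the hit is still returned but `result.confidence` stays `'uncertain'`.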
|
|
127
|
+
|
|
128
|
+
**Reject path:** `result.hit === false`, `result.nearestMiss` populated with `deltaToThreshold <= 0` (use this to distinguish judge rejections from regular misses where `deltaToThreshold > 0`).
|
|
129
|
+
|
|
130
|
+
**Composing with rerank:** when both `rerank` and `judge` are set, the judge receives the reranked pick's response and similarity score.
|
|
131
|
+
|
|
132
|
+
**`checkBatch()` does not support `judge`.** Call `check()` individually for prompts that need adjudication.
|
|
133
|
+
|
|
134
|
+
### CacheCheckOptions reference
|
|
135
|
+
|
|
136
|
+
| Option | Type | Default | Description |
|
|
137
|
+
|---|---|---|---|
|
|
138
|
+
| `threshold` | `number` | `defaultThreshold` | Per-request cosine distance threshold override |
|
|
139
|
+
| `category` | `string` | — | Category tag for per-category thresholds and metric labels |
|
|
140
|
+
| `filter` | `string` | — | FT.SEARCH pre-filter expression (trusted input only) |
|
|
141
|
+
| `k` | `number` | `1` | KNN neighbours to fetch (ignored when `rerank` is set) |
|
|
142
|
+
| `staleAfterModelChange` | `boolean` | `false` | Evict and miss when stored model differs from `currentModel` |
|
|
143
|
+
| `currentModel` | `string` | — | Model to compare against stored entries |
|
|
144
|
+
| `rerank` | `RerankOptions` | — | Rerank hook; see `RerankOptions` |
|
|
145
|
+
| `judge` | `JudgeOptions` | — | LLM-as-judge for borderline hits; see `JudgeOptions`. Not supported by `checkBatch()`; throws `SemanticCacheUsageError` |
|
|
146
|
+
|
|
99
147
|
## Configuration Reference
|
|
100
148
|
|
|
101
149
|
| Option | Type | Default | Description |
|
package/dist/SemanticCache.js
CHANGED
|
@@ -314,14 +314,85 @@ class SemanticCache {
|
|
|
314
314
|
return { hit: false, confidence: 'miss' };
|
|
315
315
|
}
|
|
316
316
|
}
|
|
317
|
-
// All checks passed —
|
|
317
|
+
// All checks passed — compute confidence (recordSimilarityWindow moves to after judge)
|
|
318
|
+
let confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
319
|
+
const matchedKey = winner.key;
|
|
320
|
+
// --- LLM-as-judge for borderline hits ---
|
|
321
|
+
if (options?.judge && confidence === 'uncertain') {
|
|
322
|
+
const judgeStart = performance.now();
|
|
323
|
+
const timeoutMs = options.judge.timeoutMs ?? 2000;
|
|
324
|
+
const onError = options.judge.onError ?? 'accept';
|
|
325
|
+
let decision;
|
|
326
|
+
try {
|
|
327
|
+
const accepted = await raceWithTimeout(options.judge.judgeFn({
|
|
328
|
+
prompt: promptText,
|
|
329
|
+
response: winner.fields['response'] ?? '',
|
|
330
|
+
similarity: winnerScore,
|
|
331
|
+
threshold,
|
|
332
|
+
category: category || undefined,
|
|
333
|
+
}), timeoutMs);
|
|
334
|
+
decision = accepted ? 'accept' : 'reject';
|
|
335
|
+
}
|
|
336
|
+
catch (err) {
|
|
337
|
+
const isTimeout = err instanceof JudgeTimeoutError;
|
|
338
|
+
if (onError === 'accept') {
|
|
339
|
+
decision = isTimeout ? 'timeout_accept' : 'error_accept';
|
|
340
|
+
}
|
|
341
|
+
else {
|
|
342
|
+
decision = isTimeout ? 'timeout_reject' : 'error_reject';
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
const judgeSec = (performance.now() - judgeStart) / 1000;
|
|
346
|
+
this.telemetry.metrics.judgeDecisions
|
|
347
|
+
.labels({ cache_name: this.name, category: categoryLabel, decision })
|
|
348
|
+
.inc();
|
|
349
|
+
this.telemetry.metrics.judgeDuration
|
|
350
|
+
.labels({ cache_name: this.name, category: categoryLabel, decision })
|
|
351
|
+
.observe(judgeSec);
|
|
352
|
+
span.setAttributes({
|
|
353
|
+
'cache.judge.invoked': true,
|
|
354
|
+
'cache.judge.decision': decision,
|
|
355
|
+
'cache.judge.latency_ms': judgeSec * 1000,
|
|
356
|
+
});
|
|
357
|
+
if (decision === 'accept') {
|
|
358
|
+
confidence = 'high';
|
|
359
|
+
// Fall through to hit-return path
|
|
360
|
+
}
|
|
361
|
+
else if (decision === 'error_accept' || decision === 'timeout_accept') {
|
|
362
|
+
// Preserve 'uncertain'; fall through to hit-return path
|
|
363
|
+
}
|
|
364
|
+
else {
|
|
365
|
+
// reject / error_reject / timeout_reject → treat as miss
|
|
366
|
+
await this.recordSimilarityWindow(winnerScore, 'miss', category);
|
|
367
|
+
await this.recordStat('misses');
|
|
368
|
+
this.telemetry.metrics.requestsTotal
|
|
369
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
|
|
370
|
+
.inc();
|
|
371
|
+
span.setAttributes({
|
|
372
|
+
'cache.hit': false,
|
|
373
|
+
'cache.name': this.name,
|
|
374
|
+
'cache.category': categoryLabel,
|
|
375
|
+
});
|
|
376
|
+
return {
|
|
377
|
+
hit: false,
|
|
378
|
+
confidence: 'miss',
|
|
379
|
+
similarity: winnerScore,
|
|
380
|
+
nearestMiss: {
|
|
381
|
+
similarity: winnerScore,
|
|
382
|
+
threshold,
|
|
383
|
+
deltaToThreshold: winnerScore - threshold,
|
|
384
|
+
matchedKey,
|
|
385
|
+
},
|
|
386
|
+
};
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
// --- End judge ---
|
|
390
|
+
// Record as genuine hit (moved here from before the judge block)
|
|
318
391
|
await this.recordSimilarityWindow(winnerScore, 'hit', category);
|
|
319
|
-
const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
320
392
|
await this.recordStat('hits');
|
|
321
393
|
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
322
394
|
this.telemetry.metrics.requestsTotal
|
|
323
395
|
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
324
|
-
const matchedKey = winner.key;
|
|
325
396
|
if (this.defaultTtl !== undefined && matchedKey) {
|
|
326
397
|
await this.client.expire(matchedKey, this.defaultTtl);
|
|
327
398
|
}
|
|
@@ -501,6 +572,9 @@ class SemanticCache {
|
|
|
501
572
|
if (options?.staleAfterModelChange) {
|
|
502
573
|
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
|
|
503
574
|
}
|
|
575
|
+
if (options?.judge) {
|
|
576
|
+
throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'judge' option. Use check() for LLM-as-judge adjudication.");
|
|
577
|
+
}
|
|
504
578
|
return this.traced('checkBatch', async (span) => {
|
|
505
579
|
// Resolve all prompts and embed in parallel
|
|
506
580
|
const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
|
|
@@ -1233,3 +1307,17 @@ class SemanticCache {
|
|
|
1233
1307
|
}
|
|
1234
1308
|
}
|
|
1235
1309
|
exports.SemanticCache = SemanticCache;
|
|
1310
|
+
// --- Judge helpers ---
|
|
1311
|
+
class JudgeTimeoutError extends Error {
|
|
1312
|
+
constructor() {
|
|
1313
|
+
super('judgeFn timed out');
|
|
1314
|
+
this.name = 'JudgeTimeoutError';
|
|
1315
|
+
}
|
|
1316
|
+
}
|
|
1317
|
+
function raceWithTimeout(p, timeoutMs) {
|
|
1318
|
+
let timer;
|
|
1319
|
+
const timeout = new Promise((_, reject) => {
|
|
1320
|
+
timer = setTimeout(() => reject(new JudgeTimeoutError()), timeoutMs);
|
|
1321
|
+
});
|
|
1322
|
+
return Promise.race([p, timeout]).finally(() => clearTimeout(timer));
|
|
1323
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { SemanticCache } from './SemanticCache';
|
|
2
2
|
export type { ThresholdEffectivenessResult } from './SemanticCache';
|
|
3
3
|
export { DEFAULT_COST_TABLE } from './defaultCostTable';
|
|
4
|
-
export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, ConfigRefreshOptions, } from './types';
|
|
4
|
+
export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
|
|
5
5
|
export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
|
|
6
6
|
export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
|
|
7
7
|
export { escapeTag } from './utils';
|
package/dist/telemetry.d.ts
CHANGED
package/dist/telemetry.js
CHANGED
|
@@ -67,6 +67,17 @@ function createTelemetry(opts) {
|
|
|
67
67
|
help: 'Count of failed periodic config refreshes (HGETALL on __config).',
|
|
68
68
|
labelNames: ['cache_name'],
|
|
69
69
|
});
|
|
70
|
+
const judgeDecisions = getOrCreateCounter(registry, {
|
|
71
|
+
name: `${opts.prefix}_judge_decisions_total`,
|
|
72
|
+
help: 'LLM-as-judge decisions for borderline cache hits',
|
|
73
|
+
labelNames: ['cache_name', 'category', 'decision'],
|
|
74
|
+
});
|
|
75
|
+
const judgeDuration = getOrCreateHistogram(registry, {
|
|
76
|
+
name: `${opts.prefix}_judge_duration_seconds`,
|
|
77
|
+
help: 'Wall-clock duration of judgeFn invocations',
|
|
78
|
+
labelNames: ['cache_name', 'category', 'decision'],
|
|
79
|
+
buckets: [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
|
|
80
|
+
});
|
|
70
81
|
return {
|
|
71
82
|
tracer,
|
|
72
83
|
metrics: {
|
|
@@ -79,6 +90,8 @@ function createTelemetry(opts) {
|
|
|
79
90
|
staleModelEvictions,
|
|
80
91
|
discoveryWriteFailed,
|
|
81
92
|
configRefreshFailed,
|
|
93
|
+
judgeDecisions,
|
|
94
|
+
judgeDuration,
|
|
82
95
|
},
|
|
83
96
|
};
|
|
84
97
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -129,6 +129,61 @@ export interface RerankOptions {
|
|
|
129
129
|
similarity: number;
|
|
130
130
|
}>) => Promise<number>;
|
|
131
131
|
}
|
|
132
|
+
/**
|
|
133
|
+
* LLM-as-judge adjudication for borderline cache hits.
|
|
134
|
+
*
|
|
135
|
+
* When set on CacheCheckOptions, a hit whose cosine distance lands in the
|
|
136
|
+
* uncertainty band (threshold - uncertaintyBand <= score <= threshold) is
|
|
137
|
+
* passed to judgeFn before being returned. The judge accepts (promotes the
|
|
138
|
+
* hit to confidence: 'high') or rejects (treats it as a miss with
|
|
139
|
+
* nearestMiss populated).
|
|
140
|
+
*
|
|
141
|
+
* The judge is NOT invoked for:
|
|
142
|
+
* - high-confidence hits (score < threshold - uncertaintyBand)
|
|
143
|
+
* - misses (score > threshold)
|
|
144
|
+
* - the no-candidates case (FT.SEARCH returned zero rows)
|
|
145
|
+
*
|
|
146
|
+
* When rerank is also set, the judge runs on the reranked pick, not the
|
|
147
|
+
* original top-1.
|
|
148
|
+
*/
|
|
149
|
+
export interface JudgeOptions {
|
|
150
|
+
/**
|
|
151
|
+
* Function that decides whether a borderline cache hit is acceptable.
|
|
152
|
+
* Return true to accept (caller receives confidence: 'high').
|
|
153
|
+
* Return false to reject (caller receives a miss with nearestMiss).
|
|
154
|
+
*
|
|
155
|
+
* The function receives the original prompt text (or the resolved text
|
|
156
|
+
* portion of a multipart prompt), the cached response, the cosine distance,
|
|
157
|
+
* the effective threshold, and the category if one was supplied to check().
|
|
158
|
+
*/
|
|
159
|
+
judgeFn: (input: {
|
|
160
|
+
prompt: string;
|
|
161
|
+
response: string;
|
|
162
|
+
similarity: number;
|
|
163
|
+
threshold: number;
|
|
164
|
+
category: string | undefined;
|
|
165
|
+
}) => Promise<boolean>;
|
|
166
|
+
/**
|
|
167
|
+
* Behavior when judgeFn throws or exceeds timeoutMs.
|
|
168
|
+
* 'accept' - return the cached response with confidence: 'uncertain'
|
|
169
|
+
* (current pre-judge behavior, fail-open).
|
|
170
|
+
* 'reject' - treat as a miss (fail-closed).
|
|
171
|
+
* Default: 'accept'.
|
|
172
|
+
*/
|
|
173
|
+
onError?: 'accept' | 'reject';
|
|
174
|
+
/**
|
|
175
|
+
* Per-call timeout in milliseconds. Default: 2000.
|
|
176
|
+
* The judge function is raced against this timeout; timeout is treated
|
|
177
|
+
* the same as a thrown error and routed through onError.
|
|
178
|
+
*
|
|
179
|
+
* Note: the underlying promise is not cancelled on timeout — JavaScript has
|
|
180
|
+
* no built-in cancellation primitive. A real LLM HTTP request will continue
|
|
181
|
+
* running in the background after the timeout fires, consuming API quota.
|
|
182
|
+
* To stop the underlying request, use an AbortController inside judgeFn and
|
|
183
|
+
* abort it when the signal you manage fires.
|
|
184
|
+
*/
|
|
185
|
+
timeoutMs?: number;
|
|
186
|
+
}
|
|
132
187
|
export interface CacheCheckOptions {
|
|
133
188
|
/** Per-request threshold override (cosine distance 0-2). Highest priority. */
|
|
134
189
|
threshold?: number;
|
|
@@ -167,6 +222,11 @@ export interface CacheCheckOptions {
|
|
|
167
222
|
* in rerankFn yourself.
|
|
168
223
|
*/
|
|
169
224
|
rerank?: RerankOptions;
|
|
225
|
+
/**
|
|
226
|
+
* Optional LLM-as-judge adjudication for borderline hits.
|
|
227
|
+
* See JudgeOptions. Not supported by checkBatch(), which throws SemanticCacheUsageError - call check() per prompt instead.
|
|
228
|
+
*/
|
|
229
|
+
judge?: JudgeOptions;
|
|
170
230
|
}
|
|
171
231
|
export interface CacheStoreOptions {
|
|
172
232
|
/** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
|
|
@@ -223,10 +283,19 @@ export interface CacheCheckResult {
|
|
|
223
283
|
/**
|
|
224
284
|
* On a miss where a candidate existed but didn't clear the threshold,
|
|
225
285
|
* describes how close it was. Useful for threshold tuning.
|
|
286
|
+
*
|
|
287
|
+
* Note: when the miss originates from a judge rejection, `deltaToThreshold`
|
|
288
|
+
* will be <= 0 because the score did clear the threshold — the judge said no.
|
|
289
|
+
* Existing non-judge misses always produce deltaToThreshold > 0.
|
|
290
|
+
* Use `deltaToThreshold <= 0` to detect judge-originated misses.
|
|
226
291
|
*/
|
|
227
292
|
nearestMiss?: {
|
|
228
293
|
similarity: number;
|
|
229
294
|
deltaToThreshold: number;
|
|
295
|
+
/** The effective threshold that was applied. Present on judge-rejection misses. */
|
|
296
|
+
threshold?: number;
|
|
297
|
+
/** The Valkey key of the entry that was rejected. Present on judge-rejection misses. */
|
|
298
|
+
matchedKey?: string;
|
|
230
299
|
};
|
|
231
300
|
/**
|
|
232
301
|
* Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.
|
package/package.json
CHANGED