@betterdb/semantic-cache 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -96,6 +96,54 @@ A lookup is a **hit** when `score <= threshold`. The default threshold is `0.1`.
96
96
  | Conversational / RAG | `0.15` | Paraphrases hit as `high` confidence |
97
97
  | Broad search / recall | `0.20` | High hit rate, review uncertain hits |
98
98
 
99
+ ## LLM-as-judge
100
+
101
+ When a hit lands in the uncertainty band (`threshold - uncertaintyBand <= score <= threshold`), you can supply a `judgeFn` to adjudicate automatically instead of handling `confidence: 'uncertain'` yourself.
102
+
103
+ ```typescript
104
+ const result = await cache.check(userPrompt, {
105
+ judge: {
106
+ judgeFn: async ({ prompt, response, similarity, threshold, category }) => {
107
+ // Return true to accept (confidence → 'high')
108
+ // Return false to reject (treated as miss with nearestMiss)
109
+ const verdict = await openai.chat.completions.create({
110
+ model: 'gpt-5-mini',
111
+ messages: [
112
+ { role: 'system', content: 'Reply YES or NO only.' },
113
+ { role: 'user', content: `Does this cached response correctly answer the prompt?\nPrompt: ${prompt}\nResponse: ${response}` },
114
+ ],
115
+ });
116
+ return verdict.choices[0].message.content?.startsWith('YES') ?? false;
117
+ },
118
+ onError: 'accept', // fail-open on judge errors (default)
119
+ timeoutMs: 2000, // per-call timeout (default)
120
+ },
121
+ });
122
+ ```
123
+
124
+ **When the judge is invoked:** only for `confidence === 'uncertain'` hits. High-confidence hits, misses, and the zero-candidates case bypass the judge entirely.
125
+
126
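+ **Accept path:** `result.hit === true`, `result.confidence === 'high'`. If `judgeFn` throws or times out and `onError` is `'accept'`, the hit is still returned but `confidence` stays `'uncertain'`.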
127
+
128
+ **Reject path:** `result.hit === false`, `result.nearestMiss` populated with `deltaToThreshold <= 0` (use this to distinguish judge rejections from regular misses where `deltaToThreshold > 0`).
129
+
130
+ **Composing with rerank:** when both `rerank` and `judge` are set, the judge receives the reranked pick's response and similarity score.
131
+
132
+ **`checkBatch()` does not support `judge`.** Call `check()` individually for prompts that need adjudication.
133
+
134
+ ### CacheCheckOptions reference
135
+
136
+ | Option | Type | Default | Description |
137
+ |---|---|---|---|
138
+ | `threshold` | `number` | `defaultThreshold` | Per-request cosine distance threshold override |
139
+ | `category` | `string` | — | Category tag for per-category thresholds and metric labels |
140
+ | `filter` | `string` | — | FT.SEARCH pre-filter expression (trusted input only) |
141
+ | `k` | `number` | `1` | KNN neighbours to fetch (ignored when `rerank` is set) |
142
+ | `staleAfterModelChange` | `boolean` | `false` | Evict and miss when stored model differs from `currentModel` |
143
+ | `currentModel` | `string` | — | Model to compare against stored entries |
144
+ | `rerank` | `RerankOptions` | — | Rerank hook; see `RerankOptions` |
145
+ | `judge` | `JudgeOptions` | — | LLM-as-judge for borderline hits; see `JudgeOptions`. Not supported by `checkBatch()`; throws `SemanticCacheUsageError` |
146
+
99
147
  ## Configuration Reference
100
148
 
101
149
  | Option | Type | Default | Description |
@@ -314,14 +314,85 @@ class SemanticCache {
314
314
  return { hit: false, confidence: 'miss' };
315
315
  }
316
316
  }
317
- // All checks passed — record as a genuine hit
317
+ // All checks passed — compute confidence (recordSimilarityWindow moves to after judge)
318
+ let confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
319
+ const matchedKey = winner.key;
320
+ // --- LLM-as-judge for borderline hits ---
321
+ if (options?.judge && confidence === 'uncertain') {
322
+ const judgeStart = performance.now();
323
+ const timeoutMs = options.judge.timeoutMs ?? 2000;
324
+ const onError = options.judge.onError ?? 'accept';
325
+ let decision;
326
+ try {
327
+ const accepted = await raceWithTimeout(options.judge.judgeFn({
328
+ prompt: promptText,
329
+ response: winner.fields['response'] ?? '',
330
+ similarity: winnerScore,
331
+ threshold,
332
+ category: category || undefined,
333
+ }), timeoutMs);
334
+ decision = accepted ? 'accept' : 'reject';
335
+ }
336
+ catch (err) {
337
+ const isTimeout = err instanceof JudgeTimeoutError;
338
+ if (onError === 'accept') {
339
+ decision = isTimeout ? 'timeout_accept' : 'error_accept';
340
+ }
341
+ else {
342
+ decision = isTimeout ? 'timeout_reject' : 'error_reject';
343
+ }
344
+ }
345
+ const judgeSec = (performance.now() - judgeStart) / 1000;
346
+ this.telemetry.metrics.judgeDecisions
347
+ .labels({ cache_name: this.name, category: categoryLabel, decision })
348
+ .inc();
349
+ this.telemetry.metrics.judgeDuration
350
+ .labels({ cache_name: this.name, category: categoryLabel, decision })
351
+ .observe(judgeSec);
352
+ span.setAttributes({
353
+ 'cache.judge.invoked': true,
354
+ 'cache.judge.decision': decision,
355
+ 'cache.judge.latency_ms': judgeSec * 1000,
356
+ });
357
+ if (decision === 'accept') {
358
+ confidence = 'high';
359
+ // Fall through to hit-return path
360
+ }
361
+ else if (decision === 'error_accept' || decision === 'timeout_accept') {
362
+ // Preserve 'uncertain'; fall through to hit-return path
363
+ }
364
+ else {
365
+ // reject / error_reject / timeout_reject → treat as miss
366
+ await this.recordSimilarityWindow(winnerScore, 'miss', category);
367
+ await this.recordStat('misses');
368
+ this.telemetry.metrics.requestsTotal
369
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
370
+ .inc();
371
+ span.setAttributes({
372
+ 'cache.hit': false,
373
+ 'cache.name': this.name,
374
+ 'cache.category': categoryLabel,
375
+ });
376
+ return {
377
+ hit: false,
378
+ confidence: 'miss',
379
+ similarity: winnerScore,
380
+ nearestMiss: {
381
+ similarity: winnerScore,
382
+ threshold,
383
+ deltaToThreshold: winnerScore - threshold,
384
+ matchedKey,
385
+ },
386
+ };
387
+ }
388
+ }
389
+ // --- End judge ---
390
+ // Record as genuine hit (moved here from before the judge block)
318
391
  await this.recordSimilarityWindow(winnerScore, 'hit', category);
319
- const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
320
392
  await this.recordStat('hits');
321
393
  const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
322
394
  this.telemetry.metrics.requestsTotal
323
395
  .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
324
- const matchedKey = winner.key;
325
396
  if (this.defaultTtl !== undefined && matchedKey) {
326
397
  await this.client.expire(matchedKey, this.defaultTtl);
327
398
  }
@@ -501,6 +572,9 @@ class SemanticCache {
501
572
  if (options?.staleAfterModelChange) {
502
573
  throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
503
574
  }
575
+ if (options?.judge) {
576
+ throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'judge' option. Use check() for LLM-as-judge adjudication.");
577
+ }
504
578
  return this.traced('checkBatch', async (span) => {
505
579
  // Resolve all prompts and embed in parallel
506
580
  const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
@@ -1233,3 +1307,17 @@ class SemanticCache {
1233
1307
  }
1234
1308
  }
1235
1309
  exports.SemanticCache = SemanticCache;
1310
+ // --- Judge helpers ---
1311
+ class JudgeTimeoutError extends Error {
1312
+ constructor() {
1313
+ super('judgeFn timed out');
1314
+ this.name = 'JudgeTimeoutError';
1315
+ }
1316
+ }
1317
+ function raceWithTimeout(p, timeoutMs) {
1318
+ let timer;
1319
+ const timeout = new Promise((_, reject) => {
1320
+ timer = setTimeout(() => reject(new JudgeTimeoutError()), timeoutMs);
1321
+ });
1322
+ return Promise.race([p, timeout]).finally(() => clearTimeout(timer));
1323
+ }
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  export { SemanticCache } from './SemanticCache';
2
2
  export type { ThresholdEffectivenessResult } from './SemanticCache';
3
3
  export { DEFAULT_COST_TABLE } from './defaultCostTable';
4
- export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, ConfigRefreshOptions, } from './types';
4
+ export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
5
5
  export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
6
6
  export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
7
7
  export { escapeTag } from './utils';
@@ -15,6 +15,8 @@ interface CacheMetrics {
15
15
  staleModelEvictions: Counter;
16
16
  discoveryWriteFailed: Counter;
17
17
  configRefreshFailed: Counter;
18
+ judgeDecisions: Counter;
19
+ judgeDuration: Histogram;
18
20
  }
19
21
  export interface Telemetry {
20
22
  tracer: Tracer;
package/dist/telemetry.js CHANGED
@@ -67,6 +67,17 @@ function createTelemetry(opts) {
67
67
  help: 'Count of failed periodic config refreshes (HGETALL on __config).',
68
68
  labelNames: ['cache_name'],
69
69
  });
70
+ const judgeDecisions = getOrCreateCounter(registry, {
71
+ name: `${opts.prefix}_judge_decisions_total`,
72
+ help: 'LLM-as-judge decisions for borderline cache hits',
73
+ labelNames: ['cache_name', 'category', 'decision'],
74
+ });
75
+ const judgeDuration = getOrCreateHistogram(registry, {
76
+ name: `${opts.prefix}_judge_duration_seconds`,
77
+ help: 'Wall-clock duration of judgeFn invocations',
78
+ labelNames: ['cache_name', 'category', 'decision'],
79
+ buckets: [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
80
+ });
70
81
  return {
71
82
  tracer,
72
83
  metrics: {
@@ -79,6 +90,8 @@ function createTelemetry(opts) {
79
90
  staleModelEvictions,
80
91
  discoveryWriteFailed,
81
92
  configRefreshFailed,
93
+ judgeDecisions,
94
+ judgeDuration,
82
95
  },
83
96
  };
84
97
  }
package/dist/types.d.ts CHANGED
@@ -129,6 +129,61 @@ export interface RerankOptions {
129
129
  similarity: number;
130
130
  }>) => Promise<number>;
131
131
  }
132
+ /**
133
+ * LLM-as-judge adjudication for borderline cache hits.
134
+ *
135
+ * When set on CacheCheckOptions, a hit whose cosine distance lands in the
136
+ * uncertainty band (threshold - uncertaintyBand <= score <= threshold) is
137
+ * passed to judgeFn before being returned. The judge accepts (promotes the
138
+ * hit to confidence: 'high') or rejects (treats it as a miss with
139
+ * nearestMiss populated).
140
+ *
141
+ * The judge is NOT invoked for:
142
+ * - high-confidence hits (score < threshold - uncertaintyBand)
143
+ * - misses (score > threshold)
144
+ * - the no-candidates case (FT.SEARCH returned zero rows)
145
+ *
146
+ * When rerank is also set, the judge runs on the reranked pick, not the
147
+ * original top-1.
148
+ */
149
+ export interface JudgeOptions {
150
+ /**
151
+ * Function that decides whether a borderline cache hit is acceptable.
152
+ * Return true to accept (caller receives confidence: 'high').
153
+ * Return false to reject (caller receives a miss with nearestMiss).
154
+ *
155
+ * The function receives the original prompt text (or the resolved text
156
+ * portion of a multipart prompt), the cached response, the cosine distance,
157
+ * the effective threshold, and the category if one was supplied to check().
158
+ */
159
+ judgeFn: (input: {
160
+ prompt: string;
161
+ response: string;
162
+ similarity: number;
163
+ threshold: number;
164
+ category: string | undefined;
165
+ }) => Promise<boolean>;
166
+ /**
167
+ * Behavior when judgeFn throws or exceeds timeoutMs.
168
+ * 'accept' - return the cached response with confidence: 'uncertain'
169
+ * (current pre-judge behavior, fail-open).
170
+ * 'reject' - treat as a miss (fail-closed).
171
+ * Default: 'accept'.
172
+ */
173
+ onError?: 'accept' | 'reject';
174
+ /**
175
+ * Per-call timeout in milliseconds. Default: 2000.
176
+ * The judge function is raced against this timeout; timeout is treated
177
+ * the same as a thrown error and routed through onError.
178
+ *
179
+ * Note: the underlying promise is not cancelled on timeout — JavaScript has
180
+ * no built-in cancellation primitive. A real LLM HTTP request will continue
181
+ * running in the background after the timeout fires, consuming API quota.
182
+ * To stop the underlying request, use an AbortController inside judgeFn and
183
+ * abort it when the signal you manage fires.
184
+ */
185
+ timeoutMs?: number;
186
+ }
132
187
  export interface CacheCheckOptions {
133
188
  /** Per-request threshold override (cosine distance 0-2). Highest priority. */
134
189
  threshold?: number;
@@ -167,6 +222,11 @@ export interface CacheCheckOptions {
167
222
  * in rerankFn yourself.
168
223
  */
169
224
  rerank?: RerankOptions;
225
+ /**
226
+ * Optional LLM-as-judge adjudication for borderline hits.
227
+ * See JudgeOptions. Not supported by checkBatch(), which throws SemanticCacheUsageError - call check() per prompt instead.
228
+ */
229
+ judge?: JudgeOptions;
170
230
  }
171
231
  export interface CacheStoreOptions {
172
232
  /** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
@@ -223,10 +283,19 @@ export interface CacheCheckResult {
223
283
  /**
224
284
  * On a miss where a candidate existed but didn't clear the threshold,
225
285
  * describes how close it was. Useful for threshold tuning.
286
+ *
287
+ * Note: when the miss originates from a judge rejection, `deltaToThreshold`
288
+ * will be <= 0 because the score did clear the threshold — the judge said no.
289
+ * Existing non-judge misses always produce deltaToThreshold > 0.
290
+ * Use `deltaToThreshold <= 0` to detect judge-originated misses.
226
291
  */
227
292
  nearestMiss?: {
228
293
  similarity: number;
229
294
  deltaToThreshold: number;
295
+ /** The effective threshold that was applied. Present on judge-rejection misses. */
296
+ threshold?: number;
297
+ /** The Valkey key of the entry that was rejected. Present on judge-rejection misses. */
298
+ matchedKey?: string;
230
299
  };
231
300
  /**
232
301
  * Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@betterdb/semantic-cache",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
5
5
  "keywords": [
6
6
  "valkey",