npm - @betterdb/semantic-cache - Versions diffs - 0.4.0 → 0.5.0 - Mend

@betterdb/semantic-cache 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -96,6 +96,54 @@ A lookup is a **hit** when `score <= threshold`. The default threshold is `0.1`.
 | Conversational / RAG | `0.15` | Paraphrases hit as `high` confidence |
 | Broad search / recall | `0.20` | High hit rate, review uncertain hits |
+## LLM-as-judge
+When a hit lands in the uncertainty band (`threshold - uncertaintyBand <= score <= threshold`), you can supply a `judgeFn` to adjudicate automatically instead of handling `confidence: 'uncertain'` yourself.
+```typescript
+const result = await cache.check(userPrompt, {
+  judge: {
+    judgeFn: async ({ prompt, response, similarity, threshold, category }) => {
+      // Return true to accept (confidence → 'high')
+      // Return false to reject (treated as miss with nearestMiss)
+      const verdict = await openai.chat.completions.create({
+        model: 'gpt-5-mini',
+        messages: [
+          { role: 'system', content: 'Reply YES or NO only.' },
+          { role: 'user', content: `Does this cached response correctly answer the prompt?\nPrompt: ${prompt}\nResponse: ${response}` },
+        ],
+      });
+      return verdict.choices[0].message.content?.startsWith('YES') ?? false;
+    },
+    onError: 'accept',  // fail-open on judge errors (default)
+    timeoutMs: 2000,    // per-call timeout (default)
+  },
+});
+```
+**When the judge is invoked:** only for `confidence === 'uncertain'` hits. High-confidence hits, misses, and the zero-candidates case bypass the judge entirely.
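+**Accept path:** `result.hit === true`, `result.confidence === 'high'`. If `judgeFn` throws or times out and `onError` is `'accept'`, the hit is still returned but `result.confidence` stays `'uncertain'`.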
+**Reject path:** `result.hit === false`, `result.nearestMiss` populated with `deltaToThreshold <= 0` (use this to distinguish judge rejections from regular misses where `deltaToThreshold > 0`).
+**Composing with rerank:** when both `rerank` and `judge` are set, the judge receives the reranked pick's response and similarity score.
+**`checkBatch()` does not support `judge`.** Call `check()` individually for prompts that need adjudication.
+### CacheCheckOptions reference
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `threshold` | `number` | `defaultThreshold` | Per-request cosine distance threshold override |
+| `category` | `string` | — | Category tag for per-category thresholds and metric labels |
+| `filter` | `string` | — | FT.SEARCH pre-filter expression (trusted input only) |
+| `k` | `number` | `1` | KNN neighbours to fetch (ignored when `rerank` is set) |
+| `staleAfterModelChange` | `boolean` | `false` | Evict and miss when stored model differs from `currentModel` |
+| `currentModel` | `string` | — | Model to compare against stored entries |
+| `rerank` | `RerankOptions` | — | Rerank hook; see `RerankOptions` |
+| `judge` | `JudgeOptions` | — | LLM-as-judge for borderline hits; see `JudgeOptions`. Not supported by `checkBatch()`; throws `SemanticCacheUsageError` |
 ## Configuration Reference
 | Option | Type | Default | Description |

package/dist/SemanticCache.js CHANGED Viewed

@@ -314,14 +314,85 @@ class SemanticCache {
                     return { hit: false, confidence: 'miss' };
                 }
             }
-            // All checks passed — record as a genuine hit
+            // All checks passed — compute confidence (recordSimilarityWindow moves to after judge)
+            let confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
+            const matchedKey = winner.key;
+            // --- LLM-as-judge for borderline hits ---
+            if (options?.judge && confidence === 'uncertain') {
+                const judgeStart = performance.now();
+                const timeoutMs = options.judge.timeoutMs ?? 2000;
+                const onError = options.judge.onError ?? 'accept';
+                let decision;
+                try {
+                    const accepted = await raceWithTimeout(options.judge.judgeFn({
+                        prompt: promptText,
+                        response: winner.fields['response'] ?? '',
+                        similarity: winnerScore,
+                        threshold,
+                        category: category || undefined,
+                    }), timeoutMs);
+                    decision = accepted ? 'accept' : 'reject';
+                }
+                catch (err) {
+                    const isTimeout = err instanceof JudgeTimeoutError;
+                    if (onError === 'accept') {
+                        decision = isTimeout ? 'timeout_accept' : 'error_accept';
+                    }
+                    else {
+                        decision = isTimeout ? 'timeout_reject' : 'error_reject';
+                    }
+                }
+                const judgeSec = (performance.now() - judgeStart) / 1000;
+                this.telemetry.metrics.judgeDecisions
+                    .labels({ cache_name: this.name, category: categoryLabel, decision })
+                    .inc();
+                this.telemetry.metrics.judgeDuration
+                    .labels({ cache_name: this.name, category: categoryLabel, decision })
+                    .observe(judgeSec);
+                span.setAttributes({
+                    'cache.judge.invoked': true,
+                    'cache.judge.decision': decision,
+                    'cache.judge.latency_ms': judgeSec * 1000,
+                });
+                if (decision === 'accept') {
+                    confidence = 'high';
+                    // Fall through to hit-return path
+                }
+                else if (decision === 'error_accept' || decision === 'timeout_accept') {
+                    // Preserve 'uncertain'; fall through to hit-return path
+                }
+                else {
+                    // reject / error_reject / timeout_reject → treat as miss
+                    await this.recordSimilarityWindow(winnerScore, 'miss', category);
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel })
+                        .inc();
+                    span.setAttributes({
+                        'cache.hit': false,
+                        'cache.name': this.name,
+                        'cache.category': categoryLabel,
+                    });
+                    return {
+                        hit: false,
+                        confidence: 'miss',
+                        similarity: winnerScore,
+                        nearestMiss: {
+                            similarity: winnerScore,
+                            threshold,
+                            deltaToThreshold: winnerScore - threshold,
+                            matchedKey,
+                        },
+                    };
+                }
+            }
+            // --- End judge ---
+            // Record as genuine hit (moved here from before the judge block)
             await this.recordSimilarityWindow(winnerScore, 'hit', category);
-            const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
             await this.recordStat('hits');
             const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
             this.telemetry.metrics.requestsTotal
                 .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
-            const matchedKey = winner.key;
             if (this.defaultTtl !== undefined && matchedKey) {
                 await this.client.expire(matchedKey, this.defaultTtl);
             }
@@ -501,6 +572,9 @@ class SemanticCache {
         if (options?.staleAfterModelChange) {
             throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
         }
+        if (options?.judge) {
+            throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'judge' option. Use check() for LLM-as-judge adjudication.");
+        }
         return this.traced('checkBatch', async (span) => {
             // Resolve all prompts and embed in parallel
             const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
@@ -1233,3 +1307,17 @@ class SemanticCache {
     }
 }
 exports.SemanticCache = SemanticCache;
+// --- Judge helpers ---
+class JudgeTimeoutError extends Error {
+    constructor() {
+        super('judgeFn timed out');
+        this.name = 'JudgeTimeoutError';
+    }
+}
+function raceWithTimeout(p, timeoutMs) {
+    let timer;
+    const timeout = new Promise((_, reject) => {
+        timer = setTimeout(() => reject(new JudgeTimeoutError()), timeoutMs);
+    });
+    return Promise.race([p, timeout]).finally(() => clearTimeout(timer));
+}

package/dist/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 export { SemanticCache } from './SemanticCache';
 export type { ThresholdEffectivenessResult } from './SemanticCache';
 export { DEFAULT_COST_TABLE } from './defaultCostTable';
-export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, ConfigRefreshOptions, } from './types';
+export type { SemanticCacheOptions, CacheCheckOptions, CacheStoreOptions, CacheCheckResult, CacheStats, IndexInfo, InvalidateResult, CacheConfidence, EmbedFn, ModelCost, RerankOptions, JudgeOptions, ConfigRefreshOptions, } from './types';
 export { SemanticCacheUsageError, EmbeddingError, ValkeyCommandError, } from './errors';
 export type { ContentBlock, TextBlock, BinaryBlock, ToolCallBlock, ToolResultBlock, ReasoningBlock, BlockHints, } from './utils';
 export { escapeTag } from './utils';

package/dist/telemetry.d.ts CHANGED Viewed

@@ -15,6 +15,8 @@ interface CacheMetrics {
     staleModelEvictions: Counter;
     discoveryWriteFailed: Counter;
     configRefreshFailed: Counter;
+    judgeDecisions: Counter;
+    judgeDuration: Histogram;
 }
 export interface Telemetry {
     tracer: Tracer;

package/dist/telemetry.js CHANGED Viewed

@@ -67,6 +67,17 @@ function createTelemetry(opts) {
         help: 'Count of failed periodic config refreshes (HGETALL on __config).',
         labelNames: ['cache_name'],
     });
+    const judgeDecisions = getOrCreateCounter(registry, {
+        name: `${opts.prefix}_judge_decisions_total`,
+        help: 'LLM-as-judge decisions for borderline cache hits',
+        labelNames: ['cache_name', 'category', 'decision'],
+    });
+    const judgeDuration = getOrCreateHistogram(registry, {
+        name: `${opts.prefix}_judge_duration_seconds`,
+        help: 'Wall-clock duration of judgeFn invocations',
+        labelNames: ['cache_name', 'category', 'decision'],
+        buckets: [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
+    });
     return {
         tracer,
         metrics: {
@@ -79,6 +90,8 @@ function createTelemetry(opts) {
             staleModelEvictions,
             discoveryWriteFailed,
             configRefreshFailed,
+            judgeDecisions,
+            judgeDuration,
         },
     };
 }

package/dist/types.d.ts CHANGED Viewed

@@ -129,6 +129,61 @@ export interface RerankOptions {
         similarity: number;
     }>) => Promise<number>;
 }
+/**
+ * LLM-as-judge adjudication for borderline cache hits.
+ *
+ * When set on CacheCheckOptions, a hit whose cosine distance lands in the
+ * uncertainty band (threshold - uncertaintyBand <= score <= threshold) is
+ * passed to judgeFn before being returned. The judge accepts (promotes the
+ * hit to confidence: 'high') or rejects (treats it as a miss with
+ * nearestMiss populated).
+ *
+ * The judge is NOT invoked for:
+ *   - high-confidence hits (score < threshold - uncertaintyBand)
+ *   - misses (score > threshold)
+ *   - the no-candidates case (FT.SEARCH returned zero rows)
+ *
+ * When rerank is also set, the judge runs on the reranked pick, not the
+ * original top-1.
+ */
+export interface JudgeOptions {
+    /**
+     * Function that decides whether a borderline cache hit is acceptable.
+     * Return true to accept (caller receives confidence: 'high').
+     * Return false to reject (caller receives a miss with nearestMiss).
+     *
+     * The function receives the original prompt text (or the resolved text
+     * portion of a multipart prompt), the cached response, the cosine distance,
+     * the effective threshold, and the category if one was supplied to check().
+     */
+    judgeFn: (input: {
+        prompt: string;
+        response: string;
+        similarity: number;
+        threshold: number;
+        category: string | undefined;
+    }) => Promise<boolean>;
+    /**
+     * Behavior when judgeFn throws or exceeds timeoutMs.
+     *   'accept' - return the cached response with confidence: 'uncertain'
+     *              (current pre-judge behavior, fail-open).
+     *   'reject' - treat as a miss (fail-closed).
+     * Default: 'accept'.
+     */
+    onError?: 'accept' | 'reject';
+    /**
+     * Per-call timeout in milliseconds. Default: 2000.
+     * The judge function is raced against this timeout; timeout is treated
+     * the same as a thrown error and routed through onError.
+     *
+     * Note: the underlying promise is not cancelled on timeout — JavaScript has
+     * no built-in cancellation primitive. A real LLM HTTP request will continue
+     * running in the background after the timeout fires, consuming API quota.
+     * To stop the underlying request, use an AbortController inside judgeFn and
+     * abort it when the signal you manage fires.
+     */
+    timeoutMs?: number;
+}
 export interface CacheCheckOptions {
     /** Per-request threshold override (cosine distance 0-2). Highest priority. */
     threshold?: number;
@@ -167,6 +222,11 @@ export interface CacheCheckOptions {
      * in rerankFn yourself.
      */
     rerank?: RerankOptions;
+    /**
+     * Optional LLM-as-judge adjudication for borderline hits.
+     * See JudgeOptions. Not supported by checkBatch(), which throws
+     * SemanticCacheUsageError when this is set - call check() per prompt instead.
+     */
+    judge?: JudgeOptions;
 }
 export interface CacheStoreOptions {
     /** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
@@ -223,10 +283,19 @@ export interface CacheCheckResult {
     /**
      * On a miss where a candidate existed but didn't clear the threshold,
      * describes how close it was. Useful for threshold tuning.
+     *
+     * Note: when the miss originates from a judge rejection, `deltaToThreshold`
+     * will be <= 0 because the score did clear the threshold — the judge said no.
+     * Existing non-judge misses always produce deltaToThreshold > 0.
+     * Use `deltaToThreshold <= 0` to detect judge-originated misses.
      */
     nearestMiss?: {
         similarity: number;
         deltaToThreshold: number;
+        /** The effective threshold that was applied. Present on judge-rejection misses. */
+        threshold?: number;
+        /** The Valkey key of the entry that was rejected. Present on judge-rejection misses. */
+        matchedKey?: string;
     };
     /**
      * Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@betterdb/semantic-cache",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
   "keywords": [
     "valkey",