npm - @betterdb/semantic-cache - Versions diffs - 0.1.0 → 0.2.0 - Mend

@betterdb/semantic-cache 0.1.0 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (41) hide show

package/README.md +211 -128
package/dist/SemanticCache.d.ts +85 -5
package/dist/SemanticCache.js +689 -47
package/dist/adapters/ai.js +6 -1
package/dist/adapters/anthropic.d.ts +32 -0
package/dist/adapters/anthropic.js +94 -0
package/dist/adapters/langchain.js +6 -1
package/dist/adapters/langgraph.d.ts +104 -0
package/dist/adapters/langgraph.js +271 -0
package/dist/adapters/llamaindex.d.ts +32 -0
package/dist/adapters/llamaindex.js +76 -0
package/dist/adapters/openai-responses.d.ts +31 -0
package/dist/adapters/openai-responses.js +112 -0
package/dist/adapters/openai.d.ts +42 -0
package/dist/adapters/openai.js +97 -0
package/dist/analytics.d.ts +24 -0
package/dist/analytics.js +116 -0
package/dist/cluster.d.ts +10 -0
package/dist/cluster.js +43 -0
package/dist/defaultCostTable.d.ts +11 -0
package/dist/defaultCostTable.js +1976 -0
package/dist/embed/bedrock.d.ts +32 -0
package/dist/embed/bedrock.js +109 -0
package/dist/embed/cohere.d.ts +34 -0
package/dist/embed/cohere.js +37 -0
package/dist/embed/ollama.d.ts +30 -0
package/dist/embed/ollama.js +24 -0
package/dist/embed/openai.d.ts +31 -0
package/dist/embed/openai.js +66 -0
package/dist/embed/voyage.d.ts +31 -0
package/dist/embed/voyage.js +32 -0
package/dist/index.d.ts +6 -1
package/dist/index.js +11 -1
package/dist/normalizer.d.ts +68 -0
package/dist/normalizer.js +102 -0
package/dist/telemetry.d.ts +3 -0
package/dist/telemetry.js +18 -0
package/dist/types.d.ts +107 -7
package/dist/utils.d.ts +58 -0
package/dist/utils.js +30 -0
package/package.json +81 -6

package/dist/SemanticCache.js CHANGED Viewed

@@ -2,10 +2,14 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.SemanticCache = void 0;
 const node_crypto_1 = require("node:crypto");
+const node_crypto_2 = require("node:crypto");
 const api_1 = require("@opentelemetry/api");
 const errors_1 = require("./errors");
 const telemetry_1 = require("./telemetry");
 const utils_1 = require("./utils");
+const defaultCostTable_1 = require("./defaultCostTable");
+const cluster_1 = require("./cluster");
+const analytics_1 = require("./analytics");
 const INVALIDATE_BATCH_SIZE = 1000;
 function errMsg(err) {
     return err instanceof Error ? err.message : String(err);
@@ -17,15 +21,27 @@ class SemanticCache {
     indexName;
     entryPrefix;
     statsKey;
+    similarityWindowKey;
     defaultThreshold;
     defaultTtl;
     categoryThresholds;
     uncertaintyBand;
     telemetry;
+    costTable;
+    embeddingCacheEnabled;
+    embeddingCacheTtl;
+    embedKeyPrefix;
     _initialized = false;
     _dimension = 0;
+    _hasBinaryRefs = false;
     _initPromise = null;
     _initGeneration = 0;
+    analyticsOpts;
+    usesDefaultCostTable;
+    analytics = analytics_1.NOOP_ANALYTICS;
+    statsTimer;
+    shutdownCalled = false;
+    analyticsInitiated = false;
     /**
      * Creates a new SemanticCache instance.
      *
@@ -42,17 +58,35 @@ class SemanticCache {
         this.indexName = `${this.name}:idx`;
         this.entryPrefix = `${this.name}:entry:`;
         this.statsKey = `${this.name}:__stats`;
+        this.similarityWindowKey = `${this.name}:__similarity_window`;
+        this.embedKeyPrefix = `${this.name}:embed:`;
         this.defaultThreshold = options.defaultThreshold ?? 0.1;
         this.defaultTtl = options.defaultTtl;
         this.categoryThresholds = options.categoryThresholds ?? {};
         this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
+        // Build effective cost table
+        const useDefault = options.useDefaultCostTable ?? true;
+        if (!useDefault && !options.costTable) {
+            this.costTable = undefined;
+        }
+        else if (!useDefault) {
+            this.costTable = options.costTable;
+        }
+        else {
+            this.costTable = { ...defaultCostTable_1.DEFAULT_COST_TABLE, ...(options.costTable ?? {}) };
+        }
+        // Embedding cache config
+        this.embeddingCacheEnabled = options.embeddingCache?.enabled ?? true;
+        this.embeddingCacheTtl = options.embeddingCache?.ttl ?? 86400;
         this.telemetry = (0, telemetry_1.createTelemetry)({
             prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
             tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
             registry: options.telemetry?.registry,
         });
+        this.analyticsOpts = options.analytics;
+        this.usesDefaultCostTable = useDefault;
     }
-    // ── Lifecycle ──────────────────────────────────────────────
+    // -- Lifecycle --
     async initialize() {
         if (!this._initPromise) {
             this._initPromise = this._doInitialize().catch((err) => {
@@ -65,7 +99,6 @@ class SemanticCache {
     async flush() {
         // Mark uninitialized immediately so concurrent check()/store() calls get
         // a clear SemanticCacheUsageError instead of cryptic Valkey errors.
-        // Bump generation so any in-flight _doInitialize() won't overwrite this state.
         this._initialized = false;
         this._initPromise = null;
         this._initGeneration++;
@@ -79,33 +112,59 @@ class SemanticCache {
                 throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
             }
         }
-        const entryPattern = `${this.name}:entry:*`;
-        let cursor = '0';
-        do {
-            const [nextCursor, keys] = await this.client.scan(cursor, 'MATCH', entryPattern, 'COUNT', '100');
-            cursor = nextCursor;
-            if (keys.length > 0)
-                await this.client.del(keys);
-        } while (cursor !== '0');
+        // Cluster-aware SCAN for entry keys and embed cache keys
+        const patterns = [
+            `${this.name}:entry:*`,
+            `${this.name}:embed:*`,
+        ];
+        for (const pattern of patterns) {
+            await (0, cluster_1.clusterScan)(this.client, pattern, async (keys, nodeClient) => {
+                await nodeClient.del(keys);
+            });
+        }
         await this.client.del(this.statsKey);
+        await this.client.del(this.similarityWindowKey);
+        this.analytics.capture('cache_flush');
+    }
+    /** Shut down the analytics client and cancel the stats timer. */
+    async shutdown() {
+        this.shutdownCalled = true;
+        if (this.statsTimer) {
+            clearInterval(this.statsTimer);
+            this.statsTimer = undefined;
+        }
+        await this.analytics.shutdown();
     }
-    // ── Public operations ──────────────────────────────────────
+    // -- Public operations --
     async check(prompt, options) {
         this.assertInitialized('check');
         return this.traced('check', async (span) => {
             const category = options?.category ?? '';
-            const k = options?.k ?? 1;
             const threshold = options?.threshold ??
                 (category && this.categoryThresholds[category] !== undefined
                     ? this.categoryThresholds[category]
                     : this.defaultThreshold);
-            const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
+            // Resolve text and binary refs from prompt
+            const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
+            // Stale model detection
+            const checkStale = (options?.staleAfterModelChange ?? false) && !!options?.currentModel;
+            // Rerank option
+            const rerankOpts = options?.rerank;
+            const k = rerankOpts ? rerankOpts.k : (options?.k ?? 1);
+            const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
             this.assertDimension(embedding);
-            // FT.SEARCH — Valkey Search 1.2 rejects KNN aliases in RETURN/SORTBY,
-            // so we omit both. Results include all fields and are pre-sorted by distance.
+            // Build filter
+            const userFilter = options?.filter;
+            // AND semantics: each ref must be present — chain separate TAG clauses.
+            const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
+                ? (binaryRefs.length === 1
+                    ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
+                    : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
+                : null;
+            const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
+            const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
+            const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
             const searchStart = performance.now();
-            const filter = options?.filter;
-            const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
             let rawResult;
             try {
                 rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
@@ -136,6 +195,9 @@ class SemanticCache {
             }
             // Miss (no usable score, or score exceeds threshold)
             if (isNaN(score) || score > threshold) {
+                if (!isNaN(score)) {
+                    await this.recordSimilarityWindow(score, 'miss', category);
+                }
                 await this.recordStat('misses');
                 this.telemetry.metrics.requestsTotal
                     .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
@@ -151,42 +213,148 @@ class SemanticCache {
                 }
                 return result;
             }
-            // Hit
-            const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
+            // Rerank: apply rerankFn to all candidates above threshold
+            let winnerParsedIndex = 0;
+            if (rerankOpts && parsed.length > 0) {
+                // Preserve the original parsed[] index alongside each candidate so we
+                // can map back even when NaN-scored entries are filtered out.
+                const indexedCandidates = parsed
+                    .map((r, i) => ({ i, s: parseFloat(r.fields['__score'] ?? 'NaN') }))
+                    .filter(({ s }) => !isNaN(s))
+                    .map(({ i, s }) => ({
+                    origIdx: i,
+                    candidate: { response: parsed[i].fields['response'] ?? '', similarity: s },
+                }));
+                const picked = await rerankOpts.rerankFn(promptText, indexedCandidates.map((x) => x.candidate));
+                // Explicit bounds check: -1 means "reject all"; out-of-range is a caller bug
+                // treated as a miss rather than silently falling back to the top candidate.
+                if (picked === -1 || picked < 0 || picked >= indexedCandidates.length) {
+                    await this.recordSimilarityWindow(score, 'miss', category);
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
+                    span.setAttributes({ 'cache.hit': false, 'cache.name': this.name, 'cache.reranked': true });
+                    return { hit: false, confidence: 'miss' };
+                }
+                // Map back to the original parsed[] index (not the candidates[] index)
+                winnerParsedIndex = indexedCandidates[picked].origIdx;
+            }
+            const winner = parsed[winnerParsedIndex] ?? parsed[0];
+            const winnerScore = parseFloat(winner.fields['__score'] ?? String(score));
+            // Stale model check: if winner's model differs from currentModel, evict and treat as miss
+            if (checkStale) {
+                const storedModel = winner.fields['model'] ?? '';
+                if (storedModel && storedModel !== options.currentModel) {
+                    // Evict stale entry
+                    try {
+                        await this.client.del(winner.key);
+                    }
+                    catch { /* best effort */ }
+                    await this.recordSimilarityWindow(winnerScore, 'miss', category);
+                    this.telemetry.metrics.staleModelEvictions.labels({ cache_name: this.name }).inc();
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
+                    span.setAttributes({ 'cache.hit': false, 'cache.stale_evicted': true });
+                    return { hit: false, confidence: 'miss' };
+                }
+            }
+            // All checks passed — record as a genuine hit
+            await this.recordSimilarityWindow(winnerScore, 'hit', category);
+            const confidence = winnerScore >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
             await this.recordStat('hits');
             const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
             this.telemetry.metrics.requestsTotal
                 .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
-            const matchedKey = parsed[0].key;
+            const matchedKey = winner.key;
             if (this.defaultTtl !== undefined && matchedKey) {
                 await this.client.expire(matchedKey, this.defaultTtl);
             }
+            // Cost saved
+            let costSaved;
+            const costMicrosStr = winner.fields['cost_micros'];
+            if (costMicrosStr) {
+                const costMicros = parseInt(costMicrosStr, 10);
+                if (!isNaN(costMicros) && costMicros > 0) {
+                    costSaved = costMicros / 1_000_000;
+                    // Atomically increment cost_saved_micros in stats
+                    await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
+                    this.telemetry.metrics.costSavedTotal
+                        .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
+                }
+            }
+            // Content blocks
+            let contentBlocks;
+            const contentBlocksStr = winner.fields['content_blocks'];
+            if (contentBlocksStr) {
+                try {
+                    contentBlocks = JSON.parse(contentBlocksStr);
+                }
+                catch { /* ignore parse errors */ }
+            }
             span.setAttributes({
-                'cache.hit': true, 'cache.similarity': score, 'cache.threshold': threshold,
+                'cache.hit': true, 'cache.similarity': winnerScore, 'cache.threshold': threshold,
                 'cache.confidence': confidence, 'cache.matched_key': matchedKey,
                 'cache.category': categoryLabel, ...timingAttrs,
             });
-            return {
-                hit: true, response: parsed[0].fields['response'],
-                similarity: score, confidence, matchedKey,
+            const result = {
+                hit: true, response: winner.fields['response'],
+                similarity: winnerScore, confidence, matchedKey,
             };
+            if (costSaved !== undefined)
+                result.costSaved = costSaved;
+            if (contentBlocks)
+                result.contentBlocks = contentBlocks;
+            return result;
         });
     }
     async store(prompt, response, options) {
         this.assertInitialized('store');
         return this.traced('store', async (span) => {
-            const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
+            const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
+            const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
             this.assertDimension(embedding);
             const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
             const category = options?.category ?? '';
             const model = options?.model ?? '';
+            // Compute cost if tokens and model provided
+            let costMicros;
+            if (options?.model &&
+                options?.inputTokens !== undefined &&
+                options?.outputTokens !== undefined &&
+                this.costTable) {
+                const pricing = this.costTable[options.model];
+                if (pricing) {
+                    costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
+                        options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
+                }
+            }
+            const hashFields = {
+                prompt: promptText,
+                response,
+                model,
+                category,
+                inserted_at: Date.now().toString(),
+                metadata: JSON.stringify(options?.metadata ?? {}),
+                embedding: (0, utils_1.encodeFloat32)(embedding),
+            };
+            if (binaryRefs.length > 0) {
+                hashFields['binary_refs'] = binaryRefs.join(',');
+            }
+            if (costMicros !== undefined && costMicros > 0) {
+                hashFields['cost_micros'] = String(costMicros);
+            }
+            if (options?.temperature !== undefined) {
+                hashFields['temperature'] = String(options.temperature);
+            }
+            if (options?.topP !== undefined) {
+                hashFields['top_p'] = String(options.topP);
+            }
+            if (options?.seed !== undefined) {
+                hashFields['seed'] = String(options.seed);
+            }
             try {
-                await this.client.hset(entryKey, {
-                    prompt, response, model, category,
-                    inserted_at: Date.now().toString(),
-                    metadata: JSON.stringify(options?.metadata ?? {}),
-                    embedding: (0, utils_1.encodeFloat32)(embedding),
-                });
+                await this.client.hset(entryKey, hashFields);
             }
             catch (err) {
                 throw new errors_1.ValkeyCommandError('HSET', err);
@@ -202,11 +370,195 @@ class SemanticCache {
             return entryKey;
         });
     }
+    /**
+     * Store structured content blocks as the cached response.
+     * Populates both the response field (from TextBlock text) and content_blocks (full JSON).
+     */
+    async storeMultipart(prompt, blocks, options) {
+        this.assertInitialized('storeMultipart');
+        return this.traced('storeMultipart', async (span) => {
+            const { text: promptText, binaryRefs } = await this.resolvePrompt(prompt);
+            const { vector: embedding, durationSec: embedSec } = await this.embed(promptText);
+            this.assertDimension(embedding);
+            // Derive text response from blocks for backward compat
+            const textResponse = (0, utils_1.extractText)(blocks);
+            const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
+            const category = options?.category ?? '';
+            const model = options?.model ?? '';
+            let costMicros;
+            if (options?.model && options?.inputTokens !== undefined && options?.outputTokens !== undefined && this.costTable) {
+                const pricing = this.costTable[options.model];
+                if (pricing) {
+                    costMicros = Math.round((options.inputTokens * pricing.inputPer1k / 1000 +
+                        options.outputTokens * pricing.outputPer1k / 1000) * 1_000_000);
+                }
+            }
+            const hashFields = {
+                prompt: promptText,
+                response: textResponse,
+                model,
+                category,
+                inserted_at: Date.now().toString(),
+                metadata: JSON.stringify(options?.metadata ?? {}),
+                embedding: (0, utils_1.encodeFloat32)(embedding),
+                content_blocks: JSON.stringify(blocks),
+            };
+            if (binaryRefs.length > 0) {
+                hashFields['binary_refs'] = binaryRefs.join(',');
+            }
+            if (costMicros !== undefined && costMicros > 0) {
+                hashFields['cost_micros'] = String(costMicros);
+            }
+            if (options?.temperature !== undefined)
+                hashFields['temperature'] = String(options.temperature);
+            if (options?.topP !== undefined)
+                hashFields['top_p'] = String(options.topP);
+            if (options?.seed !== undefined)
+                hashFields['seed'] = String(options.seed);
+            try {
+                await this.client.hset(entryKey, hashFields);
+            }
+            catch (err) {
+                throw new errors_1.ValkeyCommandError('HSET', err);
+            }
+            const ttl = options?.ttl ?? this.defaultTtl;
+            if (ttl !== undefined)
+                await this.client.expire(entryKey, ttl);
+            span.setAttributes({
+                'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
+                'cache.category': category || 'none', 'cache.model': model || 'none',
+                'embedding_latency_ms': embedSec * 1000,
+            });
+            return entryKey;
+        });
+    }
+    /**
+     * Check multiple prompts in parallel, using pipelined FT.SEARCH calls.
+     * Returns results in input order.
+     */
+    async checkBatch(prompts, options) {
+        this.assertInitialized('checkBatch');
+        if (prompts.length === 0)
+            return [];
+        if (options?.rerank) {
+            throw new errors_1.SemanticCacheUsageError("checkBatch() does not support the 'rerank' option. Use check() for reranking individual prompts.");
+        }
+        if (options?.staleAfterModelChange) {
+            throw new errors_1.SemanticCacheUsageError("checkBatch() does not support 'staleAfterModelChange'. Use check() for stale-model eviction.");
+        }
+        return this.traced('checkBatch', async (span) => {
+            // Resolve all prompts and embed in parallel
+            const resolved = await Promise.all(prompts.map((p) => this.resolvePrompt(p)));
+            const embeddings = await Promise.all(resolved.map(({ text }) => this.embed(text)));
+            const category = options?.category ?? '';
+            const threshold = options?.threshold ??
+                (category && this.categoryThresholds[category] !== undefined
+                    ? this.categoryThresholds[category]
+                    : this.defaultThreshold);
+            const k = options?.k ?? 1;
+            const userFilter = options?.filter;
+            // Pipeline all FT.SEARCH calls
+            const pipeline = this.client.pipeline();
+            for (let i = 0; i < prompts.length; i++) {
+                const { binaryRefs } = resolved[i];
+                const { vector: embedding } = embeddings[i];
+                const binaryFilter = binaryRefs.length > 0 && this._hasBinaryRefs
+                    ? (binaryRefs.length === 1
+                        ? `@binary_refs:{${(0, utils_1.escapeTag)(binaryRefs[0])}}`
+                        : binaryRefs.map((r) => `@binary_refs:{${(0, utils_1.escapeTag)(r)}}`).join(' '))
+                    : null;
+                const combinedFilter = [userFilter, binaryFilter].filter(Boolean).join(' ');
+                const filterExpr = combinedFilter ? `(${combinedFilter})` : '*';
+                const query = `${filterExpr}=>[KNN ${k} @embedding $vec AS __score]`;
+                pipeline.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
+            }
+            const pipelineResults = await pipeline.exec();
+            span.setAttributes({ 'cache.batch_size': prompts.length, 'cache.name': this.name });
+            const results = [];
+            const categoryLabel = category || 'none';
+            for (let i = 0; i < prompts.length; i++) {
+                const pipelineEntry = pipelineResults?.[i];
+                const err = pipelineEntry?.[0];
+                const rawResult = pipelineEntry?.[1];
+                if (err) {
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
+                    results.push({ hit: false, confidence: 'miss' });
+                    continue;
+                }
+                const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
+                if (parsed.length === 0) {
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
+                    results.push({ hit: false, confidence: 'miss' });
+                    continue;
+                }
+                const scoreStr = parsed[0].fields['__score'];
+                const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
+                if (isNaN(score) || score > threshold) {
+                    if (!isNaN(score)) {
+                        await this.recordSimilarityWindow(score, 'miss', category);
+                    }
+                    await this.recordStat('misses');
+                    this.telemetry.metrics.requestsTotal
+                        .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
+                    const result = { hit: false, confidence: 'miss' };
+                    if (!isNaN(score)) {
+                        result.similarity = score;
+                        result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
+                    }
+                    results.push(result);
+                    continue;
+                }
+                await this.recordSimilarityWindow(score, 'hit', category);
+                const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
+                await this.recordStat('hits');
+                const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
+                this.telemetry.metrics.requestsTotal
+                    .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
+                const matchedKey = parsed[0].key;
+                if (this.defaultTtl !== undefined && matchedKey) {
+                    await this.client.expire(matchedKey, this.defaultTtl);
+                }
+                let costSaved;
+                const costMicrosStr = parsed[0].fields['cost_micros'];
+                if (costMicrosStr) {
+                    const costMicros = parseInt(costMicrosStr, 10);
+                    if (!isNaN(costMicros) && costMicros > 0) {
+                        costSaved = costMicros / 1_000_000;
+                        await this.client.hincrby(this.statsKey, 'cost_saved_micros', costMicros);
+                        this.telemetry.metrics.costSavedTotal
+                            .labels({ cache_name: this.name, category: categoryLabel }).inc(costSaved);
+                    }
+                }
+                let contentBlocks;
+                const contentBlocksStr = parsed[0].fields['content_blocks'];
+                if (contentBlocksStr) {
+                    try {
+                        contentBlocks = JSON.parse(contentBlocksStr);
+                    }
+                    catch { /* ignore */ }
+                }
+                const result = {
+                    hit: true, response: parsed[0].fields['response'],
+                    similarity: score, confidence, matchedKey,
+                };
+                if (costSaved !== undefined)
+                    result.costSaved = costSaved;
+                if (contentBlocks)
+                    result.contentBlocks = contentBlocks;
+                results.push(result);
+            }
+            return results;
+        });
+    }
     /**
      * Deletes all entries matching a valkey-search filter expression.
      *
      * **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
-     * trusted, programmatically-constructed expressions — never unsanitised
+     * trusted, programmatically-constructed expressions - never unsanitised
      * user input.
      */
     async invalidate(filter) {
@@ -242,13 +594,34 @@ class SemanticCache {
             return { deleted: keys.length, truncated };
         });
     }
+    /** Delete all entries tagged with the given model name. */
+    async invalidateByModel(model) {
+        let total = 0;
+        let result;
+        do {
+            result = await this.invalidate(`@model:{${(0, utils_1.escapeTag)(model)}}`);
+            total += result.deleted;
+        } while (result.truncated);
+        return total;
+    }
+    /** Delete all entries tagged with the given category. */
+    async invalidateByCategory(category) {
+        let total = 0;
+        let result;
+        do {
+            result = await this.invalidate(`@category:{${(0, utils_1.escapeTag)(category)}}`);
+            total += result.deleted;
+        } while (result.truncated);
+        return total;
+    }
     async stats() {
         this.assertInitialized('stats');
         const raw = await this.client.hgetall(this.statsKey);
-        const hits = parseInt(raw.hits ?? '0', 10);
-        const misses = parseInt(raw.misses ?? '0', 10);
-        const total = parseInt(raw.total ?? '0', 10);
-        return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total };
+        const hits = parseInt(raw?.hits ?? '0', 10);
+        const misses = parseInt(raw?.misses ?? '0', 10);
+        const total = parseInt(raw?.total ?? '0', 10);
+        const costSavedMicros = parseInt(raw?.cost_saved_micros ?? '0', 10);
+        return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total, costSavedMicros };
     }
     async indexInfo() {
         this.assertInitialized('indexInfo');
@@ -271,27 +644,217 @@ class SemanticCache {
         }
         return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
     }
-    // ── Private helpers ────────────────────────────────────────
+    /**
+     * Analyze the rolling similarity score window and recommend threshold adjustments.
+     */
+    async thresholdEffectiveness(options) {
+        this.assertInitialized('thresholdEffectiveness');
+        const minSamples = options?.minSamples ?? 100;
+        const category = options?.category;
+        const threshold = category && this.categoryThresholds[category] !== undefined
+            ? this.categoryThresholds[category]
+            : this.defaultThreshold;
+        // Read all window entries
+        let rawEntries;
+        try {
+            rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
+        }
+        catch {
+            rawEntries = [];
+        }
+        // Parse and optionally filter by category
+        const entries = [];
+        for (const raw of rawEntries) {
+            try {
+                const entry = JSON.parse(String(raw));
+                if (typeof entry.score === 'number' &&
+                    (entry.result === 'hit' || entry.result === 'miss')) {
+                    if (!category || entry.category === category) {
+                        entries.push(entry);
+                    }
+                }
+            }
+            catch { /* skip corrupt entries */ }
+        }
+        const sampleCount = entries.length;
+        const categoryLabel = category ?? 'all';
+        if (sampleCount < minSamples) {
+            return {
+                category: categoryLabel,
+                sampleCount,
+                currentThreshold: threshold,
+                hitRate: 0,
+                uncertainHitRate: 0,
+                nearMissRate: 0,
+                avgHitSimilarity: 0,
+                avgMissSimilarity: 0,
+                recommendation: 'insufficient_data',
+                reasoning: `Only ${sampleCount} samples collected; ${minSamples} required for a reliable recommendation.`,
+            };
+        }
+        const hits = entries.filter((e) => e.result === 'hit');
+        const misses = entries.filter((e) => e.result === 'miss');
+        const hitRate = hits.length / sampleCount;
+        const uncertainHits = hits.filter((e) => e.score >= threshold - this.uncertaintyBand);
+        const uncertainHitRate = hits.length > 0 ? uncertainHits.length / hits.length : 0;
+        // Near-misses are scores just ABOVE the threshold (genuine close misses).
+        // Scores below the threshold recorded as misses (rerank rejection, stale eviction)
+        // must be excluded — they produce negative avgNearMissDelta, causing
+        // recommendedThreshold = threshold + negative < threshold, contradicting "loosen".
+        const nearMisses = misses.filter((e) => e.score > threshold && e.score <= threshold + 0.03);
+        const nearMissRate = misses.length > 0 ? nearMisses.length / misses.length : 0;
+        const avgHitSimilarity = hits.length > 0 ? hits.reduce((s, e) => s + e.score, 0) / hits.length : 0;
+        const avgMissSimilarity = misses.length > 0 ? misses.reduce((s, e) => s + e.score, 0) / misses.length : 0;
+        // avgNearMissDelta: how far above the threshold near-misses are on average
+        const avgNearMissDelta = nearMisses.length > 0
+            ? nearMisses.reduce((s, e) => s + (e.score - threshold), 0) / nearMisses.length
+            : 0;
+        let recommendation;
+        let recommendedThreshold;
+        let reasoning;
+        if (uncertainHitRate > 0.2) {
+            recommendation = 'tighten_threshold';
+            recommendedThreshold = Math.max(0, threshold - this.uncertaintyBand * 1.5);
+            reasoning = `${(uncertainHitRate * 100).toFixed(1)}% of hits are in the uncertainty band - tighten the threshold to reduce false positives.`;
+        }
+        else if (nearMissRate > 0.3 && avgNearMissDelta < 0.03) {
+            recommendation = 'loosen_threshold';
+            recommendedThreshold = threshold + avgNearMissDelta;
+            reasoning = `${(nearMissRate * 100).toFixed(1)}% of misses are very close to the threshold - consider loosening to capture more hits.`;
+        }
+        else {
+            recommendation = 'optimal';
+            reasoning = `Hit rate is ${(hitRate * 100).toFixed(1)}% with ${(uncertainHitRate * 100).toFixed(1)}% uncertain hits - threshold appears well-calibrated.`;
+        }
+        return {
+            category: categoryLabel,
+            sampleCount,
+            currentThreshold: threshold,
+            hitRate,
+            uncertainHitRate,
+            nearMissRate,
+            avgHitSimilarity,
+            avgMissSimilarity,
+            recommendation,
+            recommendedThreshold,
+            reasoning,
+        };
+    }
+    /**
+     * Returns threshold effectiveness results for every category seen in the
+     * rolling window, plus one aggregate result for all categories combined.
+     */
+    async thresholdEffectivenessAll(options) {
+        this.assertInitialized('thresholdEffectivenessAll');
+        let rawEntries;
+        try {
+            rawEntries = (await this.client.zrange(this.similarityWindowKey, '0', '-1'));
+        }
+        catch {
+            rawEntries = [];
+        }
+        // Collect unique categories
+        const categories = new Set();
+        for (const raw of rawEntries) {
+            try {
+                const entry = JSON.parse(raw);
+                if (entry.category)
+                    categories.add(entry.category);
+            }
+            catch { /* skip */ }
+        }
+        const results = await Promise.all([
+            this.thresholdEffectiveness({ minSamples: options?.minSamples }),
+            ...[...categories].filter(Boolean).map((cat) => this.thresholdEffectiveness({ category: cat, minSamples: options?.minSamples })),
+        ]);
+        return results;
+    }
+    // -- Internal helpers exposed to package adapters --
+    /** @internal Default similarity threshold. */
+    get _defaultThreshold() { return this.defaultThreshold; }
+    /**
+     * Execute a stable FT.SEARCH for use by adapters (e.g. LangGraph).
+     * SORTBY inserted_at ASC gives stable ordering across paginated calls.
+     * @internal
+     */
+    async _searchEntries(filterExpr, limit, offset) {
+        return this.client.call('FT.SEARCH', this.indexName, filterExpr, 'SORTBY', 'inserted_at', 'ASC', 'LIMIT', String(offset), String(limit), 'DIALECT', '2');
+    }
+    /**
+     * Embed text for use by adapters (e.g. LangGraph semantic search).
+     * @internal
+     */
+    async _embedText(text) {
+        return this.embed(text);
+    }
+    // -- Private helpers --
     async _doInitialize() { // Resolves the index dimension/schema info inside an 'initialize' span and marks the cache initialized.
         const gen = this._initGeneration; // snapshot taken before any await; re-checked after the index lookup
         return this.traced('initialize', async () => {
-            const dim = await this.ensureIndexAndGetDimension();
-            // If flush() ran while we were initializing, don't overwrite its state.
+            const { dim, hasBinaryRefs } = await this.ensureIndexAndGetDimension();
             if (this._initGeneration !== gen) // generation changed during the await (presumably bumped by flush()): leave instance state untouched
                 return;
             this._dimension = dim;
+            this._hasBinaryRefs = hasBinaryRefs;
             this._initialized = true;
+            // Fire analytics init once (not on every flush+initialize cycle)
+            this.initAnalyticsSafe().catch(() => { }); // not awaited; any rejection is deliberately ignored
         });
     }
+    async initAnalyticsSafe() {
+        if (this.analyticsInitiated)
+            return;
+        this.analyticsInitiated = true;
+        try {
+            const a = await (0, analytics_1.createAnalytics)(this.analyticsOpts);
+            if (this.shutdownCalled) {
+                await a.shutdown();
+                return;
+            }
+            this.analytics = a;
+            await a.init(this.client, this.name, {
+                defaultThreshold: this.defaultThreshold,
+                uncertaintyBand: this.uncertaintyBand,
+                defaultTtl: this.defaultTtl ?? null,
+                hasCostTable: !!this.costTable,
+                usesDefaultCostTable: this.usesDefaultCostTable,
+                embeddingCacheEnabled: this.embeddingCacheEnabled,
+                categoryThresholdCount: Object.keys(this.categoryThresholds).length,
+                dimension: this._dimension,
+            });
+            const intervalMs = this.analyticsOpts?.statsIntervalMs ?? 300_000;
+            if (!this.shutdownCalled && intervalMs > 0) {
+                this.statsTimer = setInterval(() => this.captureStatsSnapshot(), intervalMs);
+                this.statsTimer.unref();
+            }
+        }
+        catch {
+            // never throw from analytics
+        }
+    }
+    captureStatsSnapshot() {
+        this.stats()
+            .then((s) => {
+            this.analytics.capture('stats_snapshot', {
+                hits: s.hits,
+                misses: s.misses,
+                hit_rate: s.hitRate,
+                cost_saved_micros: s.costSavedMicros,
+            });
+        })
+            .catch(() => { });
+    }
     async ensureIndexAndGetDimension() {
         // Try reading an existing index
         try {
             const info = (await this.client.call('FT.INFO', this.indexName));
             const dim = this.parseDimensionFromInfo(info);
+            const hasBinaryRefs = this.parseHasBinaryRefsFromInfo(info);
             if (dim > 0)
-                return dim;
-            // Couldn't parse dimension from FT.INFO — fall back to probe
-            return (await this.embed('probe')).vector.length;
+                return { dim, hasBinaryRefs };
+            // Couldn't parse dimension from FT.INFO - fall back to probe
+            const probeDim = (await this.embed('probe')).vector.length;
+            return { dim: probeDim, hasBinaryRefs };
         }
         catch (err) {
             if (err instanceof errors_1.EmbeddingError)
@@ -300,18 +863,69 @@ class SemanticCache {
                 throw new errors_1.ValkeyCommandError('FT.INFO', err);
             }
         }
-        // Index doesn't exist — probe dimension and create it
+        // Index doesn't exist - probe dimension and create it
         const dim = (await this.embed('probe')).vector.length;
         try {
-            await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
+            await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'binary_refs', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'temperature', 'NUMERIC', 'top_p', 'NUMERIC', 'seed', 'NUMERIC', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
         }
         catch (err) {
             throw new errors_1.ValkeyCommandError('FT.CREATE', err);
         }
-        return dim;
+        return { dim, hasBinaryRefs: true };
+    }
+    /** Check if the index schema has a binary_refs field. */
+    parseHasBinaryRefsFromInfo(info) {
+        for (let i = 0; i < info.length - 1; i += 2) {
+            const key = String(info[i]);
+            if (key !== 'attributes' && key !== 'fields')
+                continue;
+            const attributes = info[i + 1];
+            if (!Array.isArray(attributes))
+                continue;
+            for (const attr of attributes) {
+                if (!Array.isArray(attr))
+                    continue;
+                for (let j = 0; j < attr.length - 1; j++) {
+                    if (String(attr[j]) === 'identifier' && String(attr[j + 1]) === 'binary_refs') {
+                        return true;
+                    }
+                }
+            }
+        }
+        return false;
     }
-    /** Wraps embedFn with error handling and duration tracking. */
+    /** Resolve a prompt (string or ContentBlock[]) into text + binary refs. */
+    resolvePrompt(prompt) {
+        if (typeof prompt === 'string') {
+            return { text: prompt, binaryRefs: [] };
+        }
+        const text = (0, utils_1.extractText)(prompt);
+        const binaryRefs = (0, utils_1.extractBinaryRefs)(prompt);
+        return { text, binaryRefs };
+    }
+    /** Wraps embedFn with error handling, duration tracking, and optional embedding cache. */
     async embed(text) {
+        // Check embedding cache
+        if (this.embeddingCacheEnabled && text) {
+            const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
+            const embedKey = `${this.embedKeyPrefix}${hash}`;
+            try {
+                const cached = await this.client.getBuffer(embedKey);
+                if (cached) {
+                    this.telemetry.metrics.embeddingCacheTotal
+                        .labels({ cache_name: this.name, result: 'hit' }).inc();
+                    // Decode Float32 buffer
+                    const vector = [];
+                    for (let i = 0; i < cached.length; i += 4) {
+                        vector.push(cached.readFloatLE(i));
+                    }
+                    return { vector, durationSec: 0 };
+                }
+            }
+            catch { /* ignore cache read errors */ }
+            this.telemetry.metrics.embeddingCacheTotal
+                .labels({ cache_name: this.name, result: 'miss' }).inc();
+        }
         const start = performance.now();
         let vector;
         try {
@@ -324,12 +938,22 @@ class SemanticCache {
         this.telemetry.metrics.embeddingDuration
             .labels({ cache_name: this.name })
             .observe(durationSec);
+        // Store in embedding cache
+        if (this.embeddingCacheEnabled && text) {
+            const hash = (0, node_crypto_2.createHash)('sha256').update(text).digest('hex');
+            const embedKey = `${this.embedKeyPrefix}${hash}`;
+            try {
+                const buf = (0, utils_1.encodeFloat32)(vector);
+                await this.client.set(embedKey, buf, 'EX', this.embeddingCacheTtl);
+            }
+            catch { /* ignore cache write errors */ }
+        }
         return { vector, durationSec };
     }
     /**
      * Wraps a method body in an OTel span with automatic status, end, and
      * operation duration metric. The span is passed to fn so callers can
-     * set attributes — but callers must NOT call span.end() or span.setStatus(),
+     * set attributes - but callers must NOT call span.end() or span.setStatus(),
      * as traced() handles both.
      */
     async traced(operation, fn) {
@@ -359,6 +983,24 @@ class SemanticCache {
         pipeline.hincrby(this.statsKey, field, 1);
         await pipeline.exec();
     }
+    /** Append to the rolling similarity window sorted set and trim to 10,000 entries or 7 days. */
+    async recordSimilarityWindow(score, result, category) {
+        const now = Date.now();
+        // Include a unique nonce so identical (score, result, category) tuples are
+        // each recorded as distinct ZADD members instead of overwriting each other.
+        const member = JSON.stringify({ score, result, category, _n: Math.random() });
+        const sevenDaysAgo = now - 7 * 24 * 60 * 60 * 1000;
+        try {
+            const pipeline = this.client.pipeline();
+            pipeline.zadd(this.similarityWindowKey, now, member);
+            // Trim by time: remove entries older than 7 days
+            pipeline.zremrangebyscore(this.similarityWindowKey, '-inf', sevenDaysAgo);
+            // Trim by count: keep at most 10,000 most recent
+            pipeline.zremrangebyrank(this.similarityWindowKey, 0, -10001);
+            await pipeline.exec();
+        }
+        catch { /* best effort - never fail on window writes */ }
+    }
     assertInitialized(method) {
         if (!this._initialized) {
             throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);