npm - @betterdb/semantic-cache - Versions diffs - 0.1.0 → 0.2.0 - Mend

@betterdb/semantic-cache 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/README.md +211 -128
package/dist/SemanticCache.d.ts +85 -5
package/dist/SemanticCache.js +689 -47
package/dist/adapters/ai.js +6 -1
package/dist/adapters/anthropic.d.ts +32 -0
package/dist/adapters/anthropic.js +94 -0
package/dist/adapters/langchain.js +6 -1
package/dist/adapters/langgraph.d.ts +104 -0
package/dist/adapters/langgraph.js +271 -0
package/dist/adapters/llamaindex.d.ts +32 -0
package/dist/adapters/llamaindex.js +76 -0
package/dist/adapters/openai-responses.d.ts +31 -0
package/dist/adapters/openai-responses.js +112 -0
package/dist/adapters/openai.d.ts +42 -0
package/dist/adapters/openai.js +97 -0
package/dist/analytics.d.ts +24 -0
package/dist/analytics.js +116 -0
package/dist/cluster.d.ts +10 -0
package/dist/cluster.js +43 -0
package/dist/defaultCostTable.d.ts +11 -0
package/dist/defaultCostTable.js +1976 -0
package/dist/embed/bedrock.d.ts +32 -0
package/dist/embed/bedrock.js +109 -0
package/dist/embed/cohere.d.ts +34 -0
package/dist/embed/cohere.js +37 -0
package/dist/embed/ollama.d.ts +30 -0
package/dist/embed/ollama.js +24 -0
package/dist/embed/openai.d.ts +31 -0
package/dist/embed/openai.js +66 -0
package/dist/embed/voyage.d.ts +31 -0
package/dist/embed/voyage.js +32 -0
package/dist/index.d.ts +6 -1
package/dist/index.js +11 -1
package/dist/normalizer.d.ts +68 -0
package/dist/normalizer.js +102 -0
package/dist/telemetry.d.ts +3 -0
package/dist/telemetry.js +18 -0
package/dist/types.d.ts +107 -7
package/dist/utils.d.ts +58 -0
package/dist/utils.js +30 -0
package/package.json +81 -6

package/dist/telemetry.js CHANGED Viewed

@@ -42,6 +42,21 @@ function createTelemetry(opts) {
         labelNames: ['cache_name'],
         buckets: operationBuckets,
     });
+    const costSavedTotal = getOrCreateCounter(registry, {
+        name: `${opts.prefix}_cost_saved_total`,
+        help: 'Estimated cost saved in dollars from semantic cache hits',
+        labelNames: ['cache_name', 'category'],
+    });
+    const embeddingCacheTotal = getOrCreateCounter(registry, {
+        name: `${opts.prefix}_embedding_cache_total`,
+        help: 'Total embedding cache lookups (hit or miss)',
+        labelNames: ['cache_name', 'result'],
+    });
+    const staleModelEvictions = getOrCreateCounter(registry, {
+        name: `${opts.prefix}_stale_model_evictions_total`,
+        help: 'Entries evicted due to staleAfterModelChange detection',
+        labelNames: ['cache_name'],
+    });
     return {
         tracer,
         metrics: {
@@ -49,6 +64,9 @@ function createTelemetry(opts) {
             similarityScore,
             operationDuration,
             embeddingDuration,
+            costSavedTotal,
+            embeddingCacheTotal,
+            staleModelEvictions,
         },
     };
 }

package/dist/types.d.ts CHANGED Viewed

@@ -2,6 +2,10 @@ import type Valkey from 'iovalkey';
 import type { Registry } from 'prom-client';
 export type { Valkey };
 export type EmbedFn = (text: string) => Promise<number[]>;
+export interface ModelCost { // pricing entry for one model; the value type of SemanticCacheOptions.costTable
+    inputPer1k: number; // cost per 1k input tokens (currency presumably dollars, as in costSaved - TODO confirm)
+    outputPer1k: number; // cost per 1k output tokens (presumably the same currency unit as inputPer1k - TODO confirm)
+}
 export interface SemanticCacheOptions {
     /** Index name prefix used for Valkey keys. Default: 'betterdb_scache'. */
     name?: string;
@@ -9,6 +13,16 @@ export interface SemanticCacheOptions {
     client: Valkey;
     /** Async function that returns a float embedding vector for a text string. Required. */
     embedFn: EmbedFn;
+    /**
+     * Model pricing for cost savings tracking. Optional.
+     * Keys are model names (e.g. 'gpt-4o'), values are per-1k-token costs.
+     */
+    costTable?: Record<string, ModelCost>;
+    /**
+     * Use bundled default cost table from LiteLLM. User costTable entries override defaults.
+     * Default: true.
+     */
+    useDefaultCostTable?: boolean;
     /**
      * Default similarity threshold as cosine DISTANCE (0–2 scale, lower = more similar).
      * A lookup is a hit when score <= threshold. Default: 0.1.
@@ -39,6 +53,22 @@ export interface SemanticCacheOptions {
      * Default: 0.05. Set to 0 to disable uncertainty flagging (all hits are 'high').
      */
     uncertaintyBand?: number;
+    /**
+     * Pluggable binary content normalizer for stable hashing of images, audio, and documents.
+     * Default: passthrough (uses the ref string as-is).
+     * Pass this to adapter prepareSemanticParams() calls to share the same normalization strategy.
+     */
+    normalizer?: import('./normalizer').BinaryNormalizer;
+    /**
+     * Embedding cache configuration. When enabled, computed embeddings are stored in Valkey
+     * so that repeated check() calls on the same text skip the embedFn call.
+     */
+    embeddingCache?: {
+        /** Enable embedding caching. Default: true. */
+        enabled?: boolean;
+        /** TTL for cached embeddings in seconds. Default: 86400 (24 hours). */
+        ttl?: number;
+    };
     telemetry?: {
         /** OTel tracer name. Default: '@betterdb/semantic-cache'. */
         tracerName?: string;
@@ -52,11 +82,36 @@ export interface SemanticCacheOptions {
          */
         registry?: Registry;
     };
+    analytics?: {
+        /** PostHog API key. Overrides the build-time baked key if set. */
+        apiKey?: string;
+        /** PostHog host. Overrides the build-time baked host if set. */
+        host?: string;
+        /** Disable analytics. Also controlled by BETTERDB_TELEMETRY env var. */
+        disabled?: boolean;
+        /** Interval in ms for periodic stats snapshots. Default: 300_000 (5 min). 0 to disable. */
+        statsIntervalMs?: number;
+    };
+}
+export interface RerankOptions { // rerank hook configuration, supplied as CacheCheckOptions.rerank
+    /**
+     * Number of top-k candidates to retrieve before reranking (takes precedence over CacheCheckOptions.k).
+     * A higher k gives the rerankFn more candidates to choose from.
+     */
+    k: number;
+    /**
+     * Async function that receives the query text and ranked candidates, and resolves to the index
+     * (into candidates) of the best candidate. Resolve to -1 to reject all candidates (miss).
+     */
+    rerankFn: (query: string, candidates: Array<{
+        response: string;
+        similarity: number; // presumably cosine distance (lower = more similar), as in CacheCheckResult.similarity - TODO confirm
+    }>) => Promise<number>;
+}
 export interface CacheCheckOptions {
-    /** Per-request threshold override (cosine distance 0–2). Highest priority. */
+    /** Per-request threshold override (cosine distance 0-2). Highest priority. */
     threshold?: number;
-    /** Category tag — used for per-category threshold lookup and metric labels. */
+    /** Category tag - used for per-category threshold lookup and metric labels. */
     category?: string;
     /**
      * Additional FT.SEARCH pre-filter expression.
@@ -64,16 +119,33 @@ export interface CacheCheckOptions {
      * Applied as: "({filter})=>[KNN {k} @embedding $vec AS __score]"
      *
      * **Security note:** this string is interpolated directly into the FT.SEARCH
-     * query. Only pass trusted, programmatically-constructed expressions — never
+     * query. Only pass trusted, programmatically-constructed expressions - never
      * unsanitised user input.
      */
     filter?: string;
     /**
      * Number of nearest neighbours to fetch via KNN. Default: 1.
-     * Currently only the closest result is evaluated for hit/miss.
-     * Values > 1 are reserved for future multi-candidate support.
+     * Ignored when rerank is set (rerank.k takes precedence).
      */
     k?: number;
+    /**
+     * When true, a cache hit whose stored model differs from currentModel is
+     * treated as a miss and the stale entry is deleted. Useful for automatically
+     * evicting cache entries when you upgrade the model you use for a given prompt.
+     * Requires currentModel to be set.
+     * Default: false.
+     */
+    staleAfterModelChange?: boolean;
+    /** The model name to compare against stored entries when staleAfterModelChange is true. */
+    currentModel?: string;
+    /**
+     * Optional rerank hook. When set, FT.SEARCH retrieves rerank.k candidates
+     * and passes them to rerank.rerankFn. The function returns the index of the
+     * best candidate, or -1 to treat all as a miss.
+     * The threshold is NOT applied to the reranked pick unless you filter candidates
+     * in rerankFn yourself.
+     */
+    rerank?: RerankOptions;
 }
 export interface CacheStoreOptions {
     /** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
@@ -84,16 +156,33 @@ export interface CacheStoreOptions {
     model?: string;
     /**
      * Arbitrary metadata stored as JSON alongside the entry.
-     * Stored for external consumption (e.g. BetterDB Monitor) — not returned by check().
+     * Stored for external consumption (e.g. BetterDB Monitor) - not returned by check().
      */
     metadata?: Record<string, string | number>;
+    /**
+     * Number of input tokens used to generate the cached response.
+     * When provided along with outputTokens and model, the cost is computed and stored.
+     * On future cache hits, the stored cost is reported as costSaved in CacheCheckResult.
+     */
+    inputTokens?: number;
+    /**
+     * Number of output tokens in the cached response.
+     * See inputTokens for full description.
+     */
+    outputTokens?: number;
+    /** LLM sampling temperature stored as a NUMERIC field for opt-in filtering. */
+    temperature?: number;
+    /** Top-p nucleus sampling parameter stored as a NUMERIC field for opt-in filtering. */
+    topP?: number;
+    /** Random seed stored as a NUMERIC field for opt-in filtering. */
+    seed?: number;
 }
 export type CacheConfidence = 'high' | 'uncertain' | 'miss';
 export interface CacheCheckResult {
     hit: boolean;
     response?: string;
     /**
-     * Cosine distance score (0–2). Present when a nearest neighbour was found,
+     * Cosine distance score (0-2). Present when a nearest neighbour was found,
      * regardless of whether it was a hit or miss.
      */
     similarity?: number;
@@ -118,6 +207,15 @@ export interface CacheCheckResult {
         similarity: number;
         deltaToThreshold: number;
     };
+    /**
+     * Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.
+     * Present on hit when the original store() call included inputTokens/outputTokens and model.
+     */
+    costSaved?: number;
+    /**
+     * Structured response content blocks. Present on hit when the entry was stored via storeMultipart().
+     */
+    contentBlocks?: import('./utils').ContentBlock[];
 }
 export interface InvalidateResult {
     /** Number of entries deleted in this call. */
@@ -133,6 +231,8 @@ export interface CacheStats {
     misses: number;
     total: number;
     hitRate: number;
+    /** Accumulated cost saved in microdollars (divide by 1_000_000 for dollars). */
+    costSavedMicros: number;
 }
 export interface IndexInfo {
     name: string;

package/dist/utils.d.ts CHANGED Viewed

@@ -1,5 +1,63 @@
 /** SHA-256 hex digest of a string. */
 export declare function sha256(text: string): string;
+/** Escape a string for safe use as a Valkey Search TAG filter value by putting a backslash
+ * before each character in a fixed set of ASCII punctuation and before each space. Spaces are included
+ * because Valkey Search treats unescaped spaces as term separators (OR semantics), which would broaden the filter unintentionally.
+ */
+export declare function escapeTag(value: string): string;
+export type ContentBlock = TextBlock | BinaryBlock | ToolCallBlock | ToolResultBlock | ReasoningBlock; // union discriminated by each member's literal `type` field
+export interface TextBlock { // plain-text block; top-level TextBlocks are what extractText() collects
+    type: 'text'; // discriminant within ContentBlock
+    text: string;
+    hints?: BlockHints; // optional hints, see BlockHints
+}
+export interface BinaryBlock { // image, audio, or document content identified by a ref string
+    type: 'binary'; // discriminant within ContentBlock
+    kind: 'image' | 'audio' | 'document';
+    mediaType: string; // presumably a MIME type such as 'image/png' - TODO confirm against the adapters
+    ref: string; // reference string; extractBinaryRefs() collects these from top-level binary blocks
+    detail?: 'auto' | 'low' | 'high' | 'original';
+    filename?: string;
+    hints?: BlockHints; // optional hints, see BlockHints
+}
+export interface ToolCallBlock { // tool-call block carrying an id, a name, and arguments
+    type: 'tool_call'; // discriminant within ContentBlock
+    id: string;
+    name: string;
+    args: unknown; // shape is not constrained by this type; narrow before use
+    hints?: BlockHints; // optional hints, see BlockHints
+}
+export interface ToolResultBlock { // tool-result block wrapping nested text/binary content
+    type: 'tool_result'; // discriminant within ContentBlock
+    toolCallId: string; // presumably the id of the matching ToolCallBlock - TODO confirm
+    content: Array<TextBlock | BinaryBlock>; // nested blocks; extractText()/extractBinaryRefs() filter top-level blocks only and do not descend here
+    isError?: boolean;
+    hints?: BlockHints; // optional hints, see BlockHints
+}
+export interface ReasoningBlock { // reasoning block; its text is NOT returned by extractText(), which keeps only 'text' blocks
+    type: 'reasoning'; // discriminant within ContentBlock
+    text: string;
+    opaqueSignature?: string;
+    redacted?: boolean;
+    hints?: BlockHints; // optional hints, see BlockHints
+}
+export interface BlockHints { // optional per-block hints, attached through the `hints` field of each block type
+    anthropicCacheControl?: {
+        type: 'ephemeral';
+        ttl?: '5m' | '1h';
+    };
+    [k: string]: unknown; // any additional hint keys are accepted, with unknown-typed values
+}
+/**
+ * Join the text of each top-level TextBlock in a ContentBlock array with a single space, in array order ('' if there are none).
+ * Reasoning text and text nested inside tool_result content are not included. Used to derive the embedding text from a multi-modal prompt.
+ */
+export declare function extractText(blocks: ContentBlock[]): string;
+/**
+ * Collect the ref of each top-level BinaryBlock in a ContentBlock array, sorted (default string order, duplicates kept) for stability.
+ * Binary blocks nested inside tool_result content are not included. Used for the binary_refs TAG field on cache entries.
+ */
+export declare function extractBinaryRefs(blocks: ContentBlock[]): string[];
 /**
  * Encode number[] as a little-endian Float32 Buffer.
  * Used to store embeddings as binary HSET field values.

package/dist/utils.js CHANGED Viewed

@@ -1,6 +1,9 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.sha256 = sha256;
+exports.escapeTag = escapeTag;
+exports.extractText = extractText;
+exports.extractBinaryRefs = extractBinaryRefs;
 exports.encodeFloat32 = encodeFloat32;
 exports.parseFtSearchResponse = parseFtSearchResponse;
 const node_crypto_1 = require("node:crypto");
@@ -8,6 +11,33 @@ const node_crypto_1 = require("node:crypto");
 function sha256(text) { // returns the SHA-256 digest of `text` as a hex string
     return (0, node_crypto_1.createHash)('sha256').update(text).digest('hex'); // (0, fn)(...) calls createHash without a `this` binding
 }
+/** Escape a string for safe use as a Valkey Search TAG filter value by putting a backslash
+ * before each punctuation character listed in the pattern below and before each space. Spaces are included
+ * because Valkey Search treats unescaped spaces as term separators (OR semantics), which would broaden the filter unintentionally.
+ */
+function escapeTag(value) {
+    return value.replace(/[,.<>{}[\]"':;!@#$%^&*()\-+=~|/\\ ]/g, '\\$&'); // '\\$&' = backslash + the matched char; chars outside the class (e.g. ?, `, _, tab) pass through unchanged
+}
+/**
+ * Join the text of each top-level TextBlock in a ContentBlock array with a single space, in array order ('' if there are none).
+ * Reasoning text and text nested inside tool_result content are not included. Used to derive the embedding text from a multi-modal prompt.
+ */
+function extractText(blocks) {
+    return blocks
+        .filter((b) => b.type === 'text') // top-level blocks only; tool_result.content is not traversed
+        .map((b) => b.text)
+        .join(' '); // single-space separator; an empty selection joins to ''
+}
+/**
+ * Collect the ref of each top-level BinaryBlock in a ContentBlock array, sorted (default string order, duplicates kept) for stability.
+ * Binary blocks nested inside tool_result content are not included. Used for the binary_refs TAG field on cache entries.
+ */
+function extractBinaryRefs(blocks) {
+    return blocks
+        .filter((b) => b.type === 'binary') // top-level blocks only; tool_result.content is not traversed
+        .map((b) => b.ref)
+        .sort(); // sorts the fresh array produced by map(), so the caller's `blocks` array is not reordered
+}
 /**
  * Encode number[] as a little-endian Float32 Buffer.
  * Used to store embeddings as binary HSET field values.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@betterdb/semantic-cache",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
   "keywords": [
     "valkey",
@@ -8,7 +8,11 @@
     "semantic-cache",
     "llm",
     "opentelemetry",
-    "prometheus"
+    "prometheus",
+    "langchain",
+    "langgraph",
+    "openai",
+    "anthropic"
   ],
   "license": "MIT",
   "repository": {
@@ -33,6 +37,56 @@
       "import": "./dist/adapters/ai.js",
       "require": "./dist/adapters/ai.js",
       "types": "./dist/adapters/ai.d.ts"
+    },
+    "./langgraph": {
+      "import": "./dist/adapters/langgraph.js",
+      "require": "./dist/adapters/langgraph.js",
+      "types": "./dist/adapters/langgraph.d.ts"
+    },
+    "./openai": {
+      "import": "./dist/adapters/openai.js",
+      "require": "./dist/adapters/openai.js",
+      "types": "./dist/adapters/openai.d.ts"
+    },
+    "./anthropic": {
+      "import": "./dist/adapters/anthropic.js",
+      "require": "./dist/adapters/anthropic.js",
+      "types": "./dist/adapters/anthropic.d.ts"
+    },
+    "./llamaindex": {
+      "import": "./dist/adapters/llamaindex.js",
+      "require": "./dist/adapters/llamaindex.js",
+      "types": "./dist/adapters/llamaindex.d.ts"
+    },
+    "./openai-responses": {
+      "import": "./dist/adapters/openai-responses.js",
+      "require": "./dist/adapters/openai-responses.js",
+      "types": "./dist/adapters/openai-responses.d.ts"
+    },
+    "./embed/openai": {
+      "import": "./dist/embed/openai.js",
+      "require": "./dist/embed/openai.js",
+      "types": "./dist/embed/openai.d.ts"
+    },
+    "./embed/bedrock": {
+      "import": "./dist/embed/bedrock.js",
+      "require": "./dist/embed/bedrock.js",
+      "types": "./dist/embed/bedrock.d.ts"
+    },
+    "./embed/voyage": {
+      "import": "./dist/embed/voyage.js",
+      "require": "./dist/embed/voyage.js",
+      "types": "./dist/embed/voyage.d.ts"
+    },
+    "./embed/cohere": {
+      "import": "./dist/embed/cohere.js",
+      "require": "./dist/embed/cohere.js",
+      "types": "./dist/embed/cohere.d.ts"
+    },
+    "./embed/ollama": {
+      "import": "./dist/embed/ollama.js",
+      "require": "./dist/embed/ollama.js",
+      "types": "./dist/embed/ollama.d.ts"
     }
   },
   "files": [
@@ -40,11 +94,12 @@
     "README.md"
   ],
   "scripts": {
-    "build": "tsc",
+    "build": "tsc && node scripts/inject-telemetry-defaults.mjs",
     "typecheck": "tsc --noEmit",
     "test": "vitest run",
     "test:watch": "vitest",
-    "clean": "rm -rf dist"
+    "clean": "rm -rf dist",
+    "update:pricing": "node scripts/update-model-prices.mjs"
   },
   "dependencies": {
     "@opentelemetry/api": "^1.9.0",
@@ -54,16 +109,36 @@
     "node": ">=20.0.0"
   },
   "peerDependencies": {
-    "iovalkey": ">=0.3.0",
+    "posthog-node": ">=4.0.0",
+    "@anthropic-ai/sdk": ">=0.90.0",
     "@langchain/core": ">=0.3.0",
-    "ai": ">=4.0.0"
+    "@langchain/langgraph-checkpoint": ">=0.1.0",
+    "@llamaindex/core": ">=0.6.0",
+    "ai": ">=4.0.0",
+    "iovalkey": ">=0.3.0",
+    "openai": ">=6.0.0"
   },
   "peerDependenciesMeta": {
+    "posthog-node": {
+      "optional": true
+    },
+    "@anthropic-ai/sdk": {
+      "optional": true
+    },
     "@langchain/core": {
       "optional": true
     },
+    "@langchain/langgraph-checkpoint": {
+      "optional": true
+    },
+    "@llamaindex/core": {
+      "optional": true
+    },
     "ai": {
       "optional": true
+    },
+    "openai": {
+      "optional": true
     }
   }
 }