@betterdb/semantic-cache 0.1.0 → 0.2.0

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
Files changed (41)
  1. package/README.md +211 -128
  2. package/dist/SemanticCache.d.ts +85 -5
  3. package/dist/SemanticCache.js +689 -47
  4. package/dist/adapters/ai.js +6 -1
  5. package/dist/adapters/anthropic.d.ts +32 -0
  6. package/dist/adapters/anthropic.js +94 -0
  7. package/dist/adapters/langchain.js +6 -1
  8. package/dist/adapters/langgraph.d.ts +104 -0
  9. package/dist/adapters/langgraph.js +271 -0
  10. package/dist/adapters/llamaindex.d.ts +32 -0
  11. package/dist/adapters/llamaindex.js +76 -0
  12. package/dist/adapters/openai-responses.d.ts +31 -0
  13. package/dist/adapters/openai-responses.js +112 -0
  14. package/dist/adapters/openai.d.ts +42 -0
  15. package/dist/adapters/openai.js +97 -0
  16. package/dist/analytics.d.ts +24 -0
  17. package/dist/analytics.js +116 -0
  18. package/dist/cluster.d.ts +10 -0
  19. package/dist/cluster.js +43 -0
  20. package/dist/defaultCostTable.d.ts +11 -0
  21. package/dist/defaultCostTable.js +1976 -0
  22. package/dist/embed/bedrock.d.ts +32 -0
  23. package/dist/embed/bedrock.js +109 -0
  24. package/dist/embed/cohere.d.ts +34 -0
  25. package/dist/embed/cohere.js +37 -0
  26. package/dist/embed/ollama.d.ts +30 -0
  27. package/dist/embed/ollama.js +24 -0
  28. package/dist/embed/openai.d.ts +31 -0
  29. package/dist/embed/openai.js +66 -0
  30. package/dist/embed/voyage.d.ts +31 -0
  31. package/dist/embed/voyage.js +32 -0
  32. package/dist/index.d.ts +6 -1
  33. package/dist/index.js +11 -1
  34. package/dist/normalizer.d.ts +68 -0
  35. package/dist/normalizer.js +102 -0
  36. package/dist/telemetry.d.ts +3 -0
  37. package/dist/telemetry.js +18 -0
  38. package/dist/types.d.ts +107 -7
  39. package/dist/utils.d.ts +58 -0
  40. package/dist/utils.js +30 -0
  41. package/package.json +81 -6
package/dist/telemetry.js CHANGED
@@ -42,6 +42,21 @@ function createTelemetry(opts) {
42
42
  labelNames: ['cache_name'],
43
43
  buckets: operationBuckets,
44
44
  });
45
+ const costSavedTotal = getOrCreateCounter(registry, {
46
+ name: `${opts.prefix}_cost_saved_total`,
47
+ help: 'Estimated cost saved in dollars from semantic cache hits',
48
+ labelNames: ['cache_name', 'category'],
49
+ });
50
+ const embeddingCacheTotal = getOrCreateCounter(registry, {
51
+ name: `${opts.prefix}_embedding_cache_total`,
52
+ help: 'Total embedding cache lookups (hit or miss)',
53
+ labelNames: ['cache_name', 'result'],
54
+ });
55
+ const staleModelEvictions = getOrCreateCounter(registry, {
56
+ name: `${opts.prefix}_stale_model_evictions_total`,
57
+ help: 'Entries evicted due to staleAfterModelChange detection',
58
+ labelNames: ['cache_name'],
59
+ });
45
60
  return {
46
61
  tracer,
47
62
  metrics: {
@@ -49,6 +64,9 @@ function createTelemetry(opts) {
49
64
  similarityScore,
50
65
  operationDuration,
51
66
  embeddingDuration,
67
+ costSavedTotal,
68
+ embeddingCacheTotal,
69
+ staleModelEvictions,
52
70
  },
53
71
  };
54
72
  }
package/dist/types.d.ts CHANGED
@@ -2,6 +2,10 @@ import type Valkey from 'iovalkey';
2
2
  import type { Registry } from 'prom-client';
3
3
  export type { Valkey };
4
4
  export type EmbedFn = (text: string) => Promise<number[]>;
5
+ export interface ModelCost {
6
+ inputPer1k: number;
7
+ outputPer1k: number;
8
+ }
5
9
  export interface SemanticCacheOptions {
6
10
  /** Index name prefix used for Valkey keys. Default: 'betterdb_scache'. */
7
11
  name?: string;
@@ -9,6 +13,16 @@ export interface SemanticCacheOptions {
9
13
  client: Valkey;
10
14
  /** Async function that returns a float embedding vector for a text string. Required. */
11
15
  embedFn: EmbedFn;
16
+ /**
17
+ * Model pricing for cost savings tracking. Optional.
18
+ * Keys are model names (e.g. 'gpt-4o'), values are per-1k-token costs.
19
+ */
20
+ costTable?: Record<string, ModelCost>;
21
+ /**
22
+ * Use bundled default cost table from LiteLLM. User costTable entries override defaults.
23
+ * Default: true.
24
+ */
25
+ useDefaultCostTable?: boolean;
12
26
  /**
13
27
  * Default similarity threshold as cosine DISTANCE (0–2 scale, lower = more similar).
14
28
  * A lookup is a hit when score <= threshold. Default: 0.1.
@@ -39,6 +53,22 @@ export interface SemanticCacheOptions {
39
53
  * Default: 0.05. Set to 0 to disable uncertainty flagging (all hits are 'high').
40
54
  */
41
55
  uncertaintyBand?: number;
56
+ /**
57
+ * Pluggable binary content normalizer for stable hashing of images, audio, and documents.
58
+ * Default: passthrough (uses the ref string as-is).
59
+ * Pass this to adapter prepareSemanticParams() calls to share the same normalization strategy.
60
+ */
61
+ normalizer?: import('./normalizer').BinaryNormalizer;
62
+ /**
63
+ * Embedding cache configuration. When enabled, computed embeddings are stored in Valkey
64
+ * so that repeated check() calls on the same text skip the embedFn call.
65
+ */
66
+ embeddingCache?: {
67
+ /** Enable embedding caching. Default: true. */
68
+ enabled?: boolean;
69
+ /** TTL for cached embeddings in seconds. Default: 86400 (24 hours). */
70
+ ttl?: number;
71
+ };
42
72
  telemetry?: {
43
73
  /** OTel tracer name. Default: '@betterdb/semantic-cache'. */
44
74
  tracerName?: string;
@@ -52,11 +82,36 @@ export interface SemanticCacheOptions {
52
82
  */
53
83
  registry?: Registry;
54
84
  };
85
+ analytics?: {
86
+ /** PostHog API key. Overrides the build-time baked key if set. */
87
+ apiKey?: string;
88
+ /** PostHog host. Overrides the build-time baked host if set. */
89
+ host?: string;
90
+ /** Disable analytics. Also controlled by BETTERDB_TELEMETRY env var. */
91
+ disabled?: boolean;
92
+ /** Interval in ms for periodic stats snapshots. Default: 300_000 (5 min). 0 to disable. */
93
+ statsIntervalMs?: number;
94
+ };
95
+ }
96
+ export interface RerankOptions {
97
+ /**
98
+ * Number of top-k candidates to retrieve before reranking.
99
+ * A higher k gives the rerankFn more candidates to choose from.
100
+ */
101
+ k: number;
102
+ /**
103
+ * Function that receives the query text and ranked candidates, and returns
104
+ * the index of the best candidate. Return -1 to reject all candidates (miss).
105
+ */
106
+ rerankFn: (query: string, candidates: Array<{
107
+ response: string;
108
+ similarity: number;
109
+ }>) => Promise<number>;
55
110
  }
56
111
  export interface CacheCheckOptions {
57
- /** Per-request threshold override (cosine distance 0–2). Highest priority. */
112
+ /** Per-request threshold override (cosine distance 0-2). Highest priority. */
58
113
  threshold?: number;
59
- /** Category tag — used for per-category threshold lookup and metric labels. */
114
+ /** Category tag - used for per-category threshold lookup and metric labels. */
60
115
  category?: string;
61
116
  /**
62
117
  * Additional FT.SEARCH pre-filter expression.
@@ -64,16 +119,33 @@ export interface CacheCheckOptions {
64
119
  * Applied as: "({filter})=>[KNN {k} @embedding $vec AS __score]"
65
120
  *
66
121
  * **Security note:** this string is interpolated directly into the FT.SEARCH
67
- * query. Only pass trusted, programmatically-constructed expressions — never
122
+ * query. Only pass trusted, programmatically-constructed expressions - never
68
123
  * unsanitised user input.
69
124
  */
70
125
  filter?: string;
71
126
  /**
72
127
  * Number of nearest neighbours to fetch via KNN. Default: 1.
73
- * Currently only the closest result is evaluated for hit/miss.
74
- * Values > 1 are reserved for future multi-candidate support.
128
+ * Ignored when rerank is set (rerank.k takes precedence).
75
129
  */
76
130
  k?: number;
131
+ /**
132
+ * When true, a cache hit whose stored model differs from currentModel is
133
+ * treated as a miss and the stale entry is deleted. Useful for automatically
134
+ * evicting cache entries when you upgrade the model you use for a given prompt.
135
+ * Requires currentModel to be set.
136
+ * Default: false.
137
+ */
138
+ staleAfterModelChange?: boolean;
139
+ /** The model name to compare against stored entries when staleAfterModelChange is true. */
140
+ currentModel?: string;
141
+ /**
142
+ * Optional rerank hook. When set, FT.SEARCH retrieves rerank.k candidates
143
+ * and passes them to rerank.rerankFn. The function returns the index of the
144
+ * best candidate, or -1 to treat all as a miss.
145
+ * The threshold is NOT applied to the reranked pick unless you filter candidates
146
+ * in rerankFn yourself.
147
+ */
148
+ rerank?: RerankOptions;
77
149
  }
78
150
  export interface CacheStoreOptions {
79
151
  /** Per-entry TTL in seconds. Overrides SemanticCacheOptions.defaultTtl. */
@@ -84,16 +156,33 @@ export interface CacheStoreOptions {
84
156
  model?: string;
85
157
  /**
86
158
  * Arbitrary metadata stored as JSON alongside the entry.
87
- * Stored for external consumption (e.g. BetterDB Monitor) — not returned by check().
159
+ * Stored for external consumption (e.g. BetterDB Monitor) - not returned by check().
88
160
  */
89
161
  metadata?: Record<string, string | number>;
162
+ /**
163
+ * Number of input tokens used to generate the cached response.
164
+ * When provided along with outputTokens and model, the cost is computed and stored.
165
+ * On future cache hits, the stored cost is reported as costSaved in CacheCheckResult.
166
+ */
167
+ inputTokens?: number;
168
+ /**
169
+ * Number of output tokens in the cached response.
170
+ * See inputTokens for full description.
171
+ */
172
+ outputTokens?: number;
173
+ /** LLM sampling temperature stored as a NUMERIC field for opt-in filtering. */
174
+ temperature?: number;
175
+ /** Top-p nucleus sampling parameter stored as a NUMERIC field for opt-in filtering. */
176
+ topP?: number;
177
+ /** Random seed stored as a NUMERIC field for opt-in filtering. */
178
+ seed?: number;
90
179
  }
91
180
  export type CacheConfidence = 'high' | 'uncertain' | 'miss';
92
181
  export interface CacheCheckResult {
93
182
  hit: boolean;
94
183
  response?: string;
95
184
  /**
96
- * Cosine distance score (0–2). Present when a nearest neighbour was found,
185
+ * Cosine distance score (0-2). Present when a nearest neighbour was found,
97
186
  * regardless of whether it was a hit or miss.
98
187
  */
99
188
  similarity?: number;
@@ -118,6 +207,15 @@ export interface CacheCheckResult {
118
207
  similarity: number;
119
208
  deltaToThreshold: number;
120
209
  };
210
+ /**
211
+ * Estimated cost saved (in dollars) by returning this cached result instead of calling the LLM.
212
+ * Present on hit when the original store() call included inputTokens/outputTokens and model.
213
+ */
214
+ costSaved?: number;
215
+ /**
216
+ * Structured response content blocks. Present on hit when the entry was stored via storeMultipart().
217
+ */
218
+ contentBlocks?: import('./utils').ContentBlock[];
121
219
  }
122
220
  export interface InvalidateResult {
123
221
  /** Number of entries deleted in this call. */
@@ -133,6 +231,8 @@ export interface CacheStats {
133
231
  misses: number;
134
232
  total: number;
135
233
  hitRate: number;
234
+ /** Accumulated cost saved in microdollars (divide by 1_000_000 for dollars). */
235
+ costSavedMicros: number;
136
236
  }
137
237
  export interface IndexInfo {
138
238
  name: string;
package/dist/utils.d.ts CHANGED
@@ -1,5 +1,63 @@
1
1
  /** SHA-256 hex digest of a string. */
2
2
  export declare function sha256(text: string): string;
3
+ /** Escape a string for safe use as a Valkey Search TAG filter value.
4
+ * Spaces are included because Valkey Search treats unescaped spaces as term
5
+ * separators (OR semantics), which would broaden the filter unintentionally.
6
+ */
7
+ export declare function escapeTag(value: string): string;
8
+ export type ContentBlock = TextBlock | BinaryBlock | ToolCallBlock | ToolResultBlock | ReasoningBlock;
9
+ export interface TextBlock {
10
+ type: 'text';
11
+ text: string;
12
+ hints?: BlockHints;
13
+ }
14
+ export interface BinaryBlock {
15
+ type: 'binary';
16
+ kind: 'image' | 'audio' | 'document';
17
+ mediaType: string;
18
+ ref: string;
19
+ detail?: 'auto' | 'low' | 'high' | 'original';
20
+ filename?: string;
21
+ hints?: BlockHints;
22
+ }
23
+ export interface ToolCallBlock {
24
+ type: 'tool_call';
25
+ id: string;
26
+ name: string;
27
+ args: unknown;
28
+ hints?: BlockHints;
29
+ }
30
+ export interface ToolResultBlock {
31
+ type: 'tool_result';
32
+ toolCallId: string;
33
+ content: Array<TextBlock | BinaryBlock>;
34
+ isError?: boolean;
35
+ hints?: BlockHints;
36
+ }
37
+ export interface ReasoningBlock {
38
+ type: 'reasoning';
39
+ text: string;
40
+ opaqueSignature?: string;
41
+ redacted?: boolean;
42
+ hints?: BlockHints;
43
+ }
44
+ export interface BlockHints {
45
+ anthropicCacheControl?: {
46
+ type: 'ephemeral';
47
+ ttl?: '5m' | '1h';
48
+ };
49
+ [k: string]: unknown;
50
+ }
51
+ /**
52
+ * Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
53
+ * Used to derive the embedding text from a multi-modal prompt.
54
+ */
55
+ export declare function extractText(blocks: ContentBlock[]): string;
56
+ /**
57
+ * Extract all binary refs from a ContentBlock array, sorted for stability.
58
+ * Used for the binary_refs TAG field on cache entries.
59
+ */
60
+ export declare function extractBinaryRefs(blocks: ContentBlock[]): string[];
3
61
  /**
4
62
  * Encode number[] as a little-endian Float32 Buffer.
5
63
  * Used to store embeddings as binary HSET field values.
package/dist/utils.js CHANGED
@@ -1,6 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.sha256 = sha256;
4
+ exports.escapeTag = escapeTag;
5
+ exports.extractText = extractText;
6
+ exports.extractBinaryRefs = extractBinaryRefs;
4
7
  exports.encodeFloat32 = encodeFloat32;
5
8
  exports.parseFtSearchResponse = parseFtSearchResponse;
6
9
  const node_crypto_1 = require("node:crypto");
@@ -8,6 +11,33 @@ const node_crypto_1 = require("node:crypto");
8
11
  function sha256(text) {
9
12
  return (0, node_crypto_1.createHash)('sha256').update(text).digest('hex');
10
13
  }
14
+ /** Escape a string for safe use as a Valkey Search TAG filter value.
15
+ * Spaces are included because Valkey Search treats unescaped spaces as term
16
+ * separators (OR semantics), which would broaden the filter unintentionally.
17
+ */
18
+ function escapeTag(value) {
19
+ return value.replace(/[,.<>{}[\]"':;!@#$%^&*()\-+=~|/\\ ]/g, '\\$&');
20
+ }
21
+ /**
22
+ * Extract all text from a ContentBlock array, joining TextBlock.text values with a space.
23
+ * Used to derive the embedding text from a multi-modal prompt.
24
+ */
25
+ function extractText(blocks) {
26
+ return blocks
27
+ .filter((b) => b.type === 'text')
28
+ .map((b) => b.text)
29
+ .join(' ');
30
+ }
31
+ /**
32
+ * Extract all binary refs from a ContentBlock array, sorted for stability.
33
+ * Used for the binary_refs TAG field on cache entries.
34
+ */
35
+ function extractBinaryRefs(blocks) {
36
+ return blocks
37
+ .filter((b) => b.type === 'binary')
38
+ .map((b) => b.ref)
39
+ .sort();
40
+ }
11
41
  /**
12
42
  * Encode number[] as a little-endian Float32 Buffer.
13
43
  * Used to store embeddings as binary HSET field values.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@betterdb/semantic-cache",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Valkey-native semantic cache for LLM applications with built-in OpenTelemetry and Prometheus instrumentation",
5
5
  "keywords": [
6
6
  "valkey",
@@ -8,7 +8,11 @@
8
8
  "semantic-cache",
9
9
  "llm",
10
10
  "opentelemetry",
11
- "prometheus"
11
+ "prometheus",
12
+ "langchain",
13
+ "langgraph",
14
+ "openai",
15
+ "anthropic"
12
16
  ],
13
17
  "license": "MIT",
14
18
  "repository": {
@@ -33,6 +37,56 @@
33
37
  "import": "./dist/adapters/ai.js",
34
38
  "require": "./dist/adapters/ai.js",
35
39
  "types": "./dist/adapters/ai.d.ts"
40
+ },
41
+ "./langgraph": {
42
+ "import": "./dist/adapters/langgraph.js",
43
+ "require": "./dist/adapters/langgraph.js",
44
+ "types": "./dist/adapters/langgraph.d.ts"
45
+ },
46
+ "./openai": {
47
+ "import": "./dist/adapters/openai.js",
48
+ "require": "./dist/adapters/openai.js",
49
+ "types": "./dist/adapters/openai.d.ts"
50
+ },
51
+ "./anthropic": {
52
+ "import": "./dist/adapters/anthropic.js",
53
+ "require": "./dist/adapters/anthropic.js",
54
+ "types": "./dist/adapters/anthropic.d.ts"
55
+ },
56
+ "./llamaindex": {
57
+ "import": "./dist/adapters/llamaindex.js",
58
+ "require": "./dist/adapters/llamaindex.js",
59
+ "types": "./dist/adapters/llamaindex.d.ts"
60
+ },
61
+ "./openai-responses": {
62
+ "import": "./dist/adapters/openai-responses.js",
63
+ "require": "./dist/adapters/openai-responses.js",
64
+ "types": "./dist/adapters/openai-responses.d.ts"
65
+ },
66
+ "./embed/openai": {
67
+ "import": "./dist/embed/openai.js",
68
+ "require": "./dist/embed/openai.js",
69
+ "types": "./dist/embed/openai.d.ts"
70
+ },
71
+ "./embed/bedrock": {
72
+ "import": "./dist/embed/bedrock.js",
73
+ "require": "./dist/embed/bedrock.js",
74
+ "types": "./dist/embed/bedrock.d.ts"
75
+ },
76
+ "./embed/voyage": {
77
+ "import": "./dist/embed/voyage.js",
78
+ "require": "./dist/embed/voyage.js",
79
+ "types": "./dist/embed/voyage.d.ts"
80
+ },
81
+ "./embed/cohere": {
82
+ "import": "./dist/embed/cohere.js",
83
+ "require": "./dist/embed/cohere.js",
84
+ "types": "./dist/embed/cohere.d.ts"
85
+ },
86
+ "./embed/ollama": {
87
+ "import": "./dist/embed/ollama.js",
88
+ "require": "./dist/embed/ollama.js",
89
+ "types": "./dist/embed/ollama.d.ts"
36
90
  }
37
91
  },
38
92
  "files": [
@@ -40,11 +94,12 @@
40
94
  "README.md"
41
95
  ],
42
96
  "scripts": {
43
- "build": "tsc",
97
+ "build": "tsc && node scripts/inject-telemetry-defaults.mjs",
44
98
  "typecheck": "tsc --noEmit",
45
99
  "test": "vitest run",
46
100
  "test:watch": "vitest",
47
- "clean": "rm -rf dist"
101
+ "clean": "rm -rf dist",
102
+ "update:pricing": "node scripts/update-model-prices.mjs"
48
103
  },
49
104
  "dependencies": {
50
105
  "@opentelemetry/api": "^1.9.0",
@@ -54,16 +109,36 @@
54
109
  "node": ">=20.0.0"
55
110
  },
56
111
  "peerDependencies": {
57
- "iovalkey": ">=0.3.0",
112
+ "posthog-node": ">=4.0.0",
113
+ "@anthropic-ai/sdk": ">=0.90.0",
58
114
  "@langchain/core": ">=0.3.0",
59
- "ai": ">=4.0.0"
115
+ "@langchain/langgraph-checkpoint": ">=0.1.0",
116
+ "@llamaindex/core": ">=0.6.0",
117
+ "ai": ">=4.0.0",
118
+ "iovalkey": ">=0.3.0",
119
+ "openai": ">=6.0.0"
60
120
  },
61
121
  "peerDependenciesMeta": {
122
+ "posthog-node": {
123
+ "optional": true
124
+ },
125
+ "@anthropic-ai/sdk": {
126
+ "optional": true
127
+ },
62
128
  "@langchain/core": {
63
129
  "optional": true
64
130
  },
131
+ "@langchain/langgraph-checkpoint": {
132
+ "optional": true
133
+ },
134
+ "@llamaindex/core": {
135
+ "optional": true
136
+ },
65
137
  "ai": {
66
138
  "optional": true
139
+ },
140
+ "openai": {
141
+ "optional": true
67
142
  }
68
143
  }
69
144
  }