@betterdb/semantic-cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,416 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SemanticCache = void 0;
4
+ const node_crypto_1 = require("node:crypto");
5
+ const api_1 = require("@opentelemetry/api");
6
+ const errors_1 = require("./errors");
7
+ const telemetry_1 = require("./telemetry");
8
+ const utils_1 = require("./utils");
9
+ const INVALIDATE_BATCH_SIZE = 1000;
10
/**
 * Produces a human-readable message for any thrown value.
 *
 * - `Error` instances yield their `message`.
 * - Non-Error objects that carry a string `message` property (some SDKs
 *   reject with plain objects) yield that message instead of the useless
 *   "[object Object]".
 * - Everything else is converted with `String()`.
 *
 * @param err The thrown value; may be of any type.
 * @returns A message string describing the failure.
 */
function errMsg(err) {
    if (err instanceof Error) {
        return err.message;
    }
    // Duck-type error-like objects so their message is not lost.
    if (typeof err === 'object' && err !== null && typeof err.message === 'string') {
        return err.message;
    }
    return String(err);
}
13
+ class SemanticCache {
14
+ client;
15
+ embedFn;
16
+ name;
17
+ indexName;
18
+ entryPrefix;
19
+ statsKey;
20
+ defaultThreshold;
21
+ defaultTtl;
22
+ categoryThresholds;
23
+ uncertaintyBand;
24
+ telemetry;
25
+ _initialized = false;
26
+ _dimension = 0;
27
+ _initPromise = null;
28
+ _initGeneration = 0;
29
+ /**
30
+ * Creates a new SemanticCache instance.
31
+ *
32
+ * The caller owns the iovalkey client lifecycle. SemanticCache does not
33
+ * close or disconnect the client when it is done. Call client.quit() or
34
+ * client.disconnect() yourself when the application shuts down.
35
+ *
36
+ * Call initialize() before using check() or store().
37
+ */
38
+ constructor(options) {
39
+ this.client = options.client;
40
+ this.embedFn = options.embedFn;
41
+ this.name = options.name ?? 'betterdb_scache';
42
+ this.indexName = `${this.name}:idx`;
43
+ this.entryPrefix = `${this.name}:entry:`;
44
+ this.statsKey = `${this.name}:__stats`;
45
+ this.defaultThreshold = options.defaultThreshold ?? 0.1;
46
+ this.defaultTtl = options.defaultTtl;
47
+ this.categoryThresholds = options.categoryThresholds ?? {};
48
+ this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
49
+ this.telemetry = (0, telemetry_1.createTelemetry)({
50
+ prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
51
+ tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
52
+ registry: options.telemetry?.registry,
53
+ });
54
+ }
55
+ // ── Lifecycle ──────────────────────────────────────────────
56
+ async initialize() {
57
+ if (!this._initPromise) {
58
+ this._initPromise = this._doInitialize().catch((err) => {
59
+ this._initPromise = null;
60
+ throw err;
61
+ });
62
+ }
63
+ return this._initPromise;
64
+ }
65
+ async flush() {
66
+ // Mark uninitialized immediately so concurrent check()/store() calls get
67
+ // a clear SemanticCacheUsageError instead of cryptic Valkey errors.
68
+ // Bump generation so any in-flight _doInitialize() won't overwrite this state.
69
+ this._initialized = false;
70
+ this._initPromise = null;
71
+ this._initGeneration++;
72
+ // Valkey Search 1.2 does not support the DD (Delete Documents) flag on
73
+ // FT.DROPINDEX. Drop the index first, then clean up keys separately.
74
+ try {
75
+ await this.client.call('FT.DROPINDEX', this.indexName);
76
+ }
77
+ catch (err) {
78
+ if (!this.isIndexNotFoundError(err)) {
79
+ throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
80
+ }
81
+ }
82
+ const entryPattern = `${this.name}:entry:*`;
83
+ let cursor = '0';
84
+ do {
85
+ const [nextCursor, keys] = await this.client.scan(cursor, 'MATCH', entryPattern, 'COUNT', '100');
86
+ cursor = nextCursor;
87
+ if (keys.length > 0)
88
+ await this.client.del(keys);
89
+ } while (cursor !== '0');
90
+ await this.client.del(this.statsKey);
91
+ }
92
+ // ── Public operations ──────────────────────────────────────
93
+ async check(prompt, options) {
94
+ this.assertInitialized('check');
95
+ return this.traced('check', async (span) => {
96
+ const category = options?.category ?? '';
97
+ const k = options?.k ?? 1;
98
+ const threshold = options?.threshold ??
99
+ (category && this.categoryThresholds[category] !== undefined
100
+ ? this.categoryThresholds[category]
101
+ : this.defaultThreshold);
102
+ const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
103
+ this.assertDimension(embedding);
104
+ // FT.SEARCH — Valkey Search 1.2 rejects KNN aliases in RETURN/SORTBY,
105
+ // so we omit both. Results include all fields and are pre-sorted by distance.
106
+ const searchStart = performance.now();
107
+ const filter = options?.filter;
108
+ const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
109
+ let rawResult;
110
+ try {
111
+ rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
112
+ }
113
+ catch (err) {
114
+ throw new errors_1.ValkeyCommandError('FT.SEARCH', err);
115
+ }
116
+ const searchMs = performance.now() - searchStart;
117
+ const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
118
+ const categoryLabel = category || 'none';
119
+ const timingAttrs = { 'embedding_latency_ms': embedSec * 1000, 'search_latency_ms': searchMs };
120
+ // No candidates at all
121
+ if (parsed.length === 0) {
122
+ await this.recordStat('misses');
123
+ this.telemetry.metrics.requestsTotal
124
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
125
+ span.setAttributes({
126
+ 'cache.hit': false, 'cache.name': this.name,
127
+ 'cache.category': categoryLabel, ...timingAttrs,
128
+ });
129
+ return { hit: false, confidence: 'miss' };
130
+ }
131
+ const scoreStr = parsed[0].fields['__score'];
132
+ const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
133
+ if (!isNaN(score)) {
134
+ this.telemetry.metrics.similarityScore
135
+ .labels({ cache_name: this.name, category: categoryLabel }).observe(score);
136
+ }
137
+ // Miss (no usable score, or score exceeds threshold)
138
+ if (isNaN(score) || score > threshold) {
139
+ await this.recordStat('misses');
140
+ this.telemetry.metrics.requestsTotal
141
+ .labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
142
+ span.setAttributes({
143
+ 'cache.hit': false, 'cache.name': this.name,
144
+ 'cache.category': categoryLabel, ...timingAttrs,
145
+ ...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
146
+ });
147
+ const result = { hit: false, confidence: 'miss' };
148
+ if (!isNaN(score)) {
149
+ result.similarity = score;
150
+ result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
151
+ }
152
+ return result;
153
+ }
154
+ // Hit
155
+ const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
156
+ await this.recordStat('hits');
157
+ const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
158
+ this.telemetry.metrics.requestsTotal
159
+ .labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
160
+ const matchedKey = parsed[0].key;
161
+ if (this.defaultTtl !== undefined && matchedKey) {
162
+ await this.client.expire(matchedKey, this.defaultTtl);
163
+ }
164
+ span.setAttributes({
165
+ 'cache.hit': true, 'cache.similarity': score, 'cache.threshold': threshold,
166
+ 'cache.confidence': confidence, 'cache.matched_key': matchedKey,
167
+ 'cache.category': categoryLabel, ...timingAttrs,
168
+ });
169
+ return {
170
+ hit: true, response: parsed[0].fields['response'],
171
+ similarity: score, confidence, matchedKey,
172
+ };
173
+ });
174
+ }
175
+ async store(prompt, response, options) {
176
+ this.assertInitialized('store');
177
+ return this.traced('store', async (span) => {
178
+ const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
179
+ this.assertDimension(embedding);
180
+ const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
181
+ const category = options?.category ?? '';
182
+ const model = options?.model ?? '';
183
+ try {
184
+ await this.client.hset(entryKey, {
185
+ prompt, response, model, category,
186
+ inserted_at: Date.now().toString(),
187
+ metadata: JSON.stringify(options?.metadata ?? {}),
188
+ embedding: (0, utils_1.encodeFloat32)(embedding),
189
+ });
190
+ }
191
+ catch (err) {
192
+ throw new errors_1.ValkeyCommandError('HSET', err);
193
+ }
194
+ const ttl = options?.ttl ?? this.defaultTtl;
195
+ if (ttl !== undefined)
196
+ await this.client.expire(entryKey, ttl);
197
+ span.setAttributes({
198
+ 'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
199
+ 'cache.category': category || 'none', 'cache.model': model || 'none',
200
+ 'embedding_latency_ms': embedSec * 1000,
201
+ });
202
+ return entryKey;
203
+ });
204
+ }
205
+ /**
206
+ * Deletes all entries matching a valkey-search filter expression.
207
+ *
208
+ * **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
209
+ * trusted, programmatically-constructed expressions — never unsanitised
210
+ * user input.
211
+ */
212
+ async invalidate(filter) {
213
+ this.assertInitialized('invalidate');
214
+ return this.traced('invalidate', async (span) => {
215
+ let rawResult;
216
+ try {
217
+ rawResult = await this.client.call('FT.SEARCH', this.indexName, filter, 'RETURN', '0', 'LIMIT', '0', String(INVALIDATE_BATCH_SIZE), 'DIALECT', '2');
218
+ }
219
+ catch (err) {
220
+ throw new errors_1.ValkeyCommandError('FT.SEARCH', err);
221
+ }
222
+ const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
223
+ if (parsed.length === 0) {
224
+ span.setAttributes({
225
+ 'cache.name': this.name, 'cache.filter': filter,
226
+ 'cache.deleted_count': 0, 'cache.truncated': false,
227
+ });
228
+ return { deleted: 0, truncated: false };
229
+ }
230
+ const keys = parsed.map((r) => r.key);
231
+ const truncated = keys.length === INVALIDATE_BATCH_SIZE;
232
+ try {
233
+ await this.client.del(keys);
234
+ }
235
+ catch (err) {
236
+ throw new errors_1.ValkeyCommandError('DEL', err);
237
+ }
238
+ span.setAttributes({
239
+ 'cache.name': this.name, 'cache.filter': filter,
240
+ 'cache.deleted_count': keys.length, 'cache.truncated': truncated,
241
+ });
242
+ return { deleted: keys.length, truncated };
243
+ });
244
+ }
245
+ async stats() {
246
+ this.assertInitialized('stats');
247
+ const raw = await this.client.hgetall(this.statsKey);
248
+ const hits = parseInt(raw.hits ?? '0', 10);
249
+ const misses = parseInt(raw.misses ?? '0', 10);
250
+ const total = parseInt(raw.total ?? '0', 10);
251
+ return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total };
252
+ }
253
+ async indexInfo() {
254
+ this.assertInitialized('indexInfo');
255
+ let raw;
256
+ try {
257
+ raw = await this.client.call('FT.INFO', this.indexName);
258
+ }
259
+ catch (err) {
260
+ throw new errors_1.ValkeyCommandError('FT.INFO', err);
261
+ }
262
+ const info = raw;
263
+ let numDocs = 0;
264
+ let indexingState = 'unknown';
265
+ for (let i = 0; i < info.length - 1; i += 2) {
266
+ const key = String(info[i]);
267
+ if (key === 'num_docs')
268
+ numDocs = parseInt(String(info[i + 1]), 10) || 0;
269
+ else if (key === 'indexing')
270
+ indexingState = String(info[i + 1]);
271
+ }
272
+ return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
273
+ }
274
+ // ── Private helpers ────────────────────────────────────────
275
+ async _doInitialize() {
276
+ const gen = this._initGeneration;
277
+ return this.traced('initialize', async () => {
278
+ const dim = await this.ensureIndexAndGetDimension();
279
+ // If flush() ran while we were initializing, don't overwrite its state.
280
+ if (this._initGeneration !== gen)
281
+ return;
282
+ this._dimension = dim;
283
+ this._initialized = true;
284
+ });
285
+ }
286
+ async ensureIndexAndGetDimension() {
287
+ // Try reading an existing index
288
+ try {
289
+ const info = (await this.client.call('FT.INFO', this.indexName));
290
+ const dim = this.parseDimensionFromInfo(info);
291
+ if (dim > 0)
292
+ return dim;
293
+ // Couldn't parse dimension from FT.INFO — fall back to probe
294
+ return (await this.embed('probe')).vector.length;
295
+ }
296
+ catch (err) {
297
+ if (err instanceof errors_1.EmbeddingError)
298
+ throw err;
299
+ if (!this.isIndexNotFoundError(err)) {
300
+ throw new errors_1.ValkeyCommandError('FT.INFO', err);
301
+ }
302
+ }
303
+ // Index doesn't exist — probe dimension and create it
304
+ const dim = (await this.embed('probe')).vector.length;
305
+ try {
306
+ await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
307
+ }
308
+ catch (err) {
309
+ throw new errors_1.ValkeyCommandError('FT.CREATE', err);
310
+ }
311
+ return dim;
312
+ }
313
+ /** Wraps embedFn with error handling and duration tracking. */
314
+ async embed(text) {
315
+ const start = performance.now();
316
+ let vector;
317
+ try {
318
+ vector = await this.embedFn(text);
319
+ }
320
+ catch (err) {
321
+ throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
322
+ }
323
+ const durationSec = (performance.now() - start) / 1000;
324
+ this.telemetry.metrics.embeddingDuration
325
+ .labels({ cache_name: this.name })
326
+ .observe(durationSec);
327
+ return { vector, durationSec };
328
+ }
329
+ /**
330
+ * Wraps a method body in an OTel span with automatic status, end, and
331
+ * operation duration metric. The span is passed to fn so callers can
332
+ * set attributes — but callers must NOT call span.end() or span.setStatus(),
333
+ * as traced() handles both.
334
+ */
335
+ async traced(operation, fn) {
336
+ const start = performance.now();
337
+ return this.telemetry.tracer.startActiveSpan(`semantic_cache.${operation}`, async (span) => {
338
+ try {
339
+ const result = await fn(span);
340
+ span.setStatus({ code: api_1.SpanStatusCode.OK });
341
+ return result;
342
+ }
343
+ catch (err) {
344
+ span.setStatus({ code: api_1.SpanStatusCode.ERROR, message: String(err) });
345
+ throw err;
346
+ }
347
+ finally {
348
+ span.end();
349
+ this.telemetry.metrics.operationDuration
350
+ .labels({ cache_name: this.name, operation })
351
+ .observe((performance.now() - start) / 1000);
352
+ }
353
+ });
354
+ }
355
+ /** Increment stats counters via pipeline. */
356
+ async recordStat(field) {
357
+ const pipeline = this.client.pipeline();
358
+ pipeline.hincrby(this.statsKey, 'total', 1);
359
+ pipeline.hincrby(this.statsKey, field, 1);
360
+ await pipeline.exec();
361
+ }
362
+ assertInitialized(method) {
363
+ if (!this._initialized) {
364
+ throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);
365
+ }
366
+ }
367
+ assertDimension(embedding) {
368
+ if (embedding.length !== this._dimension) {
369
+ throw new errors_1.SemanticCacheUsageError(`Embedding dimension mismatch: index expects ${this._dimension}, embedFn returned ${embedding.length}. Call flush() then initialize() to rebuild.`);
370
+ }
371
+ }
372
+ isIndexNotFoundError(err) {
373
+ const msg = err instanceof Error ? err.message.toLowerCase() : '';
374
+ return (msg.includes('unknown index name') ||
375
+ msg.includes('no such index') ||
376
+ msg.includes('not found'));
377
+ }
378
+ parseDimensionFromInfo(info) {
379
+ for (let i = 0; i < info.length - 1; i += 2) {
380
+ const key = String(info[i]);
381
+ if (key !== 'attributes' && key !== 'fields')
382
+ continue;
383
+ const attributes = info[i + 1];
384
+ if (!Array.isArray(attributes))
385
+ continue;
386
+ for (const attr of attributes) {
387
+ if (!Array.isArray(attr))
388
+ continue;
389
+ let isVector = false;
390
+ let dim = 0;
391
+ for (let j = 0; j < attr.length - 1; j++) {
392
+ const attrKey = String(attr[j]);
393
+ if (attrKey === 'type' && String(attr[j + 1]) === 'VECTOR')
394
+ isVector = true;
395
+ if (attrKey.toLowerCase() === 'dim')
396
+ dim = parseInt(String(attr[j + 1]), 10) || 0;
397
+ // Valkey Search 1.2 nests dimension inside an 'index' sub-array
398
+ if (attrKey === 'index' && Array.isArray(attr[j + 1])) {
399
+ const indexArr = attr[j + 1];
400
+ for (let k = 0; k < indexArr.length - 1; k++) {
401
+ if (String(indexArr[k]) === 'dimensions') {
402
+ const d = parseInt(String(indexArr[k + 1]), 10) || 0;
403
+ if (d > 0)
404
+ dim = d;
405
+ }
406
+ }
407
+ }
408
+ }
409
+ if (isVector && dim > 0)
410
+ return dim;
411
+ }
412
+ }
413
+ return 0;
414
+ }
415
+ }
416
+ exports.SemanticCache = SemanticCache;
@@ -0,0 +1,43 @@
1
+ import type { LanguageModelMiddleware } from 'ai';
2
+ import { SemanticCache } from '../SemanticCache';
3
/** Options accepted by createSemanticCacheMiddleware(). */
export interface SemanticCacheMiddlewareOptions {
    /** A pre-configured SemanticCache instance. */
    cache: SemanticCache;
    /**
     * Extract the prompt text from AI SDK messages.
     * Default: joins all user message content text parts with newlines.
     * Returning an empty string bypasses the cache for that call (no
     * lookup and no store).
     */
    extractPrompt?: (params: {
        prompt: Array<{
            role: string;
            content: unknown;
        }>;
    }) => string;
    /**
     * Extract the response text from an AI SDK result.
     * Default: finds the first text content part with non-empty text, or
     * returns '' when there is none. An empty string means nothing is
     * stored for that call.
     */
    extractResponse?: (result: {
        content: Array<{
            type: string;
            text?: string;
        }>;
    }) => string;
}
27
/**
 * Creates a LanguageModelMiddleware that adds semantic caching to any
 * AI SDK language model. Use with wrapLanguageModel() from the 'ai' package.
 *
 * Only non-streaming generation (wrapGenerate) is cached; streaming calls
 * pass through untouched. The cache's initialize() is invoked lazily on the
 * first generate call.
 *
 * @param opts The cache instance and optional prompt/response extractors.
 * @returns A middleware object to pass to wrapLanguageModel().
 *
 * @example
 * ```typescript
 * import { wrapLanguageModel } from 'ai';
 * import { openai } from '@ai-sdk/openai';
 * import { createSemanticCacheMiddleware } from '@betterdb/semantic-cache/ai';
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: createSemanticCacheMiddleware({ cache }),
 * });
 * ```
 */
export declare function createSemanticCacheMiddleware(opts: SemanticCacheMiddlewareOptions): LanguageModelMiddleware;
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createSemanticCacheMiddleware = createSemanticCacheMiddleware;
4
/**
 * Default prompt extractor: collects the text of every user message and
 * joins the pieces with newlines.
 *
 * User content may be an array of parts (only `{ type: 'text', text }`
 * parts with a string `text` contribute) or a plain string. Non-user
 * messages and non-text parts are ignored. Parts whose `text` is not a
 * string are skipped rather than stringified into the cache key.
 *
 * @param params Object whose `prompt` is the list of messages.
 * @returns The joined user text, or '' when there is none.
 */
function defaultExtractPrompt(params) {
    const parts = [];
    for (const msg of params.prompt) {
        if (msg.role !== 'user') {
            continue;
        }
        if (typeof msg.content === 'string') {
            parts.push(msg.content);
            continue;
        }
        if (!Array.isArray(msg.content)) {
            continue;
        }
        for (const part of msg.content) {
            if (typeof part === 'object' &&
                part !== null &&
                part.type === 'text' &&
                typeof part.text === 'string') {
                parts.push(part.text);
            }
        }
    }
    return parts.join('\n');
}
21
/**
 * Default response extractor: returns the text of the first content part
 * that is of type 'text' and has non-empty text.
 *
 * @param result Generate result whose `content` lists the output parts.
 * @returns The matched text, or '' when no such part exists.
 */
function defaultExtractResponse(result) {
    const contentParts = result.content ?? [];
    const firstText = contentParts.find((piece) => piece.type === 'text' && piece.text);
    return firstText ? firstText.text : '';
}
29
+ /**
30
+ * Creates a LanguageModelMiddleware that adds semantic caching to any
31
+ * AI SDK language model. Use with wrapLanguageModel() from the 'ai' package.
32
+ *
33
+ * @example
34
+ * ```typescript
35
+ * import { wrapLanguageModel } from 'ai';
36
+ * import { openai } from '@ai-sdk/openai';
37
+ * import { createSemanticCacheMiddleware } from '@betterdb/semantic-cache/ai';
38
+ *
39
+ * const model = wrapLanguageModel({
40
+ * model: openai('gpt-4o'),
41
+ * middleware: createSemanticCacheMiddleware({ cache }),
42
+ * });
43
+ * ```
44
+ */
45
+ function createSemanticCacheMiddleware(opts) {
46
+ const { cache } = opts;
47
+ const extractPrompt = opts.extractPrompt ?? defaultExtractPrompt;
48
+ const extractResponse = opts.extractResponse ?? defaultExtractResponse;
49
+ let initPromise = null;
50
+ async function ensureInitialized() {
51
+ if (!initPromise) {
52
+ initPromise = cache.initialize().catch((err) => {
53
+ initPromise = null; // allow retry on transient failure
54
+ throw err;
55
+ });
56
+ }
57
+ await initPromise;
58
+ }
59
+ return {
60
+ specificationVersion: 'v3',
61
+ wrapGenerate: async ({ doGenerate, params }) => {
62
+ await ensureInitialized();
63
+ const prompt = extractPrompt(params);
64
+ if (prompt) {
65
+ try {
66
+ const cached = await cache.check(prompt);
67
+ if (cached.hit && cached.response) {
68
+ // Return a minimal generate result. Cast required because
69
+ // LanguageModelV3GenerateResult is imported transitively via the
70
+ // LanguageModelMiddleware type — we construct it inline to avoid
71
+ // depending on @ai-sdk/provider directly.
72
+ return {
73
+ content: [{ type: 'text', text: cached.response }],
74
+ finishReason: 'stop',
75
+ usage: { promptTokens: 0, completionTokens: 0 },
76
+ warnings: [],
77
+ };
78
+ }
79
+ }
80
+ catch {
81
+ // Swallow check errors — caching should not break inference
82
+ }
83
+ }
84
+ const result = await doGenerate();
85
+ if (prompt) {
86
+ const response = extractResponse(result);
87
+ if (response) {
88
+ await cache.store(prompt, response).catch(() => {
89
+ // Swallow store errors — caching should not break inference
90
+ });
91
+ }
92
+ }
93
+ return result;
94
+ },
95
+ // wrapStream is intentionally not implemented — semantic caching of
96
+ // streaming responses is not supported in v0.1
97
+ };
98
+ }
@@ -0,0 +1,29 @@
1
+ import { BaseCache } from '@langchain/core/caches';
2
+ import type { Generation } from '@langchain/core/outputs';
3
+ import { SemanticCache } from '../SemanticCache';
4
/** Options for the LangChain cache adapter BetterDBSemanticCache. */
export interface BetterDBSemanticCacheOptions {
    /** A pre-configured SemanticCache instance. */
    cache: SemanticCache;
    /**
     * When true, cache lookups and stores are scoped to the specific LLM
     * configuration (model, temperature, etc.). This prevents cross-model
     * cache pollution but reduces hit rates — a prompt cached against gpt-4o
     * will not hit against gpt-4o-mini even if the responses would be identical.
     *
     * The llm_string is hashed (SHA-256, first 16 hex chars) for use as a
     * Valkey TAG field. The hash is deterministic: same LLM config = same hash.
     *
     * Entries are always written with this hash in their `model` field; the
     * flag only controls whether lookups filter on it.
     *
     * Default: false.
     */
    filterByModel?: boolean;
}
20
/**
 * LangChain BaseCache adapter backed by a SemanticCache. The underlying
 * cache is initialized lazily on the first lookup() or update().
 */
export declare class BetterDBSemanticCache extends BaseCache {
    private cache;
    private filterByModel;
    private initPromise;
    constructor(opts: BetterDBSemanticCacheOptions);
    private ensureInitialized;
    private modelHash;
    /**
     * Looks up a semantically similar prompt.
     * @returns A single-element generation list on a hit, or null on a miss
     *   or when the cached response is empty.
     */
    lookup(prompt: string, llm_string: string): Promise<Generation[] | null>;
    /**
     * Stores the concatenated text of `return_val` for `prompt`. Nothing is
     * stored when the concatenated text is empty.
     */
    update(prompt: string, llm_string: string, return_val: Generation[]): Promise<void>;
}
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.BetterDBSemanticCache = void 0;
4
+ const caches_1 = require("@langchain/core/caches");
5
+ const utils_1 = require("../utils");
6
/**
 * LangChain cache adapter that delegates lookups and updates to a
 * SemanticCache, initializing it on first use.
 */
class BetterDBSemanticCache extends caches_1.BaseCache {
    cache;
    filterByModel;
    initPromise = null;
    /**
     * @param opts `cache` (required) and `filterByModel` (default false).
     */
    constructor(opts) {
        super();
        this.cache = opts.cache;
        this.filterByModel = opts.filterByModel ?? false;
    }
    /**
     * Awaits the shared initialization of the underlying cache, starting it
     * if no attempt is in flight. A failed attempt is forgotten so a later
     * call can retry.
     */
    async ensureInitialized() {
        const inFlight = this.initPromise ??
            (this.initPromise = this.cache.initialize().catch((reason) => {
                this.initPromise = null; // allow retry on transient failure
                throw reason;
            }));
        await inFlight;
    }
    /**
     * Derives a short identifier from the serialised LLM configuration.
     *
     * @param llm_string Serialised LangChain LLM config.
     * @returns The first 16 characters of its SHA-256 digest.
     */
    modelHash(llm_string) {
        // llm_string is a serialised LangChain LLM config — not human-readable.
        // Hash it to a stable, TAG-safe identifier.
        const digest = (0, utils_1.sha256)(llm_string);
        return digest.slice(0, 16);
    }
    /**
     * Looks up a cached generation for the prompt, restricted to the model
     * hash when filterByModel is enabled.
     *
     * @returns `[{ text }]` on a hit with a non-empty response, else null.
     */
    async lookup(prompt, llm_string) {
        await this.ensureInitialized();
        const checkOptions = this.filterByModel
            ? { filter: `@model:{${this.modelHash(llm_string)}}` }
            : {};
        const outcome = await this.cache.check(prompt, checkOptions);
        return outcome.hit && outcome.response ? [{ text: outcome.response }] : null;
    }
    /**
     * Stores the concatenated generation text for the prompt, tagged with
     * the model hash. Empty text is not stored.
     */
    async update(prompt, llm_string, return_val) {
        await this.ensureInitialized();
        const combined = return_val.map(({ text }) => text).join('');
        if (combined) {
            await this.cache.store(prompt, combined, {
                model: this.modelHash(llm_string),
            });
        }
    }
}
50
+ exports.BetterDBSemanticCache = BetterDBSemanticCache;
@@ -0,0 +1,25 @@
1
/**
 * Thrown when the caller does something wrong — e.g. calling check()
 * before initialize(), or providing an embedding with the wrong dimension.
 * The message is always actionable: it tells the caller what to fix.
 */
export declare class SemanticCacheUsageError extends Error {
    /** @param message Actionable description of the misuse. */
    constructor(message: string);
}
9
/**
 * Thrown when the embedding function fails.
 * Check the underlying cause for the original error from the embedding provider.
 */
export declare class EmbeddingError extends Error {
    /** The value originally thrown by the embedding function. */
    readonly cause: unknown;
    /**
     * @param message Description of the failure.
     * @param cause The value thrown by the embedding function.
     */
    constructor(message: string, cause: unknown);
}
17
/**
 * Thrown when a Valkey command fails unexpectedly.
 * Includes the command name and the underlying error.
 */
export declare class ValkeyCommandError extends Error {
    /** Name of the command that failed, e.g. 'FT.SEARCH'. */
    readonly command: string;
    /** The value originally thrown by the client. */
    readonly cause: unknown;
    /**
     * @param command Name of the failing command.
     * @param cause The value thrown by the client call.
     */
    constructor(command: string, cause: unknown);
}