@betterdb/semantic-cache 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -0
- package/dist/SemanticCache.d.ts +59 -0
- package/dist/SemanticCache.js +416 -0
- package/dist/adapters/ai.d.ts +43 -0
- package/dist/adapters/ai.js +98 -0
- package/dist/adapters/langchain.d.ts +29 -0
- package/dist/adapters/langchain.js +50 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.js +43 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +9 -0
- package/dist/telemetry.d.ts +19 -0
- package/dist/telemetry.js +54 -0
- package/dist/types.d.ts +142 -0
- package/dist/types.js +2 -0
- package/dist/utils.d.ts +25 -0
- package/dist/utils.js +77 -0
- package/package.json +69 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.SemanticCache = void 0;
|
|
4
|
+
const node_crypto_1 = require("node:crypto");
|
|
5
|
+
const api_1 = require("@opentelemetry/api");
|
|
6
|
+
const errors_1 = require("./errors");
|
|
7
|
+
const telemetry_1 = require("./telemetry");
|
|
8
|
+
const utils_1 = require("./utils");
|
|
9
|
+
const INVALIDATE_BATCH_SIZE = 1000;
|
|
10
|
+
/**
 * Produces a printable message for any thrown value.
 *
 * Error instances contribute their `message`; every other value is passed
 * through `String()`.
 */
function errMsg(err) {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
}
|
|
13
|
+
class SemanticCache {
|
|
14
|
+
client;
|
|
15
|
+
embedFn;
|
|
16
|
+
name;
|
|
17
|
+
indexName;
|
|
18
|
+
entryPrefix;
|
|
19
|
+
statsKey;
|
|
20
|
+
defaultThreshold;
|
|
21
|
+
defaultTtl;
|
|
22
|
+
categoryThresholds;
|
|
23
|
+
uncertaintyBand;
|
|
24
|
+
telemetry;
|
|
25
|
+
_initialized = false;
|
|
26
|
+
_dimension = 0;
|
|
27
|
+
_initPromise = null;
|
|
28
|
+
_initGeneration = 0;
|
|
29
|
+
/**
|
|
30
|
+
* Creates a new SemanticCache instance.
|
|
31
|
+
*
|
|
32
|
+
* The caller owns the iovalkey client lifecycle. SemanticCache does not
|
|
33
|
+
* close or disconnect the client when it is done. Call client.quit() or
|
|
34
|
+
* client.disconnect() yourself when the application shuts down.
|
|
35
|
+
*
|
|
36
|
+
* Call initialize() before using check() or store().
|
|
37
|
+
*/
|
|
38
|
+
constructor(options) {
|
|
39
|
+
this.client = options.client;
|
|
40
|
+
this.embedFn = options.embedFn;
|
|
41
|
+
this.name = options.name ?? 'betterdb_scache';
|
|
42
|
+
this.indexName = `${this.name}:idx`;
|
|
43
|
+
this.entryPrefix = `${this.name}:entry:`;
|
|
44
|
+
this.statsKey = `${this.name}:__stats`;
|
|
45
|
+
this.defaultThreshold = options.defaultThreshold ?? 0.1;
|
|
46
|
+
this.defaultTtl = options.defaultTtl;
|
|
47
|
+
this.categoryThresholds = options.categoryThresholds ?? {};
|
|
48
|
+
this.uncertaintyBand = options.uncertaintyBand ?? 0.05;
|
|
49
|
+
this.telemetry = (0, telemetry_1.createTelemetry)({
|
|
50
|
+
prefix: options.telemetry?.metricsPrefix ?? 'semantic_cache',
|
|
51
|
+
tracerName: options.telemetry?.tracerName ?? '@betterdb/semantic-cache',
|
|
52
|
+
registry: options.telemetry?.registry,
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
// ── Lifecycle ──────────────────────────────────────────────
|
|
56
|
+
async initialize() {
|
|
57
|
+
if (!this._initPromise) {
|
|
58
|
+
this._initPromise = this._doInitialize().catch((err) => {
|
|
59
|
+
this._initPromise = null;
|
|
60
|
+
throw err;
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
return this._initPromise;
|
|
64
|
+
}
|
|
65
|
+
async flush() {
|
|
66
|
+
// Mark uninitialized immediately so concurrent check()/store() calls get
|
|
67
|
+
// a clear SemanticCacheUsageError instead of cryptic Valkey errors.
|
|
68
|
+
// Bump generation so any in-flight _doInitialize() won't overwrite this state.
|
|
69
|
+
this._initialized = false;
|
|
70
|
+
this._initPromise = null;
|
|
71
|
+
this._initGeneration++;
|
|
72
|
+
// Valkey Search 1.2 does not support the DD (Delete Documents) flag on
|
|
73
|
+
// FT.DROPINDEX. Drop the index first, then clean up keys separately.
|
|
74
|
+
try {
|
|
75
|
+
await this.client.call('FT.DROPINDEX', this.indexName);
|
|
76
|
+
}
|
|
77
|
+
catch (err) {
|
|
78
|
+
if (!this.isIndexNotFoundError(err)) {
|
|
79
|
+
throw new errors_1.ValkeyCommandError('FT.DROPINDEX', err);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
const entryPattern = `${this.name}:entry:*`;
|
|
83
|
+
let cursor = '0';
|
|
84
|
+
do {
|
|
85
|
+
const [nextCursor, keys] = await this.client.scan(cursor, 'MATCH', entryPattern, 'COUNT', '100');
|
|
86
|
+
cursor = nextCursor;
|
|
87
|
+
if (keys.length > 0)
|
|
88
|
+
await this.client.del(keys);
|
|
89
|
+
} while (cursor !== '0');
|
|
90
|
+
await this.client.del(this.statsKey);
|
|
91
|
+
}
|
|
92
|
+
// ── Public operations ──────────────────────────────────────
|
|
93
|
+
async check(prompt, options) {
|
|
94
|
+
this.assertInitialized('check');
|
|
95
|
+
return this.traced('check', async (span) => {
|
|
96
|
+
const category = options?.category ?? '';
|
|
97
|
+
const k = options?.k ?? 1;
|
|
98
|
+
const threshold = options?.threshold ??
|
|
99
|
+
(category && this.categoryThresholds[category] !== undefined
|
|
100
|
+
? this.categoryThresholds[category]
|
|
101
|
+
: this.defaultThreshold);
|
|
102
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
|
|
103
|
+
this.assertDimension(embedding);
|
|
104
|
+
// FT.SEARCH — Valkey Search 1.2 rejects KNN aliases in RETURN/SORTBY,
|
|
105
|
+
// so we omit both. Results include all fields and are pre-sorted by distance.
|
|
106
|
+
const searchStart = performance.now();
|
|
107
|
+
const filter = options?.filter;
|
|
108
|
+
const query = `${filter ? `(${filter})` : '*'}=>[KNN ${k} @embedding $vec AS __score]`;
|
|
109
|
+
let rawResult;
|
|
110
|
+
try {
|
|
111
|
+
rawResult = await this.client.call('FT.SEARCH', this.indexName, query, 'PARAMS', '2', 'vec', (0, utils_1.encodeFloat32)(embedding), 'LIMIT', '0', String(k), 'DIALECT', '2');
|
|
112
|
+
}
|
|
113
|
+
catch (err) {
|
|
114
|
+
throw new errors_1.ValkeyCommandError('FT.SEARCH', err);
|
|
115
|
+
}
|
|
116
|
+
const searchMs = performance.now() - searchStart;
|
|
117
|
+
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
118
|
+
const categoryLabel = category || 'none';
|
|
119
|
+
const timingAttrs = { 'embedding_latency_ms': embedSec * 1000, 'search_latency_ms': searchMs };
|
|
120
|
+
// No candidates at all
|
|
121
|
+
if (parsed.length === 0) {
|
|
122
|
+
await this.recordStat('misses');
|
|
123
|
+
this.telemetry.metrics.requestsTotal
|
|
124
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
125
|
+
span.setAttributes({
|
|
126
|
+
'cache.hit': false, 'cache.name': this.name,
|
|
127
|
+
'cache.category': categoryLabel, ...timingAttrs,
|
|
128
|
+
});
|
|
129
|
+
return { hit: false, confidence: 'miss' };
|
|
130
|
+
}
|
|
131
|
+
const scoreStr = parsed[0].fields['__score'];
|
|
132
|
+
const score = scoreStr !== undefined ? parseFloat(scoreStr) : NaN;
|
|
133
|
+
if (!isNaN(score)) {
|
|
134
|
+
this.telemetry.metrics.similarityScore
|
|
135
|
+
.labels({ cache_name: this.name, category: categoryLabel }).observe(score);
|
|
136
|
+
}
|
|
137
|
+
// Miss (no usable score, or score exceeds threshold)
|
|
138
|
+
if (isNaN(score) || score > threshold) {
|
|
139
|
+
await this.recordStat('misses');
|
|
140
|
+
this.telemetry.metrics.requestsTotal
|
|
141
|
+
.labels({ cache_name: this.name, result: 'miss', category: categoryLabel }).inc();
|
|
142
|
+
span.setAttributes({
|
|
143
|
+
'cache.hit': false, 'cache.name': this.name,
|
|
144
|
+
'cache.category': categoryLabel, ...timingAttrs,
|
|
145
|
+
...(isNaN(score) ? {} : { 'cache.similarity': score, 'cache.threshold': threshold }),
|
|
146
|
+
});
|
|
147
|
+
const result = { hit: false, confidence: 'miss' };
|
|
148
|
+
if (!isNaN(score)) {
|
|
149
|
+
result.similarity = score;
|
|
150
|
+
result.nearestMiss = { similarity: score, deltaToThreshold: score - threshold };
|
|
151
|
+
}
|
|
152
|
+
return result;
|
|
153
|
+
}
|
|
154
|
+
// Hit
|
|
155
|
+
const confidence = score >= threshold - this.uncertaintyBand ? 'uncertain' : 'high';
|
|
156
|
+
await this.recordStat('hits');
|
|
157
|
+
const metricResult = confidence === 'uncertain' ? 'uncertain_hit' : 'hit';
|
|
158
|
+
this.telemetry.metrics.requestsTotal
|
|
159
|
+
.labels({ cache_name: this.name, result: metricResult, category: categoryLabel }).inc();
|
|
160
|
+
const matchedKey = parsed[0].key;
|
|
161
|
+
if (this.defaultTtl !== undefined && matchedKey) {
|
|
162
|
+
await this.client.expire(matchedKey, this.defaultTtl);
|
|
163
|
+
}
|
|
164
|
+
span.setAttributes({
|
|
165
|
+
'cache.hit': true, 'cache.similarity': score, 'cache.threshold': threshold,
|
|
166
|
+
'cache.confidence': confidence, 'cache.matched_key': matchedKey,
|
|
167
|
+
'cache.category': categoryLabel, ...timingAttrs,
|
|
168
|
+
});
|
|
169
|
+
return {
|
|
170
|
+
hit: true, response: parsed[0].fields['response'],
|
|
171
|
+
similarity: score, confidence, matchedKey,
|
|
172
|
+
};
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
async store(prompt, response, options) {
|
|
176
|
+
this.assertInitialized('store');
|
|
177
|
+
return this.traced('store', async (span) => {
|
|
178
|
+
const { vector: embedding, durationSec: embedSec } = await this.embed(prompt);
|
|
179
|
+
this.assertDimension(embedding);
|
|
180
|
+
const entryKey = `${this.entryPrefix}${(0, node_crypto_1.randomUUID)()}`;
|
|
181
|
+
const category = options?.category ?? '';
|
|
182
|
+
const model = options?.model ?? '';
|
|
183
|
+
try {
|
|
184
|
+
await this.client.hset(entryKey, {
|
|
185
|
+
prompt, response, model, category,
|
|
186
|
+
inserted_at: Date.now().toString(),
|
|
187
|
+
metadata: JSON.stringify(options?.metadata ?? {}),
|
|
188
|
+
embedding: (0, utils_1.encodeFloat32)(embedding),
|
|
189
|
+
});
|
|
190
|
+
}
|
|
191
|
+
catch (err) {
|
|
192
|
+
throw new errors_1.ValkeyCommandError('HSET', err);
|
|
193
|
+
}
|
|
194
|
+
const ttl = options?.ttl ?? this.defaultTtl;
|
|
195
|
+
if (ttl !== undefined)
|
|
196
|
+
await this.client.expire(entryKey, ttl);
|
|
197
|
+
span.setAttributes({
|
|
198
|
+
'cache.name': this.name, 'cache.key': entryKey, 'cache.ttl': ttl ?? -1,
|
|
199
|
+
'cache.category': category || 'none', 'cache.model': model || 'none',
|
|
200
|
+
'embedding_latency_ms': embedSec * 1000,
|
|
201
|
+
});
|
|
202
|
+
return entryKey;
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Deletes all entries matching a valkey-search filter expression.
|
|
207
|
+
*
|
|
208
|
+
* **Security note:** `filter` is passed directly to FT.SEARCH. Only pass
|
|
209
|
+
* trusted, programmatically-constructed expressions — never unsanitised
|
|
210
|
+
* user input.
|
|
211
|
+
*/
|
|
212
|
+
async invalidate(filter) {
|
|
213
|
+
this.assertInitialized('invalidate');
|
|
214
|
+
return this.traced('invalidate', async (span) => {
|
|
215
|
+
let rawResult;
|
|
216
|
+
try {
|
|
217
|
+
rawResult = await this.client.call('FT.SEARCH', this.indexName, filter, 'RETURN', '0', 'LIMIT', '0', String(INVALIDATE_BATCH_SIZE), 'DIALECT', '2');
|
|
218
|
+
}
|
|
219
|
+
catch (err) {
|
|
220
|
+
throw new errors_1.ValkeyCommandError('FT.SEARCH', err);
|
|
221
|
+
}
|
|
222
|
+
const parsed = (0, utils_1.parseFtSearchResponse)(rawResult);
|
|
223
|
+
if (parsed.length === 0) {
|
|
224
|
+
span.setAttributes({
|
|
225
|
+
'cache.name': this.name, 'cache.filter': filter,
|
|
226
|
+
'cache.deleted_count': 0, 'cache.truncated': false,
|
|
227
|
+
});
|
|
228
|
+
return { deleted: 0, truncated: false };
|
|
229
|
+
}
|
|
230
|
+
const keys = parsed.map((r) => r.key);
|
|
231
|
+
const truncated = keys.length === INVALIDATE_BATCH_SIZE;
|
|
232
|
+
try {
|
|
233
|
+
await this.client.del(keys);
|
|
234
|
+
}
|
|
235
|
+
catch (err) {
|
|
236
|
+
throw new errors_1.ValkeyCommandError('DEL', err);
|
|
237
|
+
}
|
|
238
|
+
span.setAttributes({
|
|
239
|
+
'cache.name': this.name, 'cache.filter': filter,
|
|
240
|
+
'cache.deleted_count': keys.length, 'cache.truncated': truncated,
|
|
241
|
+
});
|
|
242
|
+
return { deleted: keys.length, truncated };
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
async stats() {
|
|
246
|
+
this.assertInitialized('stats');
|
|
247
|
+
const raw = await this.client.hgetall(this.statsKey);
|
|
248
|
+
const hits = parseInt(raw.hits ?? '0', 10);
|
|
249
|
+
const misses = parseInt(raw.misses ?? '0', 10);
|
|
250
|
+
const total = parseInt(raw.total ?? '0', 10);
|
|
251
|
+
return { hits, misses, total, hitRate: total === 0 ? 0 : hits / total };
|
|
252
|
+
}
|
|
253
|
+
async indexInfo() {
|
|
254
|
+
this.assertInitialized('indexInfo');
|
|
255
|
+
let raw;
|
|
256
|
+
try {
|
|
257
|
+
raw = await this.client.call('FT.INFO', this.indexName);
|
|
258
|
+
}
|
|
259
|
+
catch (err) {
|
|
260
|
+
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
261
|
+
}
|
|
262
|
+
const info = raw;
|
|
263
|
+
let numDocs = 0;
|
|
264
|
+
let indexingState = 'unknown';
|
|
265
|
+
for (let i = 0; i < info.length - 1; i += 2) {
|
|
266
|
+
const key = String(info[i]);
|
|
267
|
+
if (key === 'num_docs')
|
|
268
|
+
numDocs = parseInt(String(info[i + 1]), 10) || 0;
|
|
269
|
+
else if (key === 'indexing')
|
|
270
|
+
indexingState = String(info[i + 1]);
|
|
271
|
+
}
|
|
272
|
+
return { name: this.indexName, numDocs, dimension: this._dimension, indexingState };
|
|
273
|
+
}
|
|
274
|
+
// ── Private helpers ────────────────────────────────────────
|
|
275
|
+
async _doInitialize() {
|
|
276
|
+
const gen = this._initGeneration;
|
|
277
|
+
return this.traced('initialize', async () => {
|
|
278
|
+
const dim = await this.ensureIndexAndGetDimension();
|
|
279
|
+
// If flush() ran while we were initializing, don't overwrite its state.
|
|
280
|
+
if (this._initGeneration !== gen)
|
|
281
|
+
return;
|
|
282
|
+
this._dimension = dim;
|
|
283
|
+
this._initialized = true;
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
async ensureIndexAndGetDimension() {
|
|
287
|
+
// Try reading an existing index
|
|
288
|
+
try {
|
|
289
|
+
const info = (await this.client.call('FT.INFO', this.indexName));
|
|
290
|
+
const dim = this.parseDimensionFromInfo(info);
|
|
291
|
+
if (dim > 0)
|
|
292
|
+
return dim;
|
|
293
|
+
// Couldn't parse dimension from FT.INFO — fall back to probe
|
|
294
|
+
return (await this.embed('probe')).vector.length;
|
|
295
|
+
}
|
|
296
|
+
catch (err) {
|
|
297
|
+
if (err instanceof errors_1.EmbeddingError)
|
|
298
|
+
throw err;
|
|
299
|
+
if (!this.isIndexNotFoundError(err)) {
|
|
300
|
+
throw new errors_1.ValkeyCommandError('FT.INFO', err);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
// Index doesn't exist — probe dimension and create it
|
|
304
|
+
const dim = (await this.embed('probe')).vector.length;
|
|
305
|
+
try {
|
|
306
|
+
await this.client.call('FT.CREATE', this.indexName, 'ON', 'HASH', 'PREFIX', '1', this.entryPrefix, 'SCHEMA', 'prompt', 'TEXT', 'NOSTEM', 'response', 'TEXT', 'NOSTEM', 'model', 'TAG', 'category', 'TAG', 'inserted_at', 'NUMERIC', 'SORTABLE', 'embedding', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', String(dim), 'DISTANCE_METRIC', 'COSINE');
|
|
307
|
+
}
|
|
308
|
+
catch (err) {
|
|
309
|
+
throw new errors_1.ValkeyCommandError('FT.CREATE', err);
|
|
310
|
+
}
|
|
311
|
+
return dim;
|
|
312
|
+
}
|
|
313
|
+
/** Wraps embedFn with error handling and duration tracking. */
|
|
314
|
+
async embed(text) {
|
|
315
|
+
const start = performance.now();
|
|
316
|
+
let vector;
|
|
317
|
+
try {
|
|
318
|
+
vector = await this.embedFn(text);
|
|
319
|
+
}
|
|
320
|
+
catch (err) {
|
|
321
|
+
throw new errors_1.EmbeddingError(`embedFn failed: ${errMsg(err)}`, err);
|
|
322
|
+
}
|
|
323
|
+
const durationSec = (performance.now() - start) / 1000;
|
|
324
|
+
this.telemetry.metrics.embeddingDuration
|
|
325
|
+
.labels({ cache_name: this.name })
|
|
326
|
+
.observe(durationSec);
|
|
327
|
+
return { vector, durationSec };
|
|
328
|
+
}
|
|
329
|
+
/**
|
|
330
|
+
* Wraps a method body in an OTel span with automatic status, end, and
|
|
331
|
+
* operation duration metric. The span is passed to fn so callers can
|
|
332
|
+
* set attributes — but callers must NOT call span.end() or span.setStatus(),
|
|
333
|
+
* as traced() handles both.
|
|
334
|
+
*/
|
|
335
|
+
async traced(operation, fn) {
|
|
336
|
+
const start = performance.now();
|
|
337
|
+
return this.telemetry.tracer.startActiveSpan(`semantic_cache.${operation}`, async (span) => {
|
|
338
|
+
try {
|
|
339
|
+
const result = await fn(span);
|
|
340
|
+
span.setStatus({ code: api_1.SpanStatusCode.OK });
|
|
341
|
+
return result;
|
|
342
|
+
}
|
|
343
|
+
catch (err) {
|
|
344
|
+
span.setStatus({ code: api_1.SpanStatusCode.ERROR, message: String(err) });
|
|
345
|
+
throw err;
|
|
346
|
+
}
|
|
347
|
+
finally {
|
|
348
|
+
span.end();
|
|
349
|
+
this.telemetry.metrics.operationDuration
|
|
350
|
+
.labels({ cache_name: this.name, operation })
|
|
351
|
+
.observe((performance.now() - start) / 1000);
|
|
352
|
+
}
|
|
353
|
+
});
|
|
354
|
+
}
|
|
355
|
+
/** Increment stats counters via pipeline. */
|
|
356
|
+
async recordStat(field) {
|
|
357
|
+
const pipeline = this.client.pipeline();
|
|
358
|
+
pipeline.hincrby(this.statsKey, 'total', 1);
|
|
359
|
+
pipeline.hincrby(this.statsKey, field, 1);
|
|
360
|
+
await pipeline.exec();
|
|
361
|
+
}
|
|
362
|
+
assertInitialized(method) {
|
|
363
|
+
if (!this._initialized) {
|
|
364
|
+
throw new errors_1.SemanticCacheUsageError(`SemanticCache.initialize() must be called before ${method}().`);
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
assertDimension(embedding) {
|
|
368
|
+
if (embedding.length !== this._dimension) {
|
|
369
|
+
throw new errors_1.SemanticCacheUsageError(`Embedding dimension mismatch: index expects ${this._dimension}, embedFn returned ${embedding.length}. Call flush() then initialize() to rebuild.`);
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
isIndexNotFoundError(err) {
|
|
373
|
+
const msg = err instanceof Error ? err.message.toLowerCase() : '';
|
|
374
|
+
return (msg.includes('unknown index name') ||
|
|
375
|
+
msg.includes('no such index') ||
|
|
376
|
+
msg.includes('not found'));
|
|
377
|
+
}
|
|
378
|
+
parseDimensionFromInfo(info) {
|
|
379
|
+
for (let i = 0; i < info.length - 1; i += 2) {
|
|
380
|
+
const key = String(info[i]);
|
|
381
|
+
if (key !== 'attributes' && key !== 'fields')
|
|
382
|
+
continue;
|
|
383
|
+
const attributes = info[i + 1];
|
|
384
|
+
if (!Array.isArray(attributes))
|
|
385
|
+
continue;
|
|
386
|
+
for (const attr of attributes) {
|
|
387
|
+
if (!Array.isArray(attr))
|
|
388
|
+
continue;
|
|
389
|
+
let isVector = false;
|
|
390
|
+
let dim = 0;
|
|
391
|
+
for (let j = 0; j < attr.length - 1; j++) {
|
|
392
|
+
const attrKey = String(attr[j]);
|
|
393
|
+
if (attrKey === 'type' && String(attr[j + 1]) === 'VECTOR')
|
|
394
|
+
isVector = true;
|
|
395
|
+
if (attrKey.toLowerCase() === 'dim')
|
|
396
|
+
dim = parseInt(String(attr[j + 1]), 10) || 0;
|
|
397
|
+
// Valkey Search 1.2 nests dimension inside an 'index' sub-array
|
|
398
|
+
if (attrKey === 'index' && Array.isArray(attr[j + 1])) {
|
|
399
|
+
const indexArr = attr[j + 1];
|
|
400
|
+
for (let k = 0; k < indexArr.length - 1; k++) {
|
|
401
|
+
if (String(indexArr[k]) === 'dimensions') {
|
|
402
|
+
const d = parseInt(String(indexArr[k + 1]), 10) || 0;
|
|
403
|
+
if (d > 0)
|
|
404
|
+
dim = d;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
if (isVector && dim > 0)
|
|
410
|
+
return dim;
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
return 0;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
exports.SemanticCache = SemanticCache;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { LanguageModelMiddleware } from 'ai';
|
|
2
|
+
import { SemanticCache } from '../SemanticCache';
|
|
3
|
+
/** Options accepted by createSemanticCacheMiddleware(). */
export interface SemanticCacheMiddlewareOptions {
    /** The SemanticCache instance the middleware reads from and writes to. */
    cache: SemanticCache;
    /**
     * Derives the cache lookup text from the AI SDK call parameters.
     * When omitted, the text parts of all user messages are joined.
     */
    extractPrompt?: (params: { prompt: { role: string; content: unknown }[] }) => string;
    /**
     * Derives the text to cache from an AI SDK generate result.
     * When omitted, the first text content part is used.
     */
    extractResponse?: (result: { content: { type: string; text?: string }[] }) => string;
}
|
|
27
|
+
/**
 * Builds a LanguageModelMiddleware that puts a semantic cache in front of an
 * AI SDK language model. Pass the result to wrapLanguageModel() from the
 * 'ai' package.
 *
 * @param opts - The cache instance plus optional prompt/response extractors.
 * @returns Middleware suitable for wrapLanguageModel().
 *
 * @example
 * ```typescript
 * import { wrapLanguageModel } from 'ai';
 * import { openai } from '@ai-sdk/openai';
 * import { createSemanticCacheMiddleware } from '@betterdb/semantic-cache/ai';
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: createSemanticCacheMiddleware({ cache }),
 * });
 * ```
 */
export declare function createSemanticCacheMiddleware(opts: SemanticCacheMiddlewareOptions): LanguageModelMiddleware;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createSemanticCacheMiddleware = createSemanticCacheMiddleware;
|
|
4
|
+
function defaultExtractPrompt(params) {
|
|
5
|
+
const parts = [];
|
|
6
|
+
for (const msg of params.prompt) {
|
|
7
|
+
if (msg.role === 'user' && Array.isArray(msg.content)) {
|
|
8
|
+
for (const part of msg.content) {
|
|
9
|
+
if (typeof part === 'object' &&
|
|
10
|
+
part !== null &&
|
|
11
|
+
'type' in part &&
|
|
12
|
+
part.type === 'text' &&
|
|
13
|
+
'text' in part) {
|
|
14
|
+
parts.push(part.text);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return parts.join('\n');
|
|
20
|
+
}
|
|
21
|
+
function defaultExtractResponse(result) {
|
|
22
|
+
for (const part of result.content ?? []) {
|
|
23
|
+
if (part.type === 'text' && part.text) {
|
|
24
|
+
return part.text;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return '';
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Creates a LanguageModelMiddleware that adds semantic caching to any
|
|
31
|
+
* AI SDK language model. Use with wrapLanguageModel() from the 'ai' package.
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* import { wrapLanguageModel } from 'ai';
|
|
36
|
+
* import { openai } from '@ai-sdk/openai';
|
|
37
|
+
* import { createSemanticCacheMiddleware } from '@betterdb/semantic-cache/ai';
|
|
38
|
+
*
|
|
39
|
+
* const model = wrapLanguageModel({
|
|
40
|
+
* model: openai('gpt-4o'),
|
|
41
|
+
* middleware: createSemanticCacheMiddleware({ cache }),
|
|
42
|
+
* });
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
function createSemanticCacheMiddleware(opts) {
|
|
46
|
+
const { cache } = opts;
|
|
47
|
+
const extractPrompt = opts.extractPrompt ?? defaultExtractPrompt;
|
|
48
|
+
const extractResponse = opts.extractResponse ?? defaultExtractResponse;
|
|
49
|
+
let initPromise = null;
|
|
50
|
+
async function ensureInitialized() {
|
|
51
|
+
if (!initPromise) {
|
|
52
|
+
initPromise = cache.initialize().catch((err) => {
|
|
53
|
+
initPromise = null; // allow retry on transient failure
|
|
54
|
+
throw err;
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
await initPromise;
|
|
58
|
+
}
|
|
59
|
+
return {
|
|
60
|
+
specificationVersion: 'v3',
|
|
61
|
+
wrapGenerate: async ({ doGenerate, params }) => {
|
|
62
|
+
await ensureInitialized();
|
|
63
|
+
const prompt = extractPrompt(params);
|
|
64
|
+
if (prompt) {
|
|
65
|
+
try {
|
|
66
|
+
const cached = await cache.check(prompt);
|
|
67
|
+
if (cached.hit && cached.response) {
|
|
68
|
+
// Return a minimal generate result. Cast required because
|
|
69
|
+
// LanguageModelV3GenerateResult is imported transitively via the
|
|
70
|
+
// LanguageModelMiddleware type — we construct it inline to avoid
|
|
71
|
+
// depending on @ai-sdk/provider directly.
|
|
72
|
+
return {
|
|
73
|
+
content: [{ type: 'text', text: cached.response }],
|
|
74
|
+
finishReason: 'stop',
|
|
75
|
+
usage: { promptTokens: 0, completionTokens: 0 },
|
|
76
|
+
warnings: [],
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
catch {
|
|
81
|
+
// Swallow check errors — caching should not break inference
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
const result = await doGenerate();
|
|
85
|
+
if (prompt) {
|
|
86
|
+
const response = extractResponse(result);
|
|
87
|
+
if (response) {
|
|
88
|
+
await cache.store(prompt, response).catch(() => {
|
|
89
|
+
// Swallow store errors — caching should not break inference
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return result;
|
|
94
|
+
},
|
|
95
|
+
// wrapStream is intentionally not implemented — semantic caching of
|
|
96
|
+
// streaming responses is not supported in v0.1
|
|
97
|
+
};
|
|
98
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { BaseCache } from '@langchain/core/caches';
|
|
2
|
+
import type { Generation } from '@langchain/core/outputs';
|
|
3
|
+
import { SemanticCache } from '../SemanticCache';
|
|
4
|
+
/** Options accepted by the BetterDBSemanticCache constructor. */
export interface BetterDBSemanticCacheOptions {
    /** The SemanticCache instance that backs this LangChain cache. */
    cache: SemanticCache;
    /**
     * Scope lookups to the LLM configuration (model, temperature, etc.) that
     * produced the cached entry.
     *
     * Enabling this prevents cross-model cache pollution at the cost of hit
     * rate: a prompt cached against gpt-4o will not hit against gpt-4o-mini
     * even if the responses would be identical.
     *
     * The llm_string is hashed (SHA-256, first 16 hex chars) so it can be
     * used as a Valkey TAG value. The hash is deterministic: the same LLM
     * config always yields the same hash.
     *
     * Default: false.
     */
    filterByModel?: boolean;
}
|
|
20
|
+
/** LangChain BaseCache implementation backed by a SemanticCache. */
export declare class BetterDBSemanticCache extends BaseCache {
    /** Underlying semantic cache. */
    private cache;
    /** Whether lookups are restricted to entries stored for the same LLM config. */
    private filterByModel;
    /** In-flight or completed initialization of the underlying cache. */
    private initPromise;
    constructor(opts: BetterDBSemanticCacheOptions);
    /** Initializes the underlying cache on first use. */
    private ensureInitialized;
    /** Hashes an llm_string into a short TAG-safe identifier. */
    private modelHash;
    /**
     * Looks up a cached generation for `prompt`.
     *
     * @returns A single-element Generation array on a hit, otherwise null.
     */
    lookup(prompt: string, llm_string: string): Promise<Generation[] | null>;
    /** Stores the concatenated text of `return_val` for `prompt`. */
    update(prompt: string, llm_string: string, return_val: Generation[]): Promise<void>;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.BetterDBSemanticCache = void 0;
|
|
4
|
+
const caches_1 = require("@langchain/core/caches");
|
|
5
|
+
const utils_1 = require("../utils");
|
|
6
|
+
/**
 * LangChain cache adapter that delegates lookups and updates to a
 * SemanticCache, initializing it lazily on first use.
 */
class BetterDBSemanticCache extends caches_1.BaseCache {
    cache;
    filterByModel;
    initPromise = null;
    constructor(opts) {
        super();
        const { cache, filterByModel } = opts;
        this.cache = cache;
        this.filterByModel = filterByModel ?? false;
    }
    /** Starts initialization once and lets every caller await the same promise. */
    async ensureInitialized() {
        if (!this.initPromise) {
            const resetOnFailure = (err) => {
                this.initPromise = null; // allow retry on transient failure
                throw err;
            };
            this.initPromise = this.cache.initialize().catch(resetOnFailure);
        }
        await this.initPromise;
    }
    /**
     * Derives the model tag stored with each entry. llm_string is a
     * serialised LangChain LLM config, so it is hashed and truncated to a
     * stable, TAG-safe identifier.
     */
    modelHash(llm_string) {
        const digest = (0, utils_1.sha256)(llm_string);
        return digest.slice(0, 16);
    }
    /**
     * Returns `[{ text }]` for a cache hit with a non-empty response and
     * null otherwise. With filterByModel enabled, the search is limited to
     * entries tagged with this llm_string's hash.
     */
    async lookup(prompt, llm_string) {
        await this.ensureInitialized();
        const checkOptions = {};
        if (this.filterByModel) {
            const tag = this.modelHash(llm_string);
            checkOptions.filter = `@model:{${tag}}`;
        }
        const outcome = await this.cache.check(prompt, checkOptions);
        if (outcome.hit && outcome.response) {
            return [{ text: outcome.response }];
        }
        return null;
    }
    /**
     * Stores the concatenated generation texts for `prompt`, tagged with the
     * llm_string hash. Nothing is stored when the combined text is empty.
     */
    async update(prompt, llm_string, return_val) {
        await this.ensureInitialized();
        const pieces = return_val.map((generation) => generation.text);
        const text = pieces.join('');
        if (!text) {
            return;
        }
        const model = this.modelHash(llm_string);
        await this.cache.store(prompt, text, { model });
    }
}
|
|
50
|
+
exports.BetterDBSemanticCache = BetterDBSemanticCache;
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Signals incorrect use of the library by the caller — for example calling
 * check() before initialize(), or supplying an embedding whose dimension
 * does not match the index.
 *
 * The message is always actionable: it tells the caller what to fix.
 */
export declare class SemanticCacheUsageError extends Error {
    /** @param message - Actionable description of the misuse. */
    constructor(message: string);
}
|
|
9
|
+
/**
 * Signals that the embedding function failed.
 *
 * Inspect `cause` for the original error raised by the embedding provider.
 */
export declare class EmbeddingError extends Error {
    /** The value originally thrown by the embedding function. */
    readonly cause: unknown;
    /**
     * @param message - Description of the failure.
     * @param cause - The value originally thrown by the embedding function.
     */
    constructor(message: string, cause: unknown);
}
|
|
17
|
+
/**
 * Signals that a Valkey command failed unexpectedly.
 *
 * Carries the command name and the underlying error.
 */
export declare class ValkeyCommandError extends Error {
    /** Name of the command that failed, e.g. 'FT.SEARCH'. */
    readonly command: string;
    /** The underlying error raised while running the command. */
    readonly cause: unknown;
    /**
     * @param command - Name of the command that failed.
     * @param cause - The underlying error.
     */
    constructor(command: string, cause: unknown);
}
|