@lov3kaizen/agentsea-cache 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +323 -0
- package/dist/BaseMatchStrategy-1E1SHaUt.d.ts +60 -0
- package/dist/SemanticCache-vysguwUQ.d.ts +65 -0
- package/dist/SimilarityEngine-Cwv_mF9a.d.ts +41 -0
- package/dist/analytics/index.d.ts +123 -0
- package/dist/analytics/index.js +275 -0
- package/dist/analytics/index.js.map +1 -0
- package/dist/cache.types-DMuyQseO.d.ts +99 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.js +3301 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/agentsea/index.d.ts +103 -0
- package/dist/integrations/agentsea/index.js +201 -0
- package/dist/integrations/agentsea/index.js.map +1 -0
- package/dist/integrations/gateway/index.d.ts +98 -0
- package/dist/integrations/gateway/index.js +205 -0
- package/dist/integrations/gateway/index.js.map +1 -0
- package/dist/invalidation/index.d.ts +113 -0
- package/dist/invalidation/index.js +360 -0
- package/dist/invalidation/index.js.map +1 -0
- package/dist/store.types-BQy5Yyz9.d.ts +111 -0
- package/dist/stores/index.d.ts +138 -0
- package/dist/stores/index.js +1147 -0
- package/dist/stores/index.js.map +1 -0
- package/dist/strategies/index.d.ts +36 -0
- package/dist/strategies/index.js +280 -0
- package/dist/strategies/index.js.map +1 -0
- package/dist/streaming/index.d.ts +206 -0
- package/dist/streaming/index.js +794 -0
- package/dist/streaming/index.js.map +1 -0
- package/package.json +108 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3301 @@
|
|
|
1
|
+
import EventEmitter2, { EventEmitter } from 'eventemitter3';
|
|
2
|
+
import { nanoid } from 'nanoid';
|
|
3
|
+
import murmurhash from 'murmurhash';
|
|
4
|
+
import { LRUCache } from 'lru-cache';
|
|
5
|
+
|
|
6
|
+
// src/core/SemanticCache.ts
|
|
7
|
+
/**
 * Build a random identifier, optionally namespaced by a prefix.
 *
 * @param {string} [prefix] - When truthy, the id is returned as `${prefix}_${id}`.
 * @returns {string} The value produced by `nanoid(16)`, prefixed if requested.
 */
function generateId(prefix) {
  const randomPart = nanoid(16);
  if (!prefix) {
    return randomPart;
  }
  return `${prefix}_${randomPart}`;
}
|
|
11
|
+
/**
 * Current time as reported by `Date.now()` (milliseconds since the Unix epoch).
 *
 * @returns {number}
 */
function now() {
  const timestampMs = Date.now();
  return timestampMs;
}
|
|
14
|
+
/**
 * Decide whether something created at `createdAt` has outlived its TTL.
 *
 * @param {number} createdAt - Creation time in epoch milliseconds.
 * @param {number} ttlSeconds - Lifetime in seconds; zero or negative means "never expires".
 * @returns {boolean} True once the current time is strictly past `createdAt + ttl`.
 */
function isExpired(createdAt, ttlSeconds) {
  const neverExpires = ttlSeconds <= 0;
  if (neverExpires) {
    return false;
  }
  // The TTL is given in seconds while timestamps are in milliseconds.
  const expiresAt = createdAt + ttlSeconds * 1e3;
  return now() > expiresAt;
}
|
|
18
|
+
/**
 * Rough size estimate for a cache entry.
 *
 * The estimate is: 4 per embedding element + 2 per character of every message
 * content + 2 per character of the response content + a fixed 500 overhead.
 * (Presumably bytes for float32 values and UTF-16 code units - confirm.)
 *
 * @param {object} entry - Entry with `request.messages`, `response` and an optional `embedding`.
 * @returns {number} Estimated size.
 */
function estimateEntrySize(entry) {
  const embeddingSize = (entry.embedding?.length ?? 0) * 4;

  let messagesSize = 0;
  entry.request.messages.forEach((message) => {
    // Messages without content contribute nothing.
    messagesSize += (message.content?.length ?? 0) * 2;
  });

  const responseSize = (entry.response.content?.length ?? 0) * 2;
  const fixedOverhead = 500;

  return embeddingSize + messagesSize + responseSize + fixedOverhead;
}
|
|
28
|
+
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not mutated.
 *
 * @param {number[]} values - Samples; an empty list yields 0.
 * @param {number} p - Percentile, nominally in the range 0-100. Values outside
 *   that range are clamped to the smallest / largest sample instead of reading
 *   past the end of the array (which used to return `undefined` for p > 100).
 * @returns {number} The sample at the requested rank.
 */
function percentile(values, p) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  // Clamp on both sides: p <= 0 maps to the first sample, p >= 100 to the last.
  const index = Math.min(Math.max(0, rank), sorted.length - 1);
  return sorted[index];
}
|
|
34
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Samples to average.
 * @returns {number} The average, or 0 for an empty list.
 */
function mean(values) {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
|
38
|
+
|
|
39
|
+
// src/analytics/CacheAnalytics.ts
|
|
40
|
+
// Built-in price table keyed by model name. Each row gives the price per
// 1,000 input tokens and per 1,000 output tokens; "default" is the row used
// for models that are not listed.
var DEFAULT_MODEL_PRICING = {
  "gpt-4o": { inputPer1K: 0.0025, outputPer1K: 0.01 },
  "gpt-4o-mini": { inputPer1K: 0.00015, outputPer1K: 0.0006 },
  "gpt-4-turbo": { inputPer1K: 0.01, outputPer1K: 0.03 },
  "gpt-4": { inputPer1K: 0.03, outputPer1K: 0.06 },
  "gpt-3.5-turbo": { inputPer1K: 0.0005, outputPer1K: 0.0015 },
  "claude-3-opus": { inputPer1K: 0.015, outputPer1K: 0.075 },
  "claude-sonnet-4-20250514": { inputPer1K: 0.003, outputPer1K: 0.015 },
  "claude-3-5-sonnet": { inputPer1K: 0.003, outputPer1K: 0.015 },
  "claude-3-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 },
  default: { inputPer1K: 0.005, outputPer1K: 0.015 }
};

// Defaults merged underneath the user-supplied analytics configuration.
var DEFAULT_CONFIG = {
  enabled: true,
  // 1 means every hit/miss is recorded.
  sampleRate: 1,
  // 7 days, expressed in seconds.
  retentionSeconds: 7 * 24 * 60 * 60,
  // 1 minute, expressed in milliseconds.
  flushIntervalMs: 60 * 1000,
  modelPricing: DEFAULT_MODEL_PRICING
};
|
|
61
|
+
/**
 * In-memory collector for cache hit/miss counters, lookup latencies and
 * estimated cost savings.
 *
 * Hits and misses are subject to `config.enabled` and `config.sampleRate`;
 * set operations are only subject to `config.enabled`.
 */
var CacheAnalytics = class {
  config;
  hits = 0;
  misses = 0;
  exactHits = 0;
  semanticHits = 0;
  tokensSaved = 0;
  inputTokensSaved = 0;
  outputTokensSaved = 0;
  // NOTE(review): grows by one number per recorded hit/miss and is only
  // emptied by reset(); consider bounding it for long-running processes.
  latencies = [];
  modelHits = /* @__PURE__ */ new Map();
  /** model -> { input, output } tokens saved, so each model is priced on its own usage. */
  modelTokens = /* @__PURE__ */ new Map();
  namespaceHits = /* @__PURE__ */ new Map();
  hitEvents = [];
  missEvents = [];
  setCount = 0;

  /**
   * @param {object} [config] - Overrides merged (shallowly) over DEFAULT_CONFIG.
   */
  constructor(config) {
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  /**
   * Record a cache hit
   *
   * @param {object} entry - The cached entry that was served.
   * @param {string} type - "exact" counts as an exact hit; anything else as semantic.
   * @param {number} latencyMs - Lookup latency to record.
   */
  recordHit(entry, type, latencyMs) {
    if (!this.config.enabled) return;
    if (Math.random() > this.config.sampleRate) return;

    this.hits++;
    if (type === "exact") {
      this.exactHits++;
    } else {
      this.semanticHits++;
    }
    this.latencies.push(latencyMs);

    // Entries without usage data are counted as a hit that saved zero tokens
    // instead of throwing.
    const usage = entry.response.usage ?? {};
    const promptTokens = usage.promptTokens ?? 0;
    const completionTokens = usage.completionTokens ?? 0;
    const totalTokens = usage.totalTokens ?? promptTokens + completionTokens;
    this.tokensSaved += totalTokens;
    this.inputTokensSaved += promptTokens;
    this.outputTokensSaved += completionTokens;

    const model = entry.request.model;
    this.modelHits.set(model, (this.modelHits.get(model) ?? 0) + 1);
    const perModel = this.modelTokens.get(model) ?? { input: 0, output: 0 };
    perModel.input += promptTokens;
    perModel.output += completionTokens;
    this.modelTokens.set(model, perModel);

    const namespace = entry.metadata.namespace;
    if (namespace) {
      this.namespaceHits.set(namespace, (this.namespaceHits.get(namespace) ?? 0) + 1);
    }

    this.hitEvents.push({
      timestamp: Date.now(),
      type,
      model,
      namespace,
      similarity: entry.metadata.similarity,
      latencyMs,
      tokensSaved: totalTokens
    });
    this.trimEvents();
  }

  /**
   * Record a cache miss
   *
   * @param {number} latencyMs - Lookup latency to record.
   * @param {string} [reason="not_found"] - Stored on the miss event.
   */
  recordMiss(latencyMs, reason = "not_found") {
    if (!this.config.enabled) return;
    if (Math.random() > this.config.sampleRate) return;
    this.misses++;
    this.latencies.push(latencyMs);
    this.missEvents.push({
      timestamp: Date.now(),
      // The model is not passed to this method, so it is recorded as unknown.
      model: "unknown",
      latencyMs,
      reason
    });
    this.trimEvents();
  }

  /**
   * Record a cache set operation
   */
  recordSet(_entry) {
    if (!this.config.enabled) return;
    this.setCount++;
  }

  /**
   * Get analytics summary
   *
   * @returns {object} Counters, hit rate, latency statistics, token and cost
   *   savings, the top 5 models/namespaces by hits, and hourly statistics.
   */
  getSummary() {
    const total = this.hits + this.misses;
    return {
      totalHits: this.hits,
      totalMisses: this.misses,
      exactHits: this.exactHits,
      semanticHits: this.semanticHits,
      hitRate: total > 0 ? this.hits / total : 0,
      avgLatencyMs: mean(this.latencies),
      p50LatencyMs: percentile(this.latencies, 50),
      p95LatencyMs: percentile(this.latencies, 95),
      p99LatencyMs: percentile(this.latencies, 99),
      totalTokensSaved: this.tokensSaved,
      estimatedCostSavingsUSD: this.calculateCostSavings(),
      topModels: this.getTopModels(5),
      topNamespaces: this.getTopNamespaces(5),
      hourlyStats: this.getHourlyStats()
    };
  }

  /**
   * Get cost savings report
   *
   * The "without cache" cost is extrapolated from the average saving per hit
   * multiplied by the total number of recorded requests.
   *
   * @param {string} [periodLabel="all-time"] - Copied into the report as `period`.
   * @returns {object} The report.
   */
  getCostSavingsReport(periodLabel = "all-time") {
    const total = this.hits + this.misses;
    const costSaved = this.calculateCostSavings();
    // `|| 1` avoids a division by zero before the first hit.
    const avgCostPerRequest = costSaved / (this.hits || 1);
    const estimatedCostWithoutCache = avgCostPerRequest * total;
    return {
      period: periodLabel,
      totalRequests: total,
      cachedRequests: this.hits,
      hitRate: total > 0 ? this.hits / total : 0,
      inputTokensSaved: this.inputTokensSaved,
      outputTokensSaved: this.outputTokensSaved,
      totalTokensSaved: this.tokensSaved,
      estimatedCostWithoutCache,
      actualCostWithCache: estimatedCostWithoutCache - costSaved,
      costSaved,
      reductionPercent: estimatedCostWithoutCache > 0 ? costSaved / estimatedCostWithoutCache * 100 : 0
    };
  }

  /**
   * Get performance metrics
   *
   * Embedding time, store write time and failures are not tracked by this
   * class and are reported as 0.
   */
  getPerformanceMetrics() {
    const total = this.hits + this.misses;
    const avgLatency = mean(this.latencies);
    return {
      avgLookupMs: avgLatency,
      avgEmbeddingMs: 0,
      // Would need to track separately
      avgStoreReadMs: avgLatency,
      avgStoreWriteMs: 0,
      // Would need to track separately
      p50LatencyMs: percentile(this.latencies, 50),
      p95LatencyMs: percentile(this.latencies, 95),
      p99LatencyMs: percentile(this.latencies, 99),
      totalOperations: total + this.setCount,
      failedOperations: 0,
      // Would need to track separately
      errorRate: 0
    };
  }

  /**
   * Reset all analytics
   */
  reset() {
    this.hits = 0;
    this.misses = 0;
    this.exactHits = 0;
    this.semanticHits = 0;
    this.tokensSaved = 0;
    this.inputTokensSaved = 0;
    this.outputTokensSaved = 0;
    this.latencies = [];
    this.modelHits.clear();
    this.modelTokens.clear();
    this.namespaceHits.clear();
    this.hitEvents = [];
    this.missEvents = [];
    this.setCount = 0;
  }

  /**
   * Export analytics data
   *
   * @param {string} [format="json"] - "csv" yields a header line plus one value
   *   line containing only the non-object summary fields; anything else yields
   *   pretty-printed JSON of the full summary.
   * @returns {string}
   */
  export(format = "json") {
    const data = this.getSummary();
    if (format === "csv") {
      const headers = Object.keys(data).filter(
        (k) => typeof data[k] !== "object"
      );
      const values = headers.map((h) => data[h]);
      return `${headers.join(",")}\n${values.join(",")}`;
    }
    return JSON.stringify(data, null, 2);
  }

  /**
   * Estimate the money saved by cache hits.
   *
   * Each model's saved input/output tokens are priced with that model's own
   * rates (previously every model was priced on the average token count across
   * all models, which mis-prices mixed workloads).
   *
   * @returns {number} Estimated savings.
   */
  calculateCostSavings() {
    let savings = 0;
    for (const [model, tokens] of this.modelTokens) {
      const pricing = this.resolvePricing(model);
      savings += tokens.input / 1e3 * pricing.inputPer1K + tokens.output / 1e3 * pricing.outputPer1K;
    }
    if (savings === 0 && this.tokensSaved > 0) {
      // Kept from the original implementation: if per-model pricing produced
      // nothing, fall back to the default rates on the aggregate counters.
      const defaultPricing = this.resolvePricing("default");
      savings = this.inputTokensSaved / 1e3 * defaultPricing.inputPer1K + this.outputTokensSaved / 1e3 * defaultPricing.outputPer1K;
    }
    return savings;
  }

  /**
   * Look up the price row for a model.
   *
   * Order: the model's own row in `config.modelPricing`, then that table's
   * "default" row, then the built-in default row. Only own properties are
   * consulted so model names such as "toString" cannot hit prototype members,
   * and a custom table without a "default" row no longer causes a crash.
   */
  resolvePricing(model) {
    const table = this.config.modelPricing ?? {};
    const ownRow = (name) => Object.hasOwn(table, name) ? table[name] : void 0;
    return ownRow(model) ?? ownRow("default") ?? DEFAULT_MODEL_PRICING.default;
  }

  /** Top `n` models by recorded hit count, highest first. */
  getTopModels(n) {
    return Array.from(this.modelHits.entries()).map(([model, hits]) => ({ model, hits })).sort((a, b) => b.hits - a.hits).slice(0, n);
  }

  /** Top `n` namespaces by recorded hit count, highest first. */
  getTopNamespaces(n) {
    return Array.from(this.namespaceHits.entries()).map(([namespace, hits]) => ({ namespace, hits })).sort((a, b) => b.hits - a.hits).slice(0, n);
  }

  /**
   * Aggregate retained hit/miss events by local hour of day (`Date#getHours`).
   * Events from different days that share an hour land in the same bucket.
   *
   * @returns {Array<{hour: number, hits: number, misses: number, avgLatencyMs: number}>}
   *   Buckets sorted by hour.
   */
  getHourlyStats() {
    const hourlyData = /* @__PURE__ */ new Map();
    const bucketFor = (timestamp) => {
      const hour = new Date(timestamp).getHours();
      let bucket = hourlyData.get(hour);
      if (!bucket) {
        bucket = { hits: 0, misses: 0, latencies: [] };
        hourlyData.set(hour, bucket);
      }
      return bucket;
    };
    for (const event of this.hitEvents) {
      const bucket = bucketFor(event.timestamp);
      bucket.hits++;
      bucket.latencies.push(event.latencyMs);
    }
    for (const event of this.missEvents) {
      const bucket = bucketFor(event.timestamp);
      bucket.misses++;
      bucket.latencies.push(event.latencyMs);
    }
    return Array.from(hourlyData.entries()).map(([hour, data]) => ({
      hour,
      hits: data.hits,
      misses: data.misses,
      avgLatencyMs: mean(data.latencies)
    })).sort((a, b) => a.hour - b.hour);
  }

  /** Drop hit/miss events older than `config.retentionSeconds`. */
  trimEvents() {
    const cutoff = Date.now() - this.config.retentionSeconds * 1e3;
    this.hitEvents = this.hitEvents.filter((e) => e.timestamp > cutoff);
    this.missEvents = this.missEvents.filter((e) => e.timestamp > cutoff);
  }
};
|
|
293
|
+
/**
 * Factory wrapper around the CacheAnalytics constructor.
 *
 * @param {object} [config] - Passed through unchanged to `new CacheAnalytics`.
 * @returns {CacheAnalytics} A new analytics collector.
 */
function createCacheAnalytics(config) {
  const analytics = new CacheAnalytics(config);
  return analytics;
}
|
|
296
|
+
// Defaults for cache-key generation; caller options are merged over these.
// NOTE(review): in the visible code only `normalizeWhitespace` and
// `extractUserMessage` are read (by normalizeRequest); `includeTemperature`
// and `includeTools` appear unused here - confirm before relying on them.
var DEFAULT_KEY_OPTIONS = {
  "includeTemperature": false,
  "includeTools": false,
  "normalizeWhitespace": true,
  "extractUserMessage": false
};
|
|
302
|
+
/**
 * Build the exact-match cache key for a request.
 *
 * The request is normalized, serialized with JSON.stringify and hashed with
 * `murmurhash.v3`; the key has the form `cache:<model>:<hash in hex>`.
 *
 * @param {string} model - Model name; appears verbatim in the key.
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @param {object} [options] - Overrides merged over DEFAULT_KEY_OPTIONS.
 * @returns {string} The cache key.
 */
function generateCacheKey(model, messages, options = {}) {
  const effectiveOptions = { ...DEFAULT_KEY_OPTIONS, ...options };
  const serialized = JSON.stringify(
    normalizeRequest(model, messages, effectiveOptions)
  );
  const digest = murmurhash.v3(serialized);
  return `cache:${model}:${digest.toString(16)}`;
}
|
|
308
|
+
/**
 * Reduce a request to the canonical shape that is hashed into the cache key.
 *
 * Each message is cut down to `{ role, content }`. When
 * `options.normalizeWhitespace` is set, string contents have their whitespace
 * normalized; non-string contents (e.g. `null` or an array of content parts)
 * are passed through untouched instead of crashing on `.trim()`.
 *
 * @param {string} model - Model name, copied into the result.
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @param {object} [options]
 * @param {boolean} [options.normalizeWhitespace] - Normalize string contents.
 * @param {boolean} [options.extractUserMessage] - Replace the message list with
 *   the content of the last user message.
 * @returns {{model: string, messages: *}} The normalized request.
 */
function normalizeRequest(model, messages, options = {}) {
  const shouldNormalize = Boolean(options.normalizeWhitespace);
  const normalizedMessages = messages.map((m) => ({
    role: m.role,
    content: shouldNormalize && typeof m.content === "string" ? normalizeWhitespace(m.content) : m.content
  }));
  return {
    model,
    messages: options.extractUserMessage ? extractUserMessage(normalizedMessages) : normalizedMessages
  };
}
|
|
318
|
+
/**
 * Canonicalize whitespace in a string: trim both ends, convert CRLF to LF,
 * then collapse every run of whitespace into a single space.
 *
 * @param {string} text - Input text.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(text) {
  const trimmed = text.trim();
  const unixNewlines = trimmed.replace(/\r\n/g, "\n");
  const collapsed = unixNewlines.replace(/\s+/g, " ");
  return collapsed;
}
|
|
321
|
+
/**
 * Return the content of the most recent message whose role is "user".
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {*} That message's `content`, or "" when there is no user message.
 */
function extractUserMessage(messages) {
  // Walk backwards so the last user message wins.
  let index = messages.length;
  while (index-- > 0) {
    const candidate = messages[index];
    if (candidate.role === "user") {
      return candidate.content;
    }
  }
  return "";
}
|
|
329
|
+
/**
 * Return the content of the first message whose role is "system".
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {*} That message's `content`, or undefined when there is none.
 */
function extractSystemPrompt(messages) {
  const position = messages.findIndex((message) => message.role === "system");
  if (position === -1) {
    return undefined;
  }
  return messages[position].content;
}
|
|
333
|
+
/**
 * Build the key `<model>:<text>` where the text is the whitespace-normalized
 * content of the last user message.
 *
 * @param {string} model - Model name.
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {string} The semantic key.
 */
function generateSemanticKey(model, messages) {
  const normalizedUserText = normalizeWhitespace(extractUserMessage(messages));
  return `${model}:${normalizedUserText}`;
}
|
|
338
|
+
/**
 * Hash the "shape" of a conversation: each message contributes
 * `<role>:<content length>`, joined with "|", and the result is hashed with
 * `murmurhash.v3` and returned in hexadecimal.
 *
 * Messages without content (`null`/`undefined`) count as length 0 rather than
 * throwing, matching how estimateEntrySize treats missing content.
 *
 * @param {Array<{role: string, content: *}>} messages - Conversation messages.
 * @returns {string} Hex fingerprint.
 */
function generateConversationFingerprint(messages) {
  const pattern = messages.map((m) => `${m.role}:${m.content?.length ?? 0}`).join("|");
  return murmurhash.v3(pattern).toString(16);
}
|
|
342
|
+
|
|
343
|
+
// src/core/SemanticCache.ts
|
|
344
|
+
// Defaults for SemanticCache; the user-supplied config is merged over these.
var DEFAULT_CONFIG2 = {
  // Entry lifetime (compared against seconds by isExpired).
  defaultTTL: 3600,
  similarityThreshold: 0.92,
  maxEntries: 10000,
  // 1GB
  maxSizeBytes: 1024 ** 3,
  keyPrefix: "llm-cache",
  matchStrategy: "hybrid",
  analyticsEnabled: true,
  namespace: "default",
  cacheKeyFields: ["model", "messages"],
  normalizeWhitespace: true
};
|
|
357
|
+
/**
 * LLM response cache that delegates matching to a strategy and persistence to
 * a store, and keeps running statistics.
 *
 * Events emitted: "hit" (entry, similarity), "miss" (key, source),
 * "set" (entry), "delete" (key) and "error" (error, stage).
 */
var SemanticCache = class extends EventEmitter {
  config;
  store;
  strategy;
  similarity;
  analytics;
  stats;
  /** Number of similarity values folded into `stats.avgSimilarity`. */
  similaritySamples = 0;
  /** Number of lookups folded into `stats.avgLatencyMs`. */
  lookupSamples = 0;

  /**
   * @param {object} config - Overrides merged (shallowly) over DEFAULT_CONFIG2.
   * @param {object} store - Backing store (get/set/delete/keys/clear/close/checkHealth).
   * @param {object} strategy - Object whose `match()` performs the lookup.
   * @param {object} [similarity] - Embedding engine; `embed()` is used when storing.
   */
  constructor(config, store, strategy, similarity) {
    super();
    this.config = { ...DEFAULT_CONFIG2, ...config };
    this.store = store;
    this.strategy = strategy;
    this.similarity = similarity;
    this.analytics = new CacheAnalytics({
      enabled: this.config.analyticsEnabled
    });
    this.stats = this.createInitialStats();
  }

  /** Fresh, all-zero statistics object. */
  createInitialStats() {
    return {
      entries: 0,
      sizeBytes: 0,
      hits: 0,
      misses: 0,
      hitRate: 0,
      exactHits: 0,
      semanticHits: 0,
      avgSimilarity: 0,
      avgLatencyMs: 0,
      costSavingsUSD: 0,
      tokensSaved: 0
    };
  }

  /**
   * Wrap an LLM call with caching
   *
   * A failure while storing the fresh response does not fail the call: the
   * response is still returned (set() has already emitted "error"). This
   * mirrors get(), which degrades to a miss when the lookup fails.
   *
   * @param request - The LLM request
   * @param fn - Function to call on cache miss
   * @param options - Cache options (`skipCache`, `forceRefresh`, plus those of get/set)
   * @returns The response (cached or fresh) with a `_cache` marker
   * @throws Whatever `fn` throws (after emitting "error" with stage "wrap")
   */
  async wrap(request, fn, options) {
    const startTime = performance.now();
    if (options?.skipCache) {
      const response = await fn(request);
      return { ...response, _cache: { hit: false } };
    }

    const lookupResult = await this.get(request, options);
    this.recordLookupLatency(lookupResult.latencyMs);

    if (lookupResult.hit && lookupResult.entry && !options?.forceRefresh) {
      this.emit("hit", lookupResult.entry, lookupResult.similarity ?? 1);
      this.stats.hits++;
      this.updateStats("hit", lookupResult);
      if (this.config.analyticsEnabled) {
        this.analytics.recordHit(
          lookupResult.entry,
          lookupResult.source === "exact" ? "exact" : "semantic",
          lookupResult.latencyMs
        );
      }
      return {
        ...lookupResult.entry.response,
        _cache: {
          hit: true,
          similarity: lookupResult.similarity
        }
      };
    }

    const key = generateCacheKey(request.model, request.messages, {
      normalizeWhitespace: this.config.normalizeWhitespace
    });
    this.emit("miss", key, lookupResult.source);
    this.stats.misses++;
    if (this.config.analyticsEnabled) {
      this.analytics.recordMiss(performance.now() - startTime);
    }

    let response;
    try {
      response = await fn(request);
    } catch (error) {
      this.emit("error", error, "wrap");
      throw error;
    }
    try {
      await this.set(request, response, options);
    } catch {
      // Best effort: set() already emitted "error"; a cache write failure must
      // not throw away a response the caller has already paid for.
    }
    return { ...response, _cache: { hit: false } };
  }

  /**
   * Get an entry from cache
   *
   * Expired matches are deleted from the store and reported as a miss. Errors
   * are emitted as "error" (stage "get") and reported as a miss.
   *
   * @param request - The request to look up
   * @param options - Lookup options (`namespace`)
   * @returns The lookup result, always including `latencyMs`
   */
  async get(request, options) {
    const startTime = performance.now();
    try {
      const result = await this.strategy.match(
        {
          model: request.model,
          messages: request.messages,
          temperature: request.temperature
        },
        this.store,
        this.similarity,
        {
          threshold: this.config.similarityThreshold,
          namespace: options?.namespace ?? this.config.namespace
        }
      );
      if (result.hit && result.entry) {
        if (isExpired(result.entry.metadata.createdAt, result.entry.metadata.ttl)) {
          await this.store.delete(result.entry.key);
          return {
            hit: false,
            latencyMs: performance.now() - startTime,
            source: "miss"
          };
        }
      }
      return {
        ...result,
        latencyMs: performance.now() - startTime
      };
    } catch (error) {
      this.emit("error", error, "get");
      return {
        hit: false,
        latencyMs: performance.now() - startTime,
        source: "miss"
      };
    }
  }

  /**
   * Set an entry in cache
   *
   * The key is the exact-match key, suffixed with `:t:<temperature>` when a
   * temperature is given and `:ns:<namespace>` for non-default namespaces.
   * An embedding of the last user message is attached when a similarity engine
   * is configured and the match strategy is not "exact"; embedding failures
   * are emitted as "error" (stage "embedding") and the entry is stored without one.
   *
   * @param request - The request
   * @param response - The response to cache
   * @param options - Cache options (`namespace`, `ttl`, `tags`, `userId`, `agentId`)
   * @throws Store errors, after emitting "error" with stage "set"
   */
  async set(request, response, options) {
    const namespace = options?.namespace ?? this.config.namespace;
    const baseKey = generateCacheKey(request.model, request.messages, {
      normalizeWhitespace: this.config.normalizeWhitespace
    });
    const tempSuffix = request.temperature !== void 0 ? `:t:${request.temperature}` : "";
    const key = namespace && namespace !== "default" ? `${baseKey}${tempSuffix}:ns:${namespace}` : `${baseKey}${tempSuffix}`;

    let embedding;
    if (this.similarity && this.config.matchStrategy !== "exact") {
      try {
        const userMessage = extractUserMessage(request.messages);
        if (userMessage) {
          embedding = await this.similarity.embed(userMessage);
        }
      } catch (error) {
        this.emit("error", error, "embedding");
      }
    }

    const entry = {
      id: generateId("entry"),
      key,
      embedding,
      request: {
        model: request.model,
        messages: request.messages,
        temperature: request.temperature,
        maxTokens: request.maxTokens,
        tools: request.tools
      },
      response: {
        content: response.content,
        model: response.model ?? request.model,
        usage: response.usage ?? {
          promptTokens: 0,
          completionTokens: 0,
          totalTokens: 0
        },
        finishReason: response.finishReason ?? "stop",
        toolCalls: response.toolCalls
      },
      metadata: {
        createdAt: now(),
        accessedAt: now(),
        accessCount: 0,
        ttl: options?.ttl ?? this.config.defaultTTL,
        hitCount: 0,
        tags: options?.tags,
        namespace,
        userId: options?.userId,
        agentId: options?.agentId
      }
    };

    try {
      await this.store.set(key, entry);
      this.emit("set", entry);
      this.stats.entries++;
      if (this.config.analyticsEnabled) {
        this.analytics.recordSet(entry);
      }
    } catch (error) {
      this.emit("error", error, "set");
      throw error;
    }
  }

  /**
   * Delete an entry from cache
   *
   * @param key - The cache key to delete
   * @returns Whether the entry was deleted
   */
  async delete(key) {
    const deleted = await this.store.delete(key);
    if (deleted) {
      this.emit("delete", key);
      this.stats.entries = Math.max(0, this.stats.entries - 1);
    }
    return deleted;
  }

  /**
   * Clear all entries from cache and reset statistics and analytics
   */
  async clear() {
    await this.store.clear();
    this.stats = this.createInitialStats();
    this.similaritySamples = 0;
    this.lookupSamples = 0;
    this.analytics.reset();
  }

  /**
   * Invalidate entries by pattern
   *
   * @param pattern - Regex pattern to match keys. Global (`g`) and sticky (`y`)
   *   patterns are rewound before each key, because `RegExp#test` otherwise
   *   resumes from the previous match position and skips keys.
   * @returns Number of entries invalidated
   */
  async invalidateByPattern(pattern) {
    const keys = await this.store.keys();
    let count = 0;
    for (const key of keys) {
      if (pattern.global || pattern.sticky) {
        pattern.lastIndex = 0;
      }
      if (pattern.test(key)) {
        await this.store.delete(key);
        count++;
      }
    }
    this.stats.entries = Math.max(0, this.stats.entries - count);
    return count;
  }

  /**
   * Invalidate entries by tags
   *
   * @param tags - Tags to match; an entry is removed if it carries any of them
   * @returns Number of entries invalidated
   */
  async invalidateByTags(tags) {
    const keys = await this.store.keys();
    // Set lookup instead of a linear scan of `tags` for every entry tag.
    const wanted = new Set(tags);
    let count = 0;
    for (const key of keys) {
      const entry = await this.store.get(key);
      if (entry?.metadata.tags?.some((t) => wanted.has(t))) {
        await this.store.delete(key);
        count++;
      }
    }
    this.stats.entries = Math.max(0, this.stats.entries - count);
    return count;
  }

  /**
   * Get cache statistics
   *
   * @returns A shallow copy of the current statistics
   */
  getStats() {
    this.updateHitRate();
    return { ...this.stats };
  }

  /**
   * Get analytics instance
   */
  getAnalytics() {
    return this.analytics;
  }

  /**
   * Get configuration
   *
   * @returns A shallow copy of the effective configuration
   */
  getConfig() {
    return { ...this.config };
  }

  /**
   * Check store health
   */
  async checkHealth() {
    return this.store.checkHealth();
  }

  /**
   * Close the cache and release resources
   */
  async close() {
    await this.store.close();
    this.removeAllListeners();
  }

  /**
   * Fold one lookup latency into the running mean `stats.avgLatencyMs`
   * (previously this statistic was never updated and always read 0).
   */
  recordLookupLatency(latencyMs) {
    if (!Number.isFinite(latencyMs)) return;
    this.lookupSamples = (this.lookupSamples ?? 0) + 1;
    this.stats.avgLatencyMs += (latencyMs - this.stats.avgLatencyMs) / this.lookupSamples;
  }

  /**
   * Update hit-related statistics after a lookup.
   *
   * `avgSimilarity` is the mean over semantic hits that reported a similarity;
   * exact hits no longer dilute it.
   */
  updateStats(type, result) {
    if (type === "hit" && result) {
      if (result.source === "exact") {
        this.stats.exactHits++;
      } else if (result.source === "semantic") {
        this.stats.semanticHits++;
        if (result.similarity) {
          this.similaritySamples = (this.similaritySamples ?? 0) + 1;
          this.stats.avgSimilarity += (result.similarity - this.stats.avgSimilarity) / this.similaritySamples;
        }
      }
      if (result.entry?.response.usage) {
        this.stats.tokensSaved += result.entry.response.usage.totalTokens;
        // Flat estimate of 0.01 per 1K tokens, independent of the model.
        this.stats.costSavingsUSD += result.entry.response.usage.totalTokens / 1e3 * 0.01;
      }
    }
    this.updateHitRate();
  }

  /** Recompute `stats.hitRate` from the hit and miss counters. */
  updateHitRate() {
    const total = this.stats.hits + this.stats.misses;
    this.stats.hitRate = total > 0 ? this.stats.hits / total : 0;
  }
};
|
|
672
|
+
/**
 * Factory wrapper around the SemanticCache constructor; all arguments are
 * passed through unchanged.
 *
 * @param {object} config - Cache configuration.
 * @param {object} store - Backing store.
 * @param {object} strategy - Match strategy.
 * @param {object} [similarity] - Similarity / embedding engine.
 * @returns {SemanticCache} A new cache instance.
 */
function createSemanticCache(config, store, strategy, similarity) {
  const cache = new SemanticCache(config, store, strategy, similarity);
  return cache;
}
|
|
675
|
+
|
|
676
|
+
// src/stores/BaseCacheStore.ts
|
|
677
|
+
/**
 * Shared base for cache stores: holds the store configuration and a small
 * set of operation counters.
 */
var BaseCacheStore = class {
  /** Store configuration */
  config;
  /** Store metrics */
  metrics = {
    gets: 0,
    sets: 0,
    deletes: 0,
    hits: 0,
    misses: 0
  };

  /**
   * @param {object} [config] - Store configuration. A missing or undefined
   *   `namespace` becomes "default".
   */
  constructor(config = {}) {
    this.config = {
      ...config,
      // Applied after the spread so an explicit `namespace: undefined` cannot
      // overwrite the default.
      namespace: config.namespace ?? "default"
    };
  }

  /**
   * Get the store namespace
   */
  get namespace() {
    return this.config.namespace ?? "default";
  }

  /**
   * Get store metrics
   *
   * @returns A shallow copy of the counters
   */
  getMetrics() {
    return { ...this.metrics };
  }

  /**
   * Reset store metrics
   */
  resetMetrics() {
    this.metrics = {
      gets: 0,
      sets: 0,
      deletes: 0,
      hits: 0,
      misses: 0
    };
  }

  /**
   * Increment a metric counter
   *
   * Names that do not refer to an existing numeric counter are ignored.
   *
   * @param {string} metric - Counter name.
   * @param {number} [amount=1] - Amount to add.
   */
  incrementMetric(metric, amount = 1) {
    if (typeof this.metrics[metric] === "number") {
      this.metrics[metric] += amount;
    }
  }
};
|
|
727
|
+
// Defaults for the in-memory store; the store's config is merged over these.
var DEFAULT_CONFIG3 = {
  maxEntries: 10000,
  // 1GB
  maxSizeBytes: 1024 ** 3,
  evictionPolicy: "lru"
};
|
|
733
|
+
var MemoryCacheStore = class extends BaseCacheStore {
|
|
734
|
+
storeType = "memory";
|
|
735
|
+
cache;
|
|
736
|
+
vectors = /* @__PURE__ */ new Map();
|
|
737
|
+
memoryConfig;
|
|
738
|
+
closed = false;
|
|
739
|
+
constructor(config = { type: "memory" }) {
|
|
740
|
+
super(config);
|
|
741
|
+
this.memoryConfig = { ...DEFAULT_CONFIG3, ...config };
|
|
742
|
+
this.cache = new LRUCache({
|
|
743
|
+
max: this.memoryConfig.maxEntries ?? 1e4,
|
|
744
|
+
maxSize: this.memoryConfig.maxSizeBytes ?? 1024 * 1024 * 1024,
|
|
745
|
+
sizeCalculation: (entry) => estimateEntrySize(entry),
|
|
746
|
+
ttl: 0,
|
|
747
|
+
// TTL handled per-entry
|
|
748
|
+
updateAgeOnGet: true,
|
|
749
|
+
allowStale: false
|
|
750
|
+
});
|
|
751
|
+
}
|
|
752
|
+
get(key) {
|
|
753
|
+
this.incrementMetric("gets");
|
|
754
|
+
const entry = this.cache.get(key);
|
|
755
|
+
if (entry) {
|
|
756
|
+
this.incrementMetric("hits");
|
|
757
|
+
entry.metadata.accessedAt = now();
|
|
758
|
+
entry.metadata.accessCount++;
|
|
759
|
+
return Promise.resolve(entry);
|
|
760
|
+
}
|
|
761
|
+
this.incrementMetric("misses");
|
|
762
|
+
return Promise.resolve(void 0);
|
|
763
|
+
}
|
|
764
|
+
set(key, entry) {
|
|
765
|
+
const startTime = performance.now();
|
|
766
|
+
this.incrementMetric("sets");
|
|
767
|
+
const ttlMs = entry.metadata.ttl > 0 ? entry.metadata.ttl * 1e3 : void 0;
|
|
768
|
+
this.cache.set(key, entry, { ttl: ttlMs });
|
|
769
|
+
if (entry.embedding && entry.embedding.length > 0) {
|
|
770
|
+
this.vectors.set(key, {
|
|
771
|
+
id: entry.id,
|
|
772
|
+
vector: entry.embedding
|
|
773
|
+
});
|
|
774
|
+
}
|
|
775
|
+
return Promise.resolve({
|
|
776
|
+
success: true,
|
|
777
|
+
id: entry.id,
|
|
778
|
+
durationMs: performance.now() - startTime
|
|
779
|
+
});
|
|
780
|
+
}
|
|
781
|
+
has(key) {
|
|
782
|
+
return Promise.resolve(this.cache.has(key));
|
|
783
|
+
}
|
|
784
|
+
delete(key) {
|
|
785
|
+
this.incrementMetric("deletes");
|
|
786
|
+
const existed = this.cache.has(key);
|
|
787
|
+
this.cache.delete(key);
|
|
788
|
+
this.vectors.delete(key);
|
|
789
|
+
return Promise.resolve(existed);
|
|
790
|
+
}
|
|
791
|
+
clear() {
|
|
792
|
+
this.cache.clear();
|
|
793
|
+
this.vectors.clear();
|
|
794
|
+
return Promise.resolve();
|
|
795
|
+
}
|
|
796
|
+
size() {
|
|
797
|
+
return Promise.resolve(this.cache.size);
|
|
798
|
+
}
|
|
799
|
+
keys() {
|
|
800
|
+
return Promise.resolve(Array.from(this.cache.keys()));
|
|
801
|
+
}
|
|
802
|
+
query(vector, options) {
|
|
803
|
+
const startTime = performance.now();
|
|
804
|
+
const topK = options?.topK ?? 10;
|
|
805
|
+
const minSimilarity = options?.minSimilarity ?? 0;
|
|
806
|
+
const results = [];
|
|
807
|
+
for (const [key, stored] of this.vectors) {
|
|
808
|
+
const entry = this.cache.get(key);
|
|
809
|
+
if (!entry) continue;
|
|
810
|
+
if (options?.namespace && entry.metadata.namespace !== options.namespace) {
|
|
811
|
+
continue;
|
|
812
|
+
}
|
|
813
|
+
const similarity = this.cosineSimilarity(vector, stored.vector);
|
|
814
|
+
if (similarity >= minSimilarity) {
|
|
815
|
+
results.push({ ...entry, score: similarity });
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
results.sort((a, b) => b.score - a.score);
|
|
819
|
+
return Promise.resolve({
|
|
820
|
+
entries: results.slice(0, topK),
|
|
821
|
+
durationMs: performance.now() - startTime
|
|
822
|
+
});
|
|
823
|
+
}
|
|
824
|
+
checkHealth() {
|
|
825
|
+
return Promise.resolve({
|
|
826
|
+
healthy: !this.closed,
|
|
827
|
+
latencyMs: 0,
|
|
828
|
+
lastCheck: now(),
|
|
829
|
+
error: this.closed ? "Store is closed" : void 0
|
|
830
|
+
});
|
|
831
|
+
}
|
|
832
|
+
close() {
|
|
833
|
+
this.closed = true;
|
|
834
|
+
this.cache.clear();
|
|
835
|
+
this.vectors.clear();
|
|
836
|
+
return Promise.resolve();
|
|
837
|
+
}
|
|
838
|
+
/**
|
|
839
|
+
* Compute cosine similarity between two vectors
|
|
840
|
+
*/
|
|
841
|
+
cosineSimilarity(a, b) {
|
|
842
|
+
if (a.length !== b.length) return 0;
|
|
843
|
+
let dotProduct2 = 0;
|
|
844
|
+
let normA = 0;
|
|
845
|
+
let normB = 0;
|
|
846
|
+
for (let i = 0; i < a.length; i++) {
|
|
847
|
+
dotProduct2 += a[i] * b[i];
|
|
848
|
+
normA += a[i] * a[i];
|
|
849
|
+
normB += b[i] * b[i];
|
|
850
|
+
}
|
|
851
|
+
const denominator = Math.sqrt(normA) * Math.sqrt(normB);
|
|
852
|
+
if (denominator === 0) return 0;
|
|
853
|
+
return dotProduct2 / denominator;
|
|
854
|
+
}
|
|
855
|
+
/**
|
|
856
|
+
* Get memory usage information
|
|
857
|
+
*/
|
|
858
|
+
getMemoryInfo() {
|
|
859
|
+
return {
|
|
860
|
+
entries: this.cache.size,
|
|
861
|
+
calculatedSize: this.cache.calculatedSize ?? 0,
|
|
862
|
+
maxSize: this.memoryConfig.maxSizeBytes ?? 0,
|
|
863
|
+
vectorCount: this.vectors.size
|
|
864
|
+
};
|
|
865
|
+
}
|
|
866
|
+
/**
|
|
867
|
+
* Prune expired entries
|
|
868
|
+
*/
|
|
869
|
+
prune() {
|
|
870
|
+
this.cache.purgeStale();
|
|
871
|
+
let pruned = 0;
|
|
872
|
+
for (const key of this.vectors.keys()) {
|
|
873
|
+
if (!this.cache.has(key)) {
|
|
874
|
+
this.vectors.delete(key);
|
|
875
|
+
pruned++;
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
return Promise.resolve(pruned);
|
|
879
|
+
}
|
|
880
|
+
};
|
|
881
|
+
/**
 * Factory for `MemoryCacheStore`.
 *
 * @param {object} [config] - Store options; merged over `{ type: "memory" }`,
 *   so a `type` supplied by the caller takes precedence.
 * @returns {MemoryCacheStore}
 */
function createMemoryCacheStore(config) {
  const options = { type: "memory", ...config };
  return new MemoryCacheStore(options);
}
|
|
884
|
+
|
|
885
|
+
// src/similarity/metrics/SimilarityMetrics.ts
|
|
886
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when the lengths differ or
 *   when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const dimension = a.length;
  if (dimension !== b.length) {
    return 0;
  }
  let dot = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  let index = 0;
  while (index < dimension) {
    const x = a[index];
    const y = b[index];
    dot += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
    index += 1;
  }
  const scale = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  return scale === 0 ? 0 : dot / scale;
}
|
|
900
|
+
/**
 * Euclidean (L2) distance between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The distance, or `Infinity` when the lengths differ.
 */
function euclideanDistance(a, b) {
  const dimension = a.length;
  if (dimension !== b.length) {
    return Infinity;
  }
  let squaredTotal = 0;
  let index = 0;
  while (index < dimension) {
    const delta = a[index] - b[index];
    squaredTotal += delta * delta;
    index += 1;
  }
  return Math.sqrt(squaredTotal);
}
|
|
909
|
+
/**
 * Dot product of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Sum of pairwise products, or 0 when the lengths differ.
 */
function dotProduct(a, b) {
  const dimension = a.length;
  if (dimension !== b.length) {
    return 0;
  }
  let total = 0;
  let index = 0;
  while (index < dimension) {
    total += a[index] * b[index];
    index += 1;
  }
  return total;
}
|
|
917
|
+
/**
 * Manhattan (L1) distance between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Sum of absolute differences, or `Infinity` when the
 *   lengths differ.
 */
function manhattanDistance(a, b) {
  const dimension = a.length;
  if (dimension !== b.length) {
    return Infinity;
  }
  let total = 0;
  let index = 0;
  while (index < dimension) {
    total += Math.abs(a[index] - b[index]);
    index += 1;
  }
  return total;
}
|
|
925
|
+
/**
 * Map a distance onto a similarity score via 1 / (1 + distance).
 *
 * @param {number} distance - Distance value; 0 maps to 1 and `Infinity` to 0.
 * @returns {number}
 */
function distanceToSimilarity(distance) {
  const shifted = 1 + distance;
  return 1 / shifted;
}
|
|
928
|
+
/**
 * Scale a vector to unit length.
 *
 * Always returns a new array, including for a zero-magnitude input, so a
 * caller may mutate the result without touching the vector passed in.
 * (Previously a zero vector was handed back by reference.)
 *
 * @param {number[]} vector - Input vector; never mutated.
 * @returns {number[]} Unit-length copy, or an unscaled copy when the
 *   magnitude is 0.
 */
function normalize(vector) {
  const magnitude2 = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  if (magnitude2 === 0) {
    // Nothing to scale by; hand back a copy rather than an alias of the input.
    return vector.slice();
  }
  return vector.map((v) => v / magnitude2);
}
|
|
933
|
+
/**
 * Euclidean length (L2 norm) of a vector.
 *
 * @param {number[]} vector - Input vector.
 * @returns {number} Square root of the sum of squared components; 0 for an
 *   empty vector.
 */
function magnitude(vector) {
  const addSquare = (runningTotal, component) =>
    runningTotal + component * component;
  const sumOfSquares = vector.reduce(addSquare, 0);
  return Math.sqrt(sumOfSquares);
}
|
|
936
|
+
|
|
937
|
+
// src/stores/RedisCacheStore.ts
|
|
938
|
+
// Connection defaults; any field supplied in the store config overrides these.
var DEFAULT_CONFIG4 = {
  host: "localhost",
  port: 6379,
  db: 0,
  keyPrefix: "llm-cache",
  connectTimeout: 1e4
};
/**
 * Cache store backed by Redis through the `ioredis` package, which is
 * imported lazily the first time a connection is needed.
 *
 * Entries are stored as JSON strings under `<keyPrefix>:<namespace>:<key>`.
 * Redis has no vector index here: `query()` is a client-side scan.
 */
var RedisCacheStore = class extends BaseCacheStore {
  storeType = "redis";
  client = null;
  redisConfig;
  connected = false;
  // In-flight connection attempt, shared so concurrent callers do not each
  // open (and leak) their own client.
  connecting = null;
  constructor(config) {
    super(config);
    this.redisConfig = { ...DEFAULT_CONFIG4, ...config };
  }
  /**
   * Connect to Redis. Safe to call repeatedly and concurrently.
   *
   * @returns {Promise<void>}
   * @throws {Error} "Failed to connect to Redis: ..." with the underlying
   *   error attached as `cause`.
   */
  async connect() {
    if (this.connected) return;
    if (!this.connecting) {
      this.connecting = this.openConnection().finally(() => {
        this.connecting = null;
      });
    }
    await this.connecting;
  }
  /**
   * Create the client, verify it with PING, and publish it on success.
   * On failure the half-open client is disconnected so it does not keep
   * retrying in the background.
   */
  async openConnection() {
    let client = null;
    try {
      const { Redis } = await import('ioredis');
      const connectTimeout = this.redisConfig.connectTimeout ?? 1e4;
      const tls = this.redisConfig.tls ? {} : void 0;
      if (this.redisConfig.url) {
        client = new Redis(this.redisConfig.url, {
          connectTimeout,
          lazyConnect: false,
          tls
        });
      } else {
        client = new Redis({
          host: this.redisConfig.host ?? "localhost",
          port: this.redisConfig.port ?? 6379,
          password: this.redisConfig.password,
          db: this.redisConfig.db ?? 0,
          connectTimeout,
          tls
        });
      }
      await client.ping();
      this.client = client;
      this.connected = true;
    } catch (error) {
      client?.disconnect();
      this.client = null;
      this.connected = false;
      throw new Error(`Failed to connect to Redis: ${error.message}`, {
        cause: error
      });
    }
  }
  /** Return a connected client, connecting first if necessary. */
  async ensureConnected() {
    if (!this.connected || !this.client) {
      await this.connect();
    }
    if (!this.client) {
      throw new Error("Redis client not initialized");
    }
    return this.client;
  }
  /** Build the full Redis key: `<keyPrefix>:<namespace>:<key>`. */
  prefixKey(key) {
    const prefix = this.redisConfig.keyPrefix ?? "llm-cache";
    return `${prefix}:${this.namespace}:${key}`;
  }
  /**
   * Collect every Redis key under this store's prefix with cursor-based SCAN,
   * which, unlike KEYS, does not block the server for the whole key space.
   *
   * @returns {Promise<string[]>} Fully prefixed keys, de-duplicated (SCAN may
   *   report a key more than once).
   */
  async scanKeys(client) {
    const pattern = this.prefixKey("*");
    const found = /* @__PURE__ */ new Set();
    let cursor = "0";
    do {
      const [nextCursor, batch] = await client.scan(
        cursor,
        "MATCH",
        pattern,
        "COUNT",
        1e3
      );
      for (const redisKey of batch) found.add(redisKey);
      cursor = nextCursor;
    } while (cursor !== "0");
    return Array.from(found);
  }
  /**
   * Fetch an entry and record the access on it.
   *
   * @returns {Promise<object|undefined>} The entry, or `undefined` when the
   *   key is absent or its stored JSON cannot be used (counted as a miss).
   */
  async get(key) {
    this.incrementMetric("gets");
    const client = await this.ensureConnected();
    const redisKey = this.prefixKey(key);
    const data = await client.get(redisKey);
    if (!data) {
      this.incrementMetric("misses");
      return void 0;
    }
    try {
      const entry = JSON.parse(data);
      entry.metadata.accessedAt = now();
      entry.metadata.accessCount++;
      this.incrementMetric("hits");
      // Best-effort write-back of the access metadata. KEEPTTL (Redis >= 6.0)
      // is essential: a plain SET would clear the key's expiry and every
      // entry that is read would then live forever. Failures are ignored on
      // purpose; the read itself already succeeded.
      client.set(redisKey, JSON.stringify(entry), "KEEPTTL").catch(() => {
      });
      return entry;
    } catch {
      // Unparseable or malformed payload: treat as a miss, not a hit.
      this.incrementMetric("misses");
      return void 0;
    }
  }
  /**
   * Store an entry; `entry.metadata.ttl` > 0 is applied as the key's expiry
   * in seconds.
   */
  async set(key, entry) {
    const startTime = performance.now();
    this.incrementMetric("sets");
    const client = await this.ensureConnected();
    const redisKey = this.prefixKey(key);
    const payload = JSON.stringify(entry);
    if (entry.metadata.ttl > 0) {
      // One atomic command, so a crash cannot leave the value without expiry.
      await client.set(redisKey, payload, "EX", entry.metadata.ttl);
    } else {
      await client.set(redisKey, payload);
    }
    return {
      success: true,
      id: entry.id,
      durationMs: performance.now() - startTime
    };
  }
  async has(key) {
    const client = await this.ensureConnected();
    return await client.exists(this.prefixKey(key)) > 0;
  }
  async delete(key) {
    this.incrementMetric("deletes");
    const client = await this.ensureConnected();
    return await client.del(this.prefixKey(key)) > 0;
  }
  /** Delete every key under this store's prefix and namespace. */
  async clear() {
    const client = await this.ensureConnected();
    const redisKeys = await this.scanKeys(client);
    // Delete in bounded batches so a huge key set is not one giant command.
    const batchSize = 500;
    for (let i = 0; i < redisKeys.length; i += batchSize) {
      await client.del(...redisKeys.slice(i, i + batchSize));
    }
  }
  async size() {
    const client = await this.ensureConnected();
    const redisKeys = await this.scanKeys(client);
    return redisKeys.length;
  }
  /** List stored keys with the `<keyPrefix>:<namespace>:` part removed. */
  async keys() {
    const client = await this.ensureConnected();
    const redisKeys = await this.scanKeys(client);
    const prefix = this.prefixKey("");
    return redisKeys.map((k) => k.slice(prefix.length));
  }
  /**
   * Client-side similarity search. At most the first 1000 keys are examined,
   * so results may be incomplete on larger stores.
   *
   * Values are read with MGET rather than through `get()`, so a search does
   * not bump access counts, skew hit/miss metrics, or rewrite scanned keys.
   */
  async query(vector, options) {
    const startTime = performance.now();
    const client = await this.ensureConnected();
    const allKeys = await this.keys();
    const keysToProcess = allKeys.slice(0, 1e3);
    const minSimilarity = options?.minSimilarity ?? 0;
    const entries = [];
    const batchSize = 100;
    for (let i = 0; i < keysToProcess.length; i += batchSize) {
      const batch = keysToProcess.slice(i, i + batchSize);
      const payloads = await client.mget(
        ...batch.map((k) => this.prefixKey(k))
      );
      for (const data of payloads) {
        if (!data) continue;
        let entry;
        try {
          entry = JSON.parse(data);
        } catch {
          // Skip corrupt payloads; one bad value must not fail the search.
          continue;
        }
        if (!entry?.embedding) continue;
        if (options?.namespace && entry.metadata?.namespace !== options.namespace) {
          continue;
        }
        const score = cosineSimilarity(vector, entry.embedding);
        if (score >= minSimilarity) {
          entries.push({ ...entry, score });
        }
      }
    }
    entries.sort((a, b) => b.score - a.score);
    return {
      entries: entries.slice(0, options?.topK ?? 10),
      durationMs: performance.now() - startTime
    };
  }
  /** PING the server and report the round-trip latency. */
  async checkHealth() {
    const startTime = performance.now();
    try {
      const client = await this.ensureConnected();
      await client.ping();
      return {
        healthy: true,
        latencyMs: performance.now() - startTime,
        lastCheck: now()
      };
    } catch (error) {
      return {
        healthy: false,
        latencyMs: performance.now() - startTime,
        lastCheck: now(),
        error: error.message
      };
    }
  }
  /**
   * Close the connection. State is reset before QUIT is awaited so the store
   * can reconnect later even if QUIT rejects.
   */
  async close() {
    const client = this.client;
    if (!client) return;
    this.client = null;
    this.connected = false;
    await client.quit();
  }
  /**
   * Check if connected to Redis
   */
  isConnected() {
    return this.connected;
  }
};
|
|
1116
|
+
/**
 * Factory for `RedisCacheStore`.
 *
 * @param {object} config - Redis store configuration, passed through as-is.
 * @returns {RedisCacheStore}
 */
function createRedisCacheStore(config) {
  const store = new RedisCacheStore(config);
  return store;
}
|
|
1119
|
+
|
|
1120
|
+
// src/stores/SQLiteCacheStore.ts
|
|
1121
|
+
// Defaults; any field supplied in the store config overrides these.
var DEFAULT_CONFIG5 = {
  dbPath: "cache.db",
  inMemory: false,
  enableVector: false
};
/**
 * Cache store backed by SQLite through the `better-sqlite3` package, which is
 * imported lazily by `init()`. `init()` must be awaited before any other
 * method is used.
 *
 * Each entry is one row: the full entry as JSON in `data`, plus the embedding
 * as a Float32 BLOB and a few columns copied out for filtering. `ttl` is in
 * seconds and `created_at` in milliseconds (see `pruneExpired`).
 */
var SQLiteCacheStore = class extends BaseCacheStore {
  storeType = "sqlite";
  db = null;
  sqliteConfig;
  initialized = false;
  constructor(config) {
    super(config);
    this.sqliteConfig = { ...DEFAULT_CONFIG5, ...config };
  }
  /**
   * Initialize the database
   *
   * Opens (or creates) the database and ensures the table and indexes exist.
   * If anything fails, the handle opened so far is closed again.
   *
   * @throws {Error} "Failed to initialize SQLite database: ..." with the
   *   underlying error attached as `cause`.
   */
  async init() {
    if (this.initialized) return;
    let db = null;
    try {
      const BetterSqlite3 = (await import('better-sqlite3')).default;
      db = new BetterSqlite3(
        this.sqliteConfig.inMemory ? ":memory:" : this.sqliteConfig.dbPath ?? "cache.db"
      );
      db.exec(`
        CREATE TABLE IF NOT EXISTS cache_entries (
          key TEXT PRIMARY KEY,
          id TEXT NOT NULL,
          data TEXT NOT NULL,
          embedding BLOB,
          model TEXT NOT NULL,
          namespace TEXT,
          created_at INTEGER NOT NULL,
          accessed_at INTEGER NOT NULL,
          ttl INTEGER DEFAULT 0
        );

        CREATE INDEX IF NOT EXISTS idx_namespace ON cache_entries(namespace);
        CREATE INDEX IF NOT EXISTS idx_model ON cache_entries(model);
        CREATE INDEX IF NOT EXISTS idx_created_at ON cache_entries(created_at);
        CREATE INDEX IF NOT EXISTS idx_accessed_at ON cache_entries(accessed_at);
      `);
      this.db = db;
      this.initialized = true;
    } catch (error) {
      try {
        db?.close();
      } catch {
        // Closing a half-open handle is best-effort; report the original error.
      }
      this.db = null;
      this.initialized = false;
      throw new Error(
        `Failed to initialize SQLite database: ${error.message}`,
        { cause: error }
      );
    }
  }
  /** Return the open database handle or throw if `init()` has not run. */
  ensureInitialized() {
    if (!this.initialized || !this.db) {
      throw new Error("SQLite store not initialized. Call init() first.");
    }
    return this.db;
  }
  /**
   * Whether a row's TTL has elapsed. Uses the same rule as `pruneExpired`:
   * ttl is in seconds, created_at in milliseconds, ttl <= 0 never expires.
   */
  isExpired(row, currentTime = now()) {
    return row.ttl > 0 && row.created_at + row.ttl * 1e3 < currentTime;
  }
  /**
   * Decode an embedding BLOB into a Float32Array.
   *
   * The bytes are copied into a fresh ArrayBuffer first: a typed-array view
   * created directly over the Buffer throws when its byteOffset is not a
   * multiple of 4, and that offset is not under this code's control.
   */
  decodeEmbedding(blob) {
    const usableBytes = Math.floor(blob.byteLength / 4) * 4;
    return new Float32Array(
      blob.buffer.slice(blob.byteOffset, blob.byteOffset + usableBytes)
    );
  }
  /**
   * Fetch an entry and record the access on it.
   *
   * @returns {Promise<object|undefined>} The entry, or `undefined` when the
   *   key is absent, expired (the row is deleted), or unparseable; all three
   *   count as misses.
   */
  get(key) {
    this.incrementMetric("gets");
    const db = this.ensureInitialized();
    const row = db.prepare(
      "SELECT data, created_at, ttl FROM cache_entries WHERE key = ?"
    ).get(key);
    if (!row) {
      this.incrementMetric("misses");
      return Promise.resolve(void 0);
    }
    if (this.isExpired(row)) {
      // Enforce TTL on read instead of relying on a manual pruneExpired().
      db.prepare("DELETE FROM cache_entries WHERE key = ?").run(key);
      this.incrementMetric("misses");
      return Promise.resolve(void 0);
    }
    try {
      const entry = JSON.parse(row.data);
      const accessedAt = now();
      entry.metadata.accessedAt = accessedAt;
      entry.metadata.accessCount++;
      db.prepare(
        "UPDATE cache_entries SET accessed_at = ?, data = ? WHERE key = ?"
      ).run(accessedAt, JSON.stringify(entry), key);
      this.incrementMetric("hits");
      return Promise.resolve(entry);
    } catch {
      // Corrupt row: nothing usable to return, so it is a miss rather than a hit.
      this.incrementMetric("misses");
      return Promise.resolve(void 0);
    }
  }
  /** Insert or replace the row for `key`. */
  set(key, entry) {
    const startTime = performance.now();
    this.incrementMetric("sets");
    const db = this.ensureInitialized();
    // Embeddings are persisted as raw 32-bit floats.
    const embedding = entry.embedding ? Buffer.from(new Float32Array(entry.embedding).buffer) : null;
    db.prepare(
      `
        INSERT OR REPLACE INTO cache_entries
        (key, id, data, embedding, model, namespace, created_at, accessed_at, ttl)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `
    ).run(
      key,
      entry.id,
      JSON.stringify(entry),
      embedding,
      entry.request.model,
      entry.metadata.namespace ?? null,
      entry.metadata.createdAt,
      entry.metadata.accessedAt,
      entry.metadata.ttl
    );
    return Promise.resolve({
      success: true,
      id: entry.id,
      durationMs: performance.now() - startTime
    });
  }
  /** Whether a non-expired row exists for `key`. */
  has(key) {
    const db = this.ensureInitialized();
    const row = db.prepare(
      "SELECT created_at, ttl FROM cache_entries WHERE key = ?"
    ).get(key);
    return Promise.resolve(!!row && !this.isExpired(row));
  }
  delete(key) {
    this.incrementMetric("deletes");
    const db = this.ensureInitialized();
    const result = db.prepare("DELETE FROM cache_entries WHERE key = ?").run(key);
    return Promise.resolve(result.changes > 0);
  }
  /**
   * Remove entries. A store whose namespace is "default" deletes every row;
   * any other store deletes only rows whose `namespace` column matches.
   * NOTE(review): that column holds `entry.metadata.namespace`, which is not
   * necessarily the store's own namespace - confirm against callers.
   */
  clear() {
    const db = this.ensureInitialized();
    if (this.namespace === "default") {
      db.prepare("DELETE FROM cache_entries").run();
    } else {
      db.prepare("DELETE FROM cache_entries WHERE namespace = ?").run(
        this.namespace
      );
    }
    return Promise.resolve();
  }
  /** Row count; includes expired rows that have not been pruned yet. */
  size() {
    const db = this.ensureInitialized();
    const row = db.prepare("SELECT COUNT(*) as count FROM cache_entries").get();
    return Promise.resolve(row.count);
  }
  /** All stored keys; includes expired rows that have not been pruned yet. */
  keys() {
    const db = this.ensureInitialized();
    const rows = db.prepare("SELECT key FROM cache_entries").all();
    return Promise.resolve(rows.map((r) => r.key));
  }
  /**
   * Brute-force cosine-similarity search over all non-expired rows that have
   * an embedding, optionally restricted to one namespace.
   *
   * @returns {Promise<{entries: object[], durationMs: number}>} Matches sorted
   *   by descending score, truncated to `options.topK` (default 10).
   */
  query(vector, options) {
    const startTime = performance.now();
    const db = this.ensureInitialized();
    let sql = `
      SELECT key, data, embedding FROM cache_entries
      WHERE embedding IS NOT NULL
      AND (ttl IS NULL OR ttl <= 0 OR (created_at + (ttl * 1000)) >= ?)
    `;
    const params = [now()];
    if (options?.namespace) {
      sql += " AND namespace = ?";
      params.push(options.namespace);
    }
    const rows = db.prepare(sql).all(...params);
    const minSimilarity = options?.minSimilarity ?? 0;
    const results = [];
    for (const row of rows) {
      const stored = this.decodeEmbedding(row.embedding);
      const similarity = cosineSimilarity(vector, stored);
      if (similarity >= minSimilarity) {
        try {
          const entry = JSON.parse(row.data);
          results.push({ ...entry, score: similarity });
        } catch {
          // Skip rows with corrupt JSON; one bad row must not fail the search.
        }
      }
    }
    results.sort((a, b) => b.score - a.score);
    return Promise.resolve({
      entries: results.slice(0, options?.topK ?? 10),
      durationMs: performance.now() - startTime
    });
  }
  /** Run a trivial statement against the database and report the outcome. */
  checkHealth() {
    const startTime = performance.now();
    try {
      const db = this.ensureInitialized();
      // Actually exercise the handle so a broken database is reported.
      db.prepare("SELECT 1").get();
      return Promise.resolve({
        healthy: true,
        latencyMs: performance.now() - startTime,
        lastCheck: now()
      });
    } catch (error) {
      return Promise.resolve({
        healthy: false,
        latencyMs: performance.now() - startTime,
        lastCheck: now(),
        error: error.message
      });
    }
  }
  /** Close the database handle; `init()` may be called again afterwards. */
  close() {
    if (this.db) {
      this.db.close();
      this.db = null;
      this.initialized = false;
    }
    return Promise.resolve();
  }
  /**
   * Prune expired entries
   *
   * @returns {Promise<number>} Number of rows deleted.
   */
  pruneExpired() {
    const db = this.ensureInitialized();
    const currentTime = now();
    const result = db.prepare(
      `
        DELETE FROM cache_entries
        WHERE ttl > 0 AND (created_at + (ttl * 1000)) < ?
      `
    ).run(currentTime);
    return Promise.resolve(result.changes);
  }
  /**
   * Get database file size (for non-memory databases)
   *
   * @returns {Promise<number|null>} Size in bytes, or `null` for an in-memory
   *   database or when the file cannot be read.
   */
  async getDbSize() {
    if (this.sqliteConfig.inMemory) return null;
    try {
      const { statSync } = await import('fs');
      const stats = statSync(this.sqliteConfig.dbPath ?? "cache.db");
      return stats.size;
    } catch {
      return null;
    }
  }
  /**
   * Check if database is initialized
   */
  isInitialized() {
    return this.initialized;
  }
};
|
|
1353
|
+
/**
 * Factory for `SQLiteCacheStore`. The returned store still needs `init()`
 * to be awaited before use.
 *
 * @param {object} config - SQLite store configuration, passed through as-is.
 * @returns {SQLiteCacheStore}
 */
function createSQLiteCacheStore(config) {
  const store = new SQLiteCacheStore(config);
  return store;
}
|
|
1356
|
+
|
|
1357
|
+
// src/stores/TieredCacheStore.ts
|
|
1358
|
+
/**
 * Cache store that layers several underlying stores ("tiers").
 *
 * Tiers are ordered by ascending `priority`; index 0 is consulted first and
 * receives all writes. Entries move towards index 0 when read often enough
 * (promotion) and away from it when a tier exceeds its `maxSize` (demotion).
 * Access counts are tracked in memory by this wrapper.
 *
 * NOTE(review): `set()` writes only to tier 0 and leaves any older copy of
 * the key in lower tiers untouched - confirm whether that is intended.
 */
var TieredCacheStore = class extends BaseCacheStore {
  storeType = "tiered";
  tiers;
  accessCounts = /* @__PURE__ */ new Map();
  /**
   * @param {object} config - Must contain `tiers`; tiers without a `store`
   *   are ignored.
   * @throws {Error} When no tier has a store.
   */
  constructor(config) {
    super(config);
    const validTiers = config.tiers.filter(
      (t) => t.store !== void 0
    );
    if (validTiers.length === 0) {
      throw new Error(
        "TieredCacheStore requires at least one tier with a store"
      );
    }
    this.tiers = validTiers.sort((a, b) => a.priority - b.priority);
  }
  /** Whether `index` names an existing tier. */
  isValidTierIndex(index) {
    return Number.isInteger(index) && index >= 0 && index < this.tiers.length;
  }
  /**
   * Look the key up tier by tier, starting at index 0. A hit below tier 0
   * may promote the entry.
   */
  async get(key) {
    this.incrementMetric("gets");
    for (let i = 0; i < this.tiers.length; i++) {
      const tier = this.tiers[i];
      const entry = await tier.store.get(key);
      if (entry) {
        this.incrementMetric("hits");
        const accessCount = (this.accessCounts.get(key) ?? 0) + 1;
        this.accessCounts.set(key, accessCount);
        if (i > 0) {
          await this.checkPromotion(key, entry, i, accessCount);
        }
        return entry;
      }
    }
    this.incrementMetric("misses");
    return void 0;
  }
  /** Write to tier 0, reset the key's access count, then enforce tier sizes. */
  async set(key, entry) {
    const startTime = performance.now();
    this.incrementMetric("sets");
    const result = await this.tiers[0].store.set(key, entry);
    this.accessCounts.set(key, 0);
    await this.checkDemotion(0);
    return {
      ...result,
      durationMs: performance.now() - startTime
    };
  }
  async has(key) {
    for (const tier of this.tiers) {
      if (await tier.store.has(key)) {
        return true;
      }
    }
    return false;
  }
  /** Delete the key from every tier; true if any tier reported a deletion. */
  async delete(key) {
    this.incrementMetric("deletes");
    let deleted = false;
    for (const tier of this.tiers) {
      if (await tier.store.delete(key)) {
        deleted = true;
      }
    }
    this.accessCounts.delete(key);
    return deleted;
  }
  async clear() {
    for (const tier of this.tiers) {
      await tier.store.clear();
    }
    this.accessCounts.clear();
  }
  /** Number of distinct keys across all tiers. */
  async size() {
    const keys = await this.keys();
    return keys.length;
  }
  /** Distinct keys across all tiers. */
  async keys() {
    const allKeys = /* @__PURE__ */ new Set();
    for (const tier of this.tiers) {
      const keys = await tier.store.keys();
      keys.forEach((k) => allKeys.add(k));
    }
    return Array.from(allKeys);
  }
  /**
   * Query every tier and merge the results, keeping the best-scoring copy of
   * each entry and returning the top `options.topK` (default 10).
   */
  async query(vector, options) {
    const startTime = performance.now();
    const entriesMap = /* @__PURE__ */ new Map();
    for (const tier of this.tiers) {
      const result = await tier.store.query(vector, options);
      for (const entry of result.entries) {
        // Fall back to the entry id when no `key` is present; otherwise every
        // key-less result would share the `undefined` slot and overwrite the
        // others, leaving a single match.
        const dedupeKey = entry.key ?? entry.id;
        const existing = entriesMap.get(dedupeKey);
        if (!existing || entry.score > existing.score) {
          entriesMap.set(dedupeKey, entry);
        }
      }
    }
    const entries = Array.from(entriesMap.values()).sort((a, b) => b.score - a.score).slice(0, options?.topK ?? 10);
    return {
      entries,
      durationMs: performance.now() - startTime
    };
  }
  /**
   * Healthy only if every tier is healthy. A tier whose health check throws
   * is reported as unhealthy instead of failing the whole check.
   */
  async checkHealth() {
    const startTime = performance.now();
    const tierHealths = [];
    for (const tier of this.tiers) {
      let healthy = false;
      try {
        const health = await tier.store.checkHealth();
        healthy = health.healthy;
      } catch {
        healthy = false;
      }
      tierHealths.push({ name: tier.name, healthy });
    }
    const allHealthy = tierHealths.every((t) => t.healthy);
    return {
      healthy: allHealthy,
      latencyMs: performance.now() - startTime,
      lastCheck: now(),
      error: allHealthy ? void 0 : `Unhealthy tiers: ${tierHealths.filter((t) => !t.healthy).map((t) => t.name).join(", ")}`
    };
  }
  /**
   * Close every tier. All tiers are attempted even if one fails; the first
   * failure is rethrown afterwards.
   */
  async close() {
    const failures = [];
    for (const tier of this.tiers) {
      try {
        await tier.store.close();
      } catch (error) {
        failures.push(error);
      }
    }
    this.accessCounts.clear();
    if (failures.length > 0) {
      throw failures[0];
    }
  }
  /**
   * Get tier statistics
   */
  async getTierStats() {
    const stats = [];
    for (const tier of this.tiers) {
      stats.push({
        name: tier.name,
        priority: tier.priority,
        size: await tier.store.size(),
        maxSize: tier.maxSize
      });
    }
    return stats;
  }
  /**
   * Manually promote an entry to a higher tier
   *
   * @param {string} key
   * @param {number} [targetTierIndex=0] - Tier to move the entry into.
   * @returns {Promise<boolean>} True if an entry was found in a tier below
   *   the target and moved; false otherwise, including for an invalid index.
   */
  async promote(key, targetTierIndex = 0) {
    if (!this.isValidTierIndex(targetTierIndex)) return false;
    for (let i = targetTierIndex + 1; i < this.tiers.length; i++) {
      const entry = await this.tiers[i].store.get(key);
      if (entry) {
        await this.tiers[targetTierIndex].store.set(key, entry);
        await this.tiers[i].store.delete(key);
        return true;
      }
    }
    return false;
  }
  /**
   * Manually demote an entry to a lower tier
   *
   * @param {string} key
   * @param {number} [targetTierIndex] - Tier to move the entry into; defaults
   *   to the tier directly below the one currently holding it.
   * @returns {Promise<boolean>} True if the entry was moved; false if it was
   *   not found or the target is not strictly below its current tier.
   */
  async demote(key, targetTierIndex) {
    for (let i = 0; i < this.tiers.length - 1; i++) {
      const entry = await this.tiers[i].store.get(key);
      if (entry) {
        const target = targetTierIndex ?? i + 1;
        // The target must be strictly below the current tier. With
        // target === i the set-then-delete below would write the entry and
        // immediately delete it from the same store, losing it.
        if (!this.isValidTierIndex(target) || target <= i) return false;
        await this.tiers[target].store.set(key, entry);
        await this.tiers[i].store.delete(key);
        return true;
      }
    }
    return false;
  }
  /**
   * Move an entry up to the nearest higher tier whose promotion threshold
   * (default 3 accesses) has been reached, then enforce that tier's size.
   */
  async checkPromotion(key, entry, currentTierIndex, accessCount) {
    for (let i = currentTierIndex - 1; i >= 0; i--) {
      const tier = this.tiers[i];
      const threshold = tier.promotionThreshold ?? 3;
      if (accessCount >= threshold) {
        await tier.store.set(key, entry);
        await this.tiers[currentTierIndex].store.delete(key);
        // The receiving tier just grew; keep it within its maxSize.
        await this.checkDemotion(i);
        break;
      }
    }
  }
  /**
   * If the tier exceeds `maxSize`, shrink it to `maxSize * demotionTarget`
   * (default 0.9) by moving its least-accessed keys to the next tier, or by
   * deleting them when there is no next tier.
   */
  async checkDemotion(tierIndex) {
    const tier = this.tiers[tierIndex];
    if (!tier.maxSize) return;
    const size = await tier.store.size();
    if (size <= tier.maxSize) return;
    const demotionTarget = tier.demotionTarget ?? 0.9;
    const targetSize = Math.floor(tier.maxSize * demotionTarget);
    const toRemove = size - targetSize;
    if (toRemove <= 0) return;
    const keys = await tier.store.keys();
    const keysByAccess = keys.map((k) => ({ key: k, count: this.accessCounts.get(k) ?? 0 })).sort((a, b) => a.count - b.count);
    const victims = keysByAccess.slice(0, toRemove);
    const nextTierIndex = tierIndex + 1;
    if (nextTierIndex >= this.tiers.length) {
      // Last tier: nowhere to demote to, so the entries are dropped.
      for (const { key } of victims) {
        await tier.store.delete(key);
        this.accessCounts.delete(key);
      }
      return;
    }
    for (const { key } of victims) {
      const entry = await tier.store.get(key);
      if (entry) {
        await this.tiers[nextTierIndex].store.set(key, entry);
        await tier.store.delete(key);
      }
    }
    // The next tier just grew; cascade so it also respects its maxSize.
    // Recursion ends at the last tier, which deletes instead of demoting.
    await this.checkDemotion(nextTierIndex);
  }
};
|
|
1567
|
+
/**
 * Factory for `TieredCacheStore`.
 *
 * @param {object} config - Tiered store configuration, passed through as-is.
 * @returns {TieredCacheStore}
 */
function createTieredCacheStore(config) {
  const store = new TieredCacheStore(config);
  return store;
}
|
|
1570
|
+
|
|
1571
|
+
// src/stores/PineconeCacheStore.ts
|
|
1572
|
+
var PineconeCacheStore = class extends BaseCacheStore {
|
|
1573
|
+
storeType = "pinecone";
|
|
1574
|
+
client = null;
|
|
1575
|
+
index = null;
|
|
1576
|
+
ns = null;
|
|
1577
|
+
pineconeConfig;
|
|
1578
|
+
connected = false;
|
|
1579
|
+
constructor(config) {
|
|
1580
|
+
super(config);
|
|
1581
|
+
this.pineconeConfig = config;
|
|
1582
|
+
}
|
|
1583
|
+
/**
|
|
1584
|
+
* Connect to Pinecone
|
|
1585
|
+
*/
|
|
1586
|
+
async connect() {
|
|
1587
|
+
if (this.connected) return;
|
|
1588
|
+
try {
|
|
1589
|
+
const { Pinecone } = await import('@pinecone-database/pinecone');
|
|
1590
|
+
this.client = new Pinecone({
|
|
1591
|
+
apiKey: this.pineconeConfig.apiKey
|
|
1592
|
+
});
|
|
1593
|
+
this.index = this.client.Index(this.pineconeConfig.index);
|
|
1594
|
+
this.ns = this.index.namespace(this.namespace);
|
|
1595
|
+
this.connected = true;
|
|
1596
|
+
} catch (error) {
|
|
1597
|
+
throw new Error(
|
|
1598
|
+
`Failed to connect to Pinecone: ${error.message}`
|
|
1599
|
+
);
|
|
1600
|
+
}
|
|
1601
|
+
}
|
|
1602
|
+
async ensureConnected() {
|
|
1603
|
+
if (!this.connected || !this.ns) {
|
|
1604
|
+
await this.connect();
|
|
1605
|
+
}
|
|
1606
|
+
if (!this.ns) {
|
|
1607
|
+
throw new Error("Pinecone namespace not initialized");
|
|
1608
|
+
}
|
|
1609
|
+
return this.ns;
|
|
1610
|
+
}
|
|
1611
|
+
async get(key) {
|
|
1612
|
+
this.incrementMetric("gets");
|
|
1613
|
+
const ns = await this.ensureConnected();
|
|
1614
|
+
try {
|
|
1615
|
+
const result = await ns.fetch([key]);
|
|
1616
|
+
if (!result.records[key]) {
|
|
1617
|
+
this.incrementMetric("misses");
|
|
1618
|
+
return void 0;
|
|
1619
|
+
}
|
|
1620
|
+
this.incrementMetric("hits");
|
|
1621
|
+
const record = result.records[key];
|
|
1622
|
+
const metadata = record.metadata;
|
|
1623
|
+
const entry = JSON.parse(metadata.entryData);
|
|
1624
|
+
entry.metadata.accessedAt = now();
|
|
1625
|
+
entry.metadata.accessCount++;
|
|
1626
|
+
this.updateAccessMetadata(key, record.values, metadata).catch(() => {
|
|
1627
|
+
});
|
|
1628
|
+
return entry;
|
|
1629
|
+
} catch {
|
|
1630
|
+
this.incrementMetric("misses");
|
|
1631
|
+
return void 0;
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
async updateAccessMetadata(key, values, metadata) {
|
|
1635
|
+
const ns = await this.ensureConnected();
|
|
1636
|
+
const updatedEntry = JSON.parse(metadata.entryData);
|
|
1637
|
+
updatedEntry.metadata.accessedAt = now();
|
|
1638
|
+
updatedEntry.metadata.accessCount++;
|
|
1639
|
+
await ns.upsert([
|
|
1640
|
+
{
|
|
1641
|
+
id: key,
|
|
1642
|
+
values,
|
|
1643
|
+
metadata: {
|
|
1644
|
+
...metadata,
|
|
1645
|
+
accessedAt: now(),
|
|
1646
|
+
accessCount: metadata.accessCount + 1,
|
|
1647
|
+
entryData: JSON.stringify(updatedEntry)
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1650
|
+
]);
|
|
1651
|
+
}
|
|
1652
|
+
async set(key, entry) {
|
|
1653
|
+
const startTime = performance.now();
|
|
1654
|
+
this.incrementMetric("sets");
|
|
1655
|
+
const ns = await this.ensureConnected();
|
|
1656
|
+
if (!entry.embedding || entry.embedding.length === 0) {
|
|
1657
|
+
return {
|
|
1658
|
+
success: false,
|
|
1659
|
+
id: entry.id,
|
|
1660
|
+
durationMs: performance.now() - startTime
|
|
1661
|
+
};
|
|
1662
|
+
}
|
|
1663
|
+
const metadata = {
|
|
1664
|
+
key,
|
|
1665
|
+
model: entry.request.model,
|
|
1666
|
+
content: entry.response.content.substring(0, 3e4),
|
|
1667
|
+
// Pinecone metadata limit
|
|
1668
|
+
createdAt: entry.metadata.createdAt,
|
|
1669
|
+
accessedAt: entry.metadata.accessedAt,
|
|
1670
|
+
accessCount: entry.metadata.accessCount,
|
|
1671
|
+
hitCount: entry.metadata.hitCount,
|
|
1672
|
+
ttl: entry.metadata.ttl,
|
|
1673
|
+
namespace: entry.metadata.namespace ?? this.namespace,
|
|
1674
|
+
tags: entry.metadata.tags ?? [],
|
|
1675
|
+
entryData: JSON.stringify(entry)
|
|
1676
|
+
};
|
|
1677
|
+
try {
|
|
1678
|
+
await ns.upsert([
|
|
1679
|
+
{
|
|
1680
|
+
id: key,
|
|
1681
|
+
values: entry.embedding,
|
|
1682
|
+
metadata
|
|
1683
|
+
}
|
|
1684
|
+
]);
|
|
1685
|
+
return {
|
|
1686
|
+
success: true,
|
|
1687
|
+
id: entry.id,
|
|
1688
|
+
durationMs: performance.now() - startTime
|
|
1689
|
+
};
|
|
1690
|
+
} catch (error) {
|
|
1691
|
+
return {
|
|
1692
|
+
success: false,
|
|
1693
|
+
id: entry.id,
|
|
1694
|
+
durationMs: performance.now() - startTime
|
|
1695
|
+
};
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
async has(key) {
|
|
1699
|
+
const ns = await this.ensureConnected();
|
|
1700
|
+
try {
|
|
1701
|
+
const result = await ns.fetch([key]);
|
|
1702
|
+
return !!result.records[key];
|
|
1703
|
+
} catch {
|
|
1704
|
+
return false;
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
async delete(key) {
|
|
1708
|
+
this.incrementMetric("deletes");
|
|
1709
|
+
const ns = await this.ensureConnected();
|
|
1710
|
+
try {
|
|
1711
|
+
await ns.deleteOne(key);
|
|
1712
|
+
return true;
|
|
1713
|
+
} catch {
|
|
1714
|
+
return false;
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1717
|
+
async clear() {
|
|
1718
|
+
const ns = await this.ensureConnected();
|
|
1719
|
+
await ns.deleteAll();
|
|
1720
|
+
}
|
|
1721
|
+
async size() {
|
|
1722
|
+
if (!this.index) {
|
|
1723
|
+
await this.connect();
|
|
1724
|
+
}
|
|
1725
|
+
try {
|
|
1726
|
+
const stats = await this.index.describeIndexStats();
|
|
1727
|
+
return stats.namespaces[this.namespace]?.recordCount ?? 0;
|
|
1728
|
+
} catch {
|
|
1729
|
+
return 0;
|
|
1730
|
+
}
|
|
1731
|
+
}
|
|
1732
|
+
async keys() {
|
|
1733
|
+
const ns = await this.ensureConnected();
|
|
1734
|
+
try {
|
|
1735
|
+
const result = await ns.listPaginated({ limit: 1e4 });
|
|
1736
|
+
return result.vectors.map((v) => v.id);
|
|
1737
|
+
} catch {
|
|
1738
|
+
return [];
|
|
1739
|
+
}
|
|
1740
|
+
}
|
|
1741
|
+
async query(vector, options) {
|
|
1742
|
+
const startTime = performance.now();
|
|
1743
|
+
const ns = await this.ensureConnected();
|
|
1744
|
+
const queryOptions = {
|
|
1745
|
+
vector,
|
|
1746
|
+
topK: options?.topK ?? 10,
|
|
1747
|
+
includeMetadata: true,
|
|
1748
|
+
includeValues: options?.includeEmbedding ?? false
|
|
1749
|
+
};
|
|
1750
|
+
if (options?.filter) {
|
|
1751
|
+
queryOptions.filter = options.filter;
|
|
1752
|
+
}
|
|
1753
|
+
try {
|
|
1754
|
+
const result = await ns.query(queryOptions);
|
|
1755
|
+
const entries = [];
|
|
1756
|
+
for (const match of result.matches) {
|
|
1757
|
+
if (options?.minSimilarity && match.score < options.minSimilarity) {
|
|
1758
|
+
continue;
|
|
1759
|
+
}
|
|
1760
|
+
const metadata = match.metadata;
|
|
1761
|
+
if (metadata?.entryData) {
|
|
1762
|
+
try {
|
|
1763
|
+
const entry = JSON.parse(metadata.entryData);
|
|
1764
|
+
if (options?.includeEmbedding && match.values) {
|
|
1765
|
+
entry.embedding = match.values;
|
|
1766
|
+
}
|
|
1767
|
+
entries.push({
|
|
1768
|
+
...entry,
|
|
1769
|
+
score: match.score
|
|
1770
|
+
});
|
|
1771
|
+
} catch {
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
return {
|
|
1776
|
+
entries,
|
|
1777
|
+
durationMs: performance.now() - startTime
|
|
1778
|
+
};
|
|
1779
|
+
} catch (error) {
|
|
1780
|
+
return {
|
|
1781
|
+
entries: [],
|
|
1782
|
+
durationMs: performance.now() - startTime
|
|
1783
|
+
};
|
|
1784
|
+
}
|
|
1785
|
+
}
|
|
1786
|
+
async checkHealth() {
|
|
1787
|
+
const startTime = performance.now();
|
|
1788
|
+
try {
|
|
1789
|
+
if (!this.index) {
|
|
1790
|
+
await this.connect();
|
|
1791
|
+
}
|
|
1792
|
+
await this.index.describeIndexStats();
|
|
1793
|
+
return {
|
|
1794
|
+
healthy: true,
|
|
1795
|
+
latencyMs: performance.now() - startTime,
|
|
1796
|
+
lastCheck: now()
|
|
1797
|
+
};
|
|
1798
|
+
} catch (error) {
|
|
1799
|
+
return {
|
|
1800
|
+
healthy: false,
|
|
1801
|
+
latencyMs: performance.now() - startTime,
|
|
1802
|
+
lastCheck: now(),
|
|
1803
|
+
error: error.message
|
|
1804
|
+
};
|
|
1805
|
+
}
|
|
1806
|
+
}
|
|
1807
|
+
close() {
|
|
1808
|
+
this.client = null;
|
|
1809
|
+
this.index = null;
|
|
1810
|
+
this.ns = null;
|
|
1811
|
+
this.connected = false;
|
|
1812
|
+
return Promise.resolve();
|
|
1813
|
+
}
|
|
1814
|
+
/**
|
|
1815
|
+
* Check if connected to Pinecone
|
|
1816
|
+
*/
|
|
1817
|
+
isConnected() {
|
|
1818
|
+
return this.connected;
|
|
1819
|
+
}
|
|
1820
|
+
/**
|
|
1821
|
+
* Get index stats
|
|
1822
|
+
*/
|
|
1823
|
+
async getIndexStats() {
|
|
1824
|
+
if (!this.index) {
|
|
1825
|
+
await this.connect();
|
|
1826
|
+
}
|
|
1827
|
+
try {
|
|
1828
|
+
return await this.index.describeIndexStats();
|
|
1829
|
+
} catch {
|
|
1830
|
+
return null;
|
|
1831
|
+
}
|
|
1832
|
+
}
|
|
1833
|
+
};
|
|
1834
|
+
/**
 * Factory wrapper around `new PineconeCacheStore(config)`.
 *
 * @param {object} config - Passed to the constructor unchanged.
 * @returns {PineconeCacheStore} The new store instance.
 */
function createPineconeCacheStore(config) {
  const store = new PineconeCacheStore(config);
  return store;
}
|
|
1837
|
+
|
|
1838
|
+
// src/strategies/BaseMatchStrategy.ts
|
|
1839
|
+
/**
 * Common ancestor of the match strategies in this file. It declares no
 * members of its own; subclasses supply `name` and `match()`.
 */
var BaseMatchStrategy = class {
};
|
|
1841
|
+
|
|
1842
|
+
// src/strategies/ExactMatchStrategy.ts
|
|
1843
|
+
// Defaults merged under the caller's options by ExactMatchStrategy.
var DEFAULT_CONFIG6 = {
  // Forwarded to generateCacheKey() when the lookup key is built.
  normalizeWhitespace: true,
  // Only checked for the presence of "temperature" by ExactMatchStrategy.
  hashFields: ["model", "messages"]
};
|
|
1847
|
+
/**
 * Strategy that looks a request up by a deterministic key and reports a hit
 * only when the store returns an entry for exactly that key.
 */
var ExactMatchStrategy = class extends BaseMatchStrategy {
  name = "exact";
  config;
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG6.
   */
  constructor(config) {
    super();
    this.config = { ...DEFAULT_CONFIG6, ...config };
  }
  /**
   * Look up `request` by key.
   *
   * The key is the generated cache key, followed by `:t:<temperature>` when
   * the request sets a temperature, followed by `:ns:<namespace>` when a
   * namespace other than "default" is requested.
   *
   * @param {object} request - Reads `model`, `messages`, `temperature`.
   * @param {object} store - Must provide `get(key)`.
   * @param {*} _similarity - Unused by this strategy.
   * @param {object} [options] - Reads `namespace`.
   * @returns {Promise<object>} A hit (`source: "exact"`, `similarity: 1`) or a
   *   miss (`source: "miss"`), each with `latencyMs`.
   */
  async match(request, store, _similarity, options) {
    const startTime = performance.now();
    const miss = () => ({
      hit: false,
      latencyMs: performance.now() - startTime,
      source: "miss"
    });
    const namespace = options?.namespace;
    const { normalizeWhitespace, hashFields } = this.config;
    const baseKey = generateCacheKey(request.model, request.messages, {
      normalizeWhitespace,
      includeTemperature: hashFields?.includes("temperature")
    });
    const temperaturePart = request.temperature === void 0 ? "" : `:t:${request.temperature}`;
    const namespacePart = namespace && namespace !== "default" ? `:ns:${namespace}` : "";
    const key = `${baseKey}${temperaturePart}${namespacePart}`;
    const entry = await store.get(key);
    if (!entry) {
      return miss();
    }
    // An entry recorded under a different namespace is treated as absent.
    if (namespace && entry.metadata.namespace && entry.metadata.namespace !== namespace) {
      return miss();
    }
    return {
      hit: true,
      entry,
      // Exact match = 100% similarity
      similarity: 1,
      latencyMs: performance.now() - startTime,
      source: "exact"
    };
  }
};
|
|
1888
|
+
/**
 * Factory wrapper around `new ExactMatchStrategy(config)`.
 *
 * @param {object} [config] - Passed to the constructor unchanged.
 * @returns {ExactMatchStrategy} The new strategy instance.
 */
function createExactMatchStrategy(config) {
  const strategy = new ExactMatchStrategy(config);
  return strategy;
}
|
|
1891
|
+
|
|
1892
|
+
// src/strategies/SemanticMatchStrategy.ts
|
|
1893
|
+
// Defaults merged under the caller's options by SemanticMatchStrategy.
var DEFAULT_CONFIG7 = {
  // Minimum score a candidate needs to count as a hit.
  threshold: 0.92,
  // Prefer a candidate recorded for the same model as the request.
  matchModel: true,
  // Number of candidates requested from the store.
  topK: 5
};
|
|
1898
|
+
/**
 * Strategy that embeds the request's user message and asks the store for the
 * nearest cached entries.
 */
var SemanticMatchStrategy = class extends BaseMatchStrategy {
  name = "semantic";
  config;
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG7.
   */
  constructor(config) {
    super();
    this.config = { ...DEFAULT_CONFIG7, ...config };
  }
  /**
   * Look for a cached entry similar to `request`.
   *
   * With `matchModel` enabled, a candidate recorded for the request's model
   * is preferred; when none exists the first candidate is used instead. The
   * chosen candidate is a hit only if its score reaches the threshold.
   * Any error raised while embedding or querying is logged and reported as
   * a miss.
   *
   * @param {object} request - Reads `messages` and `model`.
   * @param {object} store - Must provide `query(vector, options)`.
   * @param {object} [similarity] - Must provide `embed(text)`; without it the
   *   result is always a miss.
   * @param {object} [options] - Reads `threshold`, `topK`, `namespace`.
   * @returns {Promise<object>} A hit (`source: "semantic"`) or a miss.
   */
  async match(request, store, similarity, options) {
    const startTime = performance.now();
    const miss = () => ({
      hit: false,
      latencyMs: performance.now() - startTime,
      source: "miss"
    });
    if (!similarity) {
      return miss();
    }
    const userMessage = extractUserMessage(request.messages);
    if (!userMessage) {
      return miss();
    }
    try {
      const queryEmbedding = await similarity.embed(userMessage);
      const threshold = options?.threshold ?? this.config.threshold ?? 0.92;
      const topK = options?.topK ?? this.config.topK ?? 5;
      const { entries } = await store.query(queryEmbedding, {
        topK,
        minSimilarity: threshold,
        namespace: options?.namespace
      });
      if (entries.length === 0) {
        return miss();
      }
      const sameModel = this.config.matchModel
        ? entries.find((candidate) => candidate.request.model === request.model)
        : void 0;
      const bestMatch = sameModel || entries[0];
      if (!bestMatch || !(bestMatch.score >= threshold)) {
        return miss();
      }
      return {
        hit: true,
        entry: bestMatch,
        similarity: bestMatch.score,
        latencyMs: performance.now() - startTime,
        source: "semantic"
      };
    } catch (error) {
      console.error("Semantic match error:", error);
      return miss();
    }
  }
};
|
|
1966
|
+
/**
 * Factory wrapper around `new SemanticMatchStrategy(config)`.
 *
 * @param {object} [config] - Passed to the constructor unchanged.
 * @returns {SemanticMatchStrategy} The new strategy instance.
 */
function createSemanticMatchStrategy(config) {
  const strategy = new SemanticMatchStrategy(config);
  return strategy;
}
|
|
1969
|
+
|
|
1970
|
+
// src/strategies/HybridMatchStrategy.ts
|
|
1971
|
+
// Defaults merged (shallowly) under the caller's options by HybridMatchStrategy.
var DEFAULT_CONFIG8 = {
  // Handed to the inner ExactMatchStrategy.
  exact: {
    normalizeWhitespace: true,
    hashFields: ["model", "messages"]
  },
  // Handed to the inner SemanticMatchStrategy.
  semantic: {
    threshold: 0.92,
    matchModel: true,
    topK: 5
  }
};
|
|
1982
|
+
/**
 * Strategy that tries an exact lookup first and falls back to a semantic
 * lookup when the exact one misses.
 */
var HybridMatchStrategy = class extends BaseMatchStrategy {
  name = "hybrid";
  exact;
  semantic;
  config;
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG8. Also reads the
   *   optional RegExp arrays `exactOnlyPatterns` and `semanticPatterns`.
   */
  constructor(config) {
    super();
    this.config = { ...DEFAULT_CONFIG8, ...config };
    this.exact = new ExactMatchStrategy(this.config.exact);
    this.semantic = new SemanticMatchStrategy(this.config.semantic);
  }
  /**
   * Resolve `request` against the cache.
   *
   * The exact strategy always runs. Its result is returned as-is when it
   * hits or when the request is restricted to exact matching. Otherwise the
   * semantic strategy runs, provided a similarity engine was supplied and the
   * request is eligible for semantic matching. `latencyMs` on the returned
   * object always covers the whole hybrid lookup.
   *
   * @param {object} request - Reads `messages`; forwarded to both strategies.
   * @param {object} store - Forwarded to both strategies.
   * @param {object} [similarity] - Required for the semantic fallback.
   * @param {object} [options] - Forwarded to both strategies.
   * @returns {Promise<object>} The winning strategy's result, or a miss.
   */
  async match(request, store, similarity, options) {
    const startTime = performance.now();
    const elapsed = () => performance.now() - startTime;
    const exactOnly = this.shouldUseExactOnly(request);
    const exactResult = await this.exact.match(request, store, similarity, options);
    if (exactOnly || exactResult.hit) {
      return { ...exactResult, latencyMs: elapsed() };
    }
    if (similarity && this.shouldUseSemantic(request)) {
      const semanticResult = await this.semantic.match(request, store, similarity, options);
      return { ...semanticResult, latencyMs: elapsed() };
    }
    return { hit: false, latencyMs: elapsed(), source: "miss" };
  }
  /**
   * Check if request should use exact-only matching
   *
   * @param {object} request - Reads `messages`.
   * @returns {boolean} `true` when any `exactOnlyPatterns` entry matches the
   *   last user message; `false` when no patterns are configured.
   */
  shouldUseExactOnly(request) {
    const patterns = this.config.exactOnlyPatterns;
    if (!patterns) return false;
    return this.matchesAny(patterns, this.extractUserMessage(request));
  }
  /**
   * Check if semantic matching should be used
   *
   * @param {object} request - Reads `messages`.
   * @returns {boolean} `true` when no `semanticPatterns` are configured or
   *   when any of them matches the last user message.
   */
  shouldUseSemantic(request) {
    const patterns = this.config.semanticPatterns;
    if (!patterns) return true;
    return this.matchesAny(patterns, this.extractUserMessage(request));
  }
  /**
   * Extract user message from request
   *
   * @param {object} request - Reads `messages`.
   * @returns {*} The `content` of the last message whose role is "user", or
   *   "" when there is none.
   */
  extractUserMessage(request) {
    for (let i = request.messages.length - 1; i >= 0; i--) {
      if (request.messages[i].role === "user") {
        return request.messages[i].content;
      }
    }
    return "";
  }
  /**
   * Test `text` against each pattern until one matches.
   *
   * @param {RegExp[]} patterns - Patterns to try in order.
   * @param {*} text - Value handed to `RegExp.prototype.test`.
   * @returns {boolean} Whether any pattern matched.
   */
  matchesAny(patterns, text) {
    return patterns.some((pattern) => {
      // Patterns are long-lived config objects. With a /g or /y flag, test()
      // resumes from lastIndex, so without this reset the same message would
      // alternate between matching and not matching on successive calls.
      pattern.lastIndex = 0;
      return pattern.test(text);
    });
  }
};
|
|
2076
|
+
/**
 * Factory wrapper around `new HybridMatchStrategy(config)`.
 *
 * @param {object} [config] - Passed to the constructor unchanged.
 * @returns {HybridMatchStrategy} The new strategy instance.
 */
function createHybridMatchStrategy(config) {
  const strategy = new HybridMatchStrategy(config);
  return strategy;
}
|
|
2079
|
+
|
|
2080
|
+
// src/similarity/SimilarityEngine.ts
|
|
2081
|
+
/**
 * Wraps an embedding provider and offers vector-similarity helpers, with an
 * optional in-memory cache of embeddings keyed by input text.
 */
var SimilarityEngine = class {
  provider;
  metric;
  embeddingCache;
  maxCacheSize;
  /**
   * @param {object} config - Reads `provider` (must expose `embed(text)`),
   *   `metric` (default "cosine"), `maxCacheSize` (default 10000) and
   *   `cacheEmbeddings` (enables the cache when truthy).
   */
  constructor(config) {
    this.provider = config.provider;
    this.metric = config.metric ?? "cosine";
    this.maxCacheSize = config.maxCacheSize ?? 1e4;
    if (config.cacheEmbeddings) {
      this.embeddingCache = /* @__PURE__ */ new Map();
    }
  }
  /**
   * Generate embedding for text
   *
   * Returns the cached vector when caching is on and the text was seen
   * before. Otherwise asks the provider and, when caching is on, stores the
   * result, evicting the oldest inserted key once the cache is full.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} The embedding vector.
   */
  async embed(text) {
    const cache = this.embeddingCache;
    if (cache?.has(text)) {
      return cache.get(text);
    }
    const embedding = await this.provider.embed(text);
    if (cache) {
      if (!cache.has(text) && cache.size >= this.maxCacheSize) {
        // Decide on the iterator's `done` flag, not on the key's truthiness:
        // an empty-string key is falsy and would otherwise never be evicted,
        // letting the cache grow past maxCacheSize.
        const oldest = cache.keys().next();
        if (!oldest.done) {
          cache.delete(oldest.value);
        }
      }
      cache.set(text, embedding);
    }
    return embedding;
  }
  /**
   * Generate embeddings for multiple texts
   *
   * Delegates to the provider's `embedBatch` when it has one (the cache is
   * not consulted on that path); otherwise embeds each text via `embed()`.
   *
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<number[][]>} One vector per input text.
   */
  async embedBatch(texts) {
    if (this.provider.embedBatch) {
      return this.provider.embedBatch(texts);
    }
    return Promise.all(texts.map((text) => this.embed(text)));
  }
  /**
   * Compute similarity between two vectors
   *
   * @param {number[]} a - First vector.
   * @param {number[]} b - Second vector.
   * @returns {number} Cosine similarity, `1 / (1 + distance)` for
   *   "euclidean", or the raw dot product for "dot_product". Unknown metrics
   *   fall back to cosine.
   */
  computeSimilarity(a, b) {
    switch (this.metric) {
      case "euclidean":
        return 1 / (1 + this.euclideanDistance(a, b));
      case "dot_product":
        return this.dotProduct(a, b);
      case "cosine":
      default:
        return this.cosineSimilarity(a, b);
    }
  }
  /**
   * Find most similar vectors from candidates
   *
   * @param {number[]} query - Vector to compare against.
   * @param {{id: *, vector: number[]}[]} candidates - Vectors to score.
   * @param {number} [minSimilarity=0] - Inclusive lower bound on the score.
   * @returns {{id: *, similarity: number}[]} Scores at or above the bound,
   *   highest first.
   */
  findMostSimilar(query, candidates, minSimilarity = 0) {
    return candidates
      .map((candidate) => ({
        id: candidate.id,
        similarity: this.computeSimilarity(query, candidate.vector)
      }))
      .filter((scored) => scored.similarity >= minSimilarity)
      .sort((left, right) => right.similarity - left.similarity);
  }
  /**
   * Get embedding dimensions
   *
   * @returns {number} The provider's `dimensions`, or 0 when it has none.
   */
  get dimensions() {
    return this.provider.dimensions ?? 0;
  }
  /**
   * Clear embedding cache
   */
  clearCache() {
    this.embeddingCache?.clear();
  }
  /**
   * Get cache statistics
   *
   * @returns {{size: number, maxSize: number}|null} `null` when caching is off.
   */
  getCacheStats() {
    if (!this.embeddingCache) return null;
    return {
      size: this.embeddingCache.size,
      maxSize: this.maxCacheSize
    };
  }
  /**
   * Cosine of the angle between `a` and `b`.
   *
   * @returns {number} 0 when the lengths differ or either vector has zero norm.
   */
  cosineSimilarity(a, b) {
    if (a.length !== b.length) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    if (denominator === 0) return 0;
    return dot / denominator;
  }
  /**
   * Straight-line distance between `a` and `b`.
   *
   * @returns {number} `Infinity` when the lengths differ.
   */
  euclideanDistance(a, b) {
    if (a.length !== b.length) return Infinity;
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }
  /**
   * Sum of element-wise products of `a` and `b`.
   *
   * @returns {number} 0 when the lengths differ.
   */
  dotProduct(a, b) {
    if (a.length !== b.length) return 0;
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      sum += a[i] * b[i];
    }
    return sum;
  }
};
|
|
2203
|
+
/**
 * Factory wrapper around `new SimilarityEngine(config)`.
 *
 * @param {object} config - Passed to the constructor unchanged.
 * @returns {SimilarityEngine} The new engine instance.
 */
function createSimilarityEngine(config) {
  const engine = new SimilarityEngine(config);
  return engine;
}
|
|
2206
|
+
|
|
2207
|
+
// src/streaming/ChunkBuffer.ts
|
|
2208
|
+
// Defaults merged under the caller's options by ChunkBuffer.
var DEFAULT_CONFIG9 = {
  // Flush once this many chunks are buffered.
  maxChunks: 100,
  // 64KB
  maxBytes: 64 * 1024,
  // Period of the background flush timer; values <= 0 disable it.
  flushIntervalMs: 1e3,
  // Index chunks on add and sort by index on flush.
  preserveOrder: true
};
|
|
2215
|
+
/**
 * Accumulates stream chunks and hands them to an optional `onFlush` callback
 * in batches, triggered by chunk count, estimated byte size, or a timer.
 */
var ChunkBuffer = class {
  chunks = [];
  currentBytes = 0;
  config;
  flushTimer = null;
  onFlush;
  // Next index to hand out to a chunk that arrives without one. It keeps
  // counting across flushes so that batches never reuse an index.
  nextIndex = 0;
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG9.
   * @param {(chunks: object[]) => void} [onFlush] - Called with each
   *   non-empty flushed batch.
   */
  constructor(config, onFlush) {
    this.config = { ...DEFAULT_CONFIG9, ...config };
    this.onFlush = onFlush;
    if (this.config.flushIntervalMs > 0) {
      this.startFlushTimer();
    }
  }
  /**
   * Add a chunk to the buffer
   *
   * With `preserveOrder` on, a chunk without an `index` receives the next
   * free one (written onto the chunk object itself). The buffer flushes
   * immediately when a size limit is reached.
   *
   * @param {object} chunk - Chunk to buffer.
   */
  add(chunk) {
    if (this.config.preserveOrder && chunk.index === void 0) {
      // The previous `this.chunks.length` restarted from 0 after every
      // flush, giving consecutive batches colliding indices.
      chunk.index = this.nextIndex;
    }
    if (typeof chunk.index === "number" && chunk.index >= this.nextIndex) {
      // Stay ahead of explicit indices so automatic ones never clash with them.
      this.nextIndex = chunk.index + 1;
    }
    this.chunks.push(chunk);
    this.currentBytes += this.estimateChunkSize(chunk);
    if (this.shouldFlush()) {
      this.flush();
    }
  }
  /**
   * Add multiple chunks
   *
   * @param {Iterable<object>} chunks - Chunks to add one by one.
   */
  addAll(chunks) {
    for (const chunk of chunks) {
      this.add(chunk);
    }
  }
  /**
   * Flush all buffered chunks
   *
   * @returns {object[]} The chunks that were buffered (sorted by `index`
   *   when `preserveOrder` is on). The same array is passed to `onFlush`
   *   when it is non-empty.
   */
  flush() {
    const flushed = this.chunks;
    if (this.config.preserveOrder) {
      flushed.sort((a, b) => a.index - b.index);
    }
    this.chunks = [];
    this.currentBytes = 0;
    if (this.onFlush && flushed.length > 0) {
      this.onFlush(flushed);
    }
    return flushed;
  }
  /**
   * Get current buffer size
   *
   * @returns {number} Number of buffered chunks.
   */
  size() {
    return this.chunks.length;
  }
  /**
   * Get current buffer bytes
   *
   * @returns {number} Sum of the size estimates of the buffered chunks.
   */
  bytes() {
    return this.currentBytes;
  }
  /**
   * Check if buffer is empty
   *
   * @returns {boolean}
   */
  isEmpty() {
    return this.chunks.length === 0;
  }
  /**
   * Peek at buffered chunks without flushing
   *
   * @returns {object[]} The live internal array (not a copy).
   */
  peek() {
    return this.chunks;
  }
  /**
   * Clear the buffer without flushing
   *
   * Buffered chunks are dropped without reaching `onFlush`, and automatic
   * indexing starts over from 0.
   */
  clear() {
    this.chunks = [];
    this.currentBytes = 0;
    this.nextIndex = 0;
  }
  /**
   * Stop the flush timer
   */
  stop() {
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }
  /**
   * Destroy the buffer
   */
  destroy() {
    this.stop();
    this.clear();
  }
  /**
   * @returns {boolean} Whether the chunk-count or byte limit has been reached.
   */
  shouldFlush() {
    return this.chunks.length >= this.config.maxChunks || this.currentBytes >= this.config.maxBytes;
  }
  /**
   * Start the periodic timer that flushes any buffered chunks.
   */
  startFlushTimer() {
    this.flushTimer = setInterval(() => {
      if (!this.isEmpty()) {
        this.flush();
      }
    }, this.config.flushIntervalMs);
  }
  /**
   * Rough size of a chunk: two units per content character, the JSON length
   * of `toolCall`, `toolResult` and `metadata`, plus a flat 50.
   *
   * @param {object} chunk - Chunk to measure.
   * @returns {number} The estimate.
   */
  estimateChunkSize(chunk) {
    let size = 0;
    if (chunk.content) {
      size += chunk.content.length * 2;
    }
    if (chunk.toolCall) {
      size += JSON.stringify(chunk.toolCall).length;
    }
    if (chunk.toolResult) {
      size += JSON.stringify(chunk.toolResult).length;
    }
    if (chunk.metadata) {
      size += JSON.stringify(chunk.metadata).length;
    }
    return size + 50;
  }
};
|
|
2340
|
+
/**
 * Factory wrapper around `new ChunkBuffer(config, onFlush)`.
 *
 * @param {object} [config] - Passed to the constructor unchanged.
 * @param {Function} [onFlush] - Passed to the constructor unchanged.
 * @returns {ChunkBuffer} The new buffer instance.
 */
function createChunkBuffer(config, onFlush) {
  const buffer = new ChunkBuffer(config, onFlush);
  return buffer;
}
|
|
2343
|
+
|
|
2344
|
+
// src/streaming/StreamRecorder.ts
|
|
2345
|
+
// Defaults merged under the caller's options by StreamRecorder.
var DEFAULT_CONFIG10 = {
  // Settings for the recorder's internal ChunkBuffer.
  buffer: {
    maxChunks: 100,
    maxBytes: 64 * 1024,
    // No auto-flush during recording
    flushIntervalMs: 0,
    preserveOrder: true
  },
  // When false, recordToolCall()/recordToolResult() do nothing.
  captureToolCalls: true,
  // When false, per-chunk metadata is dropped and recordMetadata() does nothing.
  captureMetadata: true,
  // 5 minutes
  maxDurationMs: 3e5,
  // Chunks whose index reaches this value are rejected.
  maxChunks: 1e4
};
|
|
2359
|
+
/**
 * Records the chunks of one streamed response at a time and returns them as
 * a single stream object from `complete()` or `abort()`.
 */
var StreamRecorder = class {
  config;
  buffer;
  recording = false;
  startTime = 0;
  model = "";
  messages = [];
  key = "";
  totalChars = 0;
  chunkIndex = 0;
  timeoutId = null;
  // Every batch the buffer has flushed during the current recording.
  flushedChunks = [];
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG10; `buffer` is
   *   merged key by key with the default buffer settings.
   */
  constructor(config) {
    this.config = {
      ...DEFAULT_CONFIG10,
      ...config,
      buffer: { ...DEFAULT_CONFIG10.buffer, ...config?.buffer }
    };
    // The buffer flushes itself whenever its chunk or byte limit is reached.
    // Without a flush handler those batches were simply discarded, so any
    // recording larger than the buffer limits lost everything but its tail.
    this.buffer = new ChunkBuffer(this.config.buffer, (batch) => {
      for (const chunk of batch) {
        this.flushedChunks.push(chunk);
      }
    });
  }
  /**
   * Start recording a new stream
   *
   * @param {string} model - Model name stored on the resulting stream.
   * @param {object[]} messages - Request messages stored on the stream.
   * @param {string} [key] - Stream key; a generated id is used when omitted.
   * @throws {Error} When a recording is already in progress.
   */
  start(model, messages, key) {
    if (this.recording) {
      throw new Error("Recording already in progress");
    }
    this.recording = true;
    this.startTime = now();
    this.model = model;
    this.messages = messages;
    this.key = key ?? generateId();
    this.totalChars = 0;
    this.chunkIndex = 0;
    this.buffer.clear();
    this.flushedChunks = [];
    if (this.config.maxDurationMs > 0) {
      // Abort a recording that is never completed or aborted by the caller.
      this.timeoutId = setTimeout(() => {
        if (this.recording) {
          this.abort("Recording exceeded maximum duration");
        }
      }, this.config.maxDurationMs);
    }
  }
  /**
   * Record a text chunk
   *
   * @param {string} content - Text of the chunk; its length is added to `totalChars`.
   * @param {object} [metadata] - Kept only when `captureMetadata` is on.
   */
  recordText(content, metadata) {
    this.recordChunk({
      type: "text",
      content,
      metadata: this.config.captureMetadata ? metadata : void 0,
      timestamp: now(),
      index: this.chunkIndex++
    });
    this.totalChars += content.length;
  }
  /**
   * Record a tool call chunk
   *
   * Does nothing when `captureToolCalls` is off.
   *
   * @param {string} id - Tool call id.
   * @param {string} name - Tool name.
   * @param {*} args - Stored as `toolCall.arguments`.
   * @param {object} [metadata] - Kept only when `captureMetadata` is on.
   */
  recordToolCall(id, name, args, metadata) {
    if (!this.config.captureToolCalls) return;
    this.recordChunk({
      type: "tool_call",
      toolCall: { id, name, arguments: args },
      metadata: this.config.captureMetadata ? metadata : void 0,
      timestamp: now(),
      index: this.chunkIndex++
    });
  }
  /**
   * Record a tool result chunk
   *
   * Does nothing when `captureToolCalls` is off.
   *
   * @param {string} callId - Id of the tool call being answered.
   * @param {*} content - Result payload.
   * @param {object} [metadata] - Kept only when `captureMetadata` is on.
   */
  recordToolResult(callId, content, metadata) {
    if (!this.config.captureToolCalls) return;
    this.recordChunk({
      type: "tool_result",
      toolResult: { callId, content },
      metadata: this.config.captureMetadata ? metadata : void 0,
      timestamp: now(),
      index: this.chunkIndex++
    });
  }
  /**
   * Record metadata
   *
   * Does nothing when `captureMetadata` is off.
   *
   * @param {object} metadata - Payload of the metadata chunk.
   */
  recordMetadata(metadata) {
    if (!this.config.captureMetadata) return;
    this.recordChunk({
      type: "metadata",
      metadata,
      timestamp: now(),
      index: this.chunkIndex++
    });
  }
  /**
   * Record a generic chunk
   *
   * @param {object} chunk - Chunk to append to the recording.
   * @throws {Error} When no recording is active or the chunk's index has
   *   reached `maxChunks`.
   */
  recordChunk(chunk) {
    if (!this.recording) {
      throw new Error("Not currently recording");
    }
    if (chunk.index >= this.config.maxChunks) {
      throw new Error("Maximum chunks exceeded");
    }
    this.buffer.add(chunk);
  }
  /**
   * Complete the recording and return the recorded stream
   *
   * @param {object} [tokenUsage] - Stored on the stream as `tokenUsage`.
   * @returns {object} The stream, with `complete: true`.
   * @throws {Error} When no recording is active.
   */
  complete(tokenUsage) {
    if (!this.recording) {
      throw new Error("Not currently recording");
    }
    return this.finishRecording({ tokenUsage, complete: true });
  }
  /**
   * Abort the recording
   *
   * @param {string} [reason] - Stored as `error`; defaults to "Recording aborted".
   * @returns {object} The partial stream, with `complete: false`.
   * @throws {Error} When no recording is active.
   */
  abort(reason) {
    if (!this.recording) {
      throw new Error("Not currently recording");
    }
    return this.finishRecording({
      complete: false,
      error: reason ?? "Recording aborted"
    });
  }
  /**
   * Check if currently recording
   *
   * @returns {boolean}
   */
  isRecording() {
    return this.recording;
  }
  /**
   * Get current chunk count
   *
   * @returns {number} Number of indices handed out by the record* helpers.
   */
  getChunkCount() {
    return this.chunkIndex;
  }
  /**
   * Get current recording duration in ms
   *
   * @returns {number} 0 when idle.
   */
  getDuration() {
    if (!this.recording) return 0;
    return now() - this.startTime;
  }
  /**
   * Destroy the recorder
   */
  destroy() {
    this.clearTimeout();
    this.buffer.destroy();
    this.reset();
  }
  /**
   * Stop the timer, assemble the stream object and return to the idle state.
   *
   * @param {object} tail - Outcome fields appended after the common ones.
   * @returns {object} The finished stream.
   */
  finishRecording(tail) {
    this.clearTimeout();
    const endTime = now();
    const chunks = this.drainChunks();
    const stream = {
      id: generateId(),
      key: this.key,
      chunks,
      model: this.model,
      messages: this.messages,
      startTime: this.startTime,
      endTime,
      durationMs: endTime - this.startTime,
      totalChars: this.totalChars,
      ...tail
    };
    this.reset();
    return stream;
  }
  /**
   * Collect every chunk of the current recording: the batches the buffer
   * already flushed on its own plus whatever it still holds.
   *
   * @returns {object[]} All recorded chunks.
   */
  drainChunks() {
    // The final flush delivers the remainder through the flush handler.
    this.buffer.flush();
    const chunks = this.flushedChunks;
    this.flushedChunks = [];
    if (this.config.buffer.preserveOrder) {
      chunks.sort((a, b) => a.index - b.index);
    }
    return chunks;
  }
  /**
   * Put all per-recording fields back to their idle values.
   */
  reset() {
    this.recording = false;
    this.startTime = 0;
    this.model = "";
    this.messages = [];
    this.key = "";
    this.totalChars = 0;
    this.chunkIndex = 0;
    this.flushedChunks = [];
  }
  /**
   * Cancel the max-duration timer, if one is pending.
   */
  clearTimeout() {
    if (this.timeoutId) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
  }
};
|
|
2559
|
+
/**
 * Factory wrapper around `new StreamRecorder(config)`.
 *
 * @param {object} [config] - Passed to the constructor unchanged.
 * @returns {StreamRecorder} The new recorder instance.
 */
function createStreamRecorder(config) {
  const recorder = new StreamRecorder(config);
  return recorder;
}
|
|
2562
|
+
|
|
2563
|
+
// src/streaming/StreamReplayer.ts
|
|
2564
|
+
// Defaults merged under the caller's options by StreamReplayer.
var DEFAULT_CONFIG11 = {
  // Recorded gaps between chunks are divided by this factor.
  speedMultiplier: 1,
  // Lower and upper clamp for the simulated gap between two chunks.
  minDelayMs: 0,
  maxDelayMs: 100,
  // When false, replay() yields chunks without waiting.
  simulateTiming: false,
  // No-op callbacks so the replayer can call them unconditionally.
  onChunk: () => {
  },
  onComplete: () => {
  },
  onError: () => {
  }
};
|
|
2576
|
+
/**
 * Replays a recorded stream chunk by chunk, optionally reproducing the
 * recorded gaps between chunks.
 */
var StreamReplayer = class {
  config;
  abortController = null;
  /**
   * @param {object} [config] - Overrides for DEFAULT_CONFIG11.
   */
  constructor(config) {
    this.config = { ...DEFAULT_CONFIG11, ...config };
  }
  /**
   * Replay a recorded stream as an async iterable
   *
   * Chunks are yielded in `index` order. With `simulateTiming` on, the
   * recorded gap before each chunk (divided by `speedMultiplier`, clamped to
   * `minDelayMs`..`maxDelayMs`) is awaited first. `onChunk` runs before each
   * yield. `onComplete` runs only when every chunk was delivered; a replay
   * cut short by `stop()` does not report completion, whether the stop lands
   * during a delay or between two chunks.
   *
   * @param {object} stream - Recorded stream; reads `chunks`.
   * @yields {object} Each chunk in order.
   * @throws Any non-abort error, after passing it to `onError`.
   */
  async *replay(stream) {
    const controller = new AbortController();
    this.abortController = controller;
    const { signal } = controller;
    try {
      const chunks = this.getAllChunks(stream);
      let lastTimestamp = chunks[0]?.timestamp ?? 0;
      let stoppedEarly = false;
      for (let i = 0; i < chunks.length; i++) {
        if (signal.aborted) {
          stoppedEarly = true;
          break;
        }
        const chunk = chunks[i];
        if (this.config.simulateTiming && i > 0) {
          const timeDiff = chunk.timestamp - lastTimestamp;
          const delay = Math.min(
            Math.max(timeDiff / this.config.speedMultiplier, this.config.minDelayMs),
            this.config.maxDelayMs
          );
          if (delay > 0) {
            await this.delay(delay, signal);
          }
        }
        lastTimestamp = chunk.timestamp;
        this.config.onChunk(chunk);
        yield chunk;
      }
      if (!stoppedEarly) {
        this.config.onComplete(stream);
      }
    } catch (error) {
      // An abort during a delay is a normal stop, not a failure.
      if (error.name !== "AbortError") {
        this.config.onError(error);
        throw error;
      }
    } finally {
      // Only release the controller if it is still ours; a newer replay on
      // this instance may have installed its own in the meantime.
      if (this.abortController === controller) {
        this.abortController = null;
      }
    }
  }
  /**
   * Replay as a full async iterable of text content only
   *
   * @param {object} stream - Recorded stream.
   * @yields {string} The `content` of each text chunk that has any.
   */
  async *replayText(stream) {
    for await (const chunk of this.replay(stream)) {
      if (chunk.type === "text" && chunk.content) {
        yield chunk.content;
      }
    }
  }
  /**
   * Replay synchronously (no timing simulation)
   *
   * @param {object} stream - Recorded stream.
   * @yields {object} Each chunk in `index` order.
   */
  *replaySync(stream) {
    for (const chunk of this.getAllChunks(stream)) {
      this.config.onChunk(chunk);
      yield chunk;
    }
    this.config.onComplete(stream);
  }
  /**
   * Get all chunks at once
   *
   * @param {object} stream - Recorded stream.
   * @returns {object[]} A sorted copy of `stream.chunks`.
   */
  getAllChunks(stream) {
    return [...stream.chunks].sort((a, b) => a.index - b.index);
  }
  /**
   * Get full text content from stream
   *
   * @param {object} stream - Recorded stream.
   * @returns {string} Concatenated content of the text chunks, in order.
   */
  getFullText(stream) {
    return stream.chunks
      .filter((c) => c.type === "text" && c.content)
      .sort((a, b) => a.index - b.index)
      .map((c) => c.content)
      .join("");
  }
  /**
   * Get tool calls from stream
   *
   * @param {object} stream - Recorded stream.
   * @returns {object[]} The `toolCall` of each tool-call chunk, in order.
   */
  getToolCalls(stream) {
    return stream.chunks
      .filter((c) => c.type === "tool_call" && c.toolCall)
      .sort((a, b) => a.index - b.index)
      .map((c) => c.toolCall);
  }
  /**
   * Stop current replay
   */
  stop() {
    if (this.abortController) {
      this.abortController.abort();
    }
  }
  /**
   * Update configuration
   *
   * @param {object} config - Keys to overwrite on the current configuration.
   */
  configure(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Wait `ms` milliseconds, or reject with an AbortError as soon as `signal`
   * is aborted. The abort listener is removed once the wait is over, so a
   * long replay does not pile up one listener per chunk on the signal.
   *
   * @param {number} ms - Time to wait.
   * @param {AbortSignal} signal - Signal that cancels the wait.
   * @returns {Promise<void>}
   */
  delay(ms, signal) {
    return new Promise((resolve, reject) => {
      if (signal.aborted) {
        reject(new DOMException("Aborted", "AbortError"));
        return;
      }
      const onAbort = () => {
        clearTimeout(timeoutId);
        reject(new DOMException("Aborted", "AbortError"));
      };
      const timeoutId = setTimeout(() => {
        signal.removeEventListener("abort", onAbort);
        resolve();
      }, ms);
      signal.addEventListener("abort", onAbort, { once: true });
    });
  }
};
|
|
2686
|
+
/**
 * Factory wrapper around the StreamReplayer constructor.
 *
 * @param {object} [config] - Passed unchanged to `new StreamReplayer(config)`.
 * @returns {StreamReplayer} The newly constructed replayer.
 */
function createStreamReplayer(config) {
  const replayer = new StreamReplayer(config);
  return replayer;
}
|
|
2689
|
+
// Default StreamCache options. The StreamCache constructor shallow-merges the
// caller's config over this object, so nested objects are replaced wholesale.
var DEFAULT_CONFIG12 = {
  // Passed to the StreamRecorder constructor.
  recorder: {},
  // Passed to the StreamReplayer constructor.
  replayer: {},
  // When false, StreamCache.cache() skips streams whose `complete` flag is falsy.
  cacheIncomplete: false,
  // StreamCache.cache() skips streams whose `totalChars` is below this value.
  minLengthToCache: 10,
  // Stored as `metadata.ttl` on each cached entry.
  streamTtl: 3600
  // 1 hour
};
|
|
2697
|
+
var StreamCache = class extends EventEmitter2 {
|
|
2698
|
+
store;
|
|
2699
|
+
similarity;
|
|
2700
|
+
config;
|
|
2701
|
+
recorder;
|
|
2702
|
+
replayer;
|
|
2703
|
+
stats = {
|
|
2704
|
+
totalLookups: 0,
|
|
2705
|
+
totalHits: 0,
|
|
2706
|
+
totalMisses: 0,
|
|
2707
|
+
hitRate: 0,
|
|
2708
|
+
avgReplayLatencyMs: 0,
|
|
2709
|
+
totalStreamsCached: 0,
|
|
2710
|
+
totalBytesCached: 0,
|
|
2711
|
+
avgStreamDurationMs: 0
|
|
2712
|
+
};
|
|
2713
|
+
replayLatencies = [];
|
|
2714
|
+
streamDurations = [];
|
|
2715
|
+
constructor(store, config, similarity) {
|
|
2716
|
+
super();
|
|
2717
|
+
this.store = store;
|
|
2718
|
+
this.similarity = similarity;
|
|
2719
|
+
this.config = { ...DEFAULT_CONFIG12, ...config };
|
|
2720
|
+
this.recorder = new StreamRecorder(this.config.recorder);
|
|
2721
|
+
this.replayer = new StreamReplayer(this.config.replayer);
|
|
2722
|
+
}
|
|
2723
|
+
/**
|
|
2724
|
+
* Look up a cached stream
|
|
2725
|
+
*/
|
|
2726
|
+
async lookup(model, messages) {
|
|
2727
|
+
const startTime = performance.now();
|
|
2728
|
+
this.stats.totalLookups++;
|
|
2729
|
+
const key = this.generateStreamKey(model, messages);
|
|
2730
|
+
try {
|
|
2731
|
+
const entry = await this.store.get(key);
|
|
2732
|
+
if (entry) {
|
|
2733
|
+
const stream = this.deserializeStream(entry.response.content);
|
|
2734
|
+
if (stream) {
|
|
2735
|
+
this.stats.totalHits++;
|
|
2736
|
+
this.updateHitRate();
|
|
2737
|
+
const latencyMs2 = performance.now() - startTime;
|
|
2738
|
+
this.replayLatencies.push(latencyMs2);
|
|
2739
|
+
const result = {
|
|
2740
|
+
hit: true,
|
|
2741
|
+
stream,
|
|
2742
|
+
similarity: 1,
|
|
2743
|
+
source: "exact",
|
|
2744
|
+
latencyMs: latencyMs2
|
|
2745
|
+
};
|
|
2746
|
+
this.emit("hit", result);
|
|
2747
|
+
return result;
|
|
2748
|
+
}
|
|
2749
|
+
}
|
|
2750
|
+
if (this.similarity) {
|
|
2751
|
+
const userMessage = messages.find((m) => m.role === "user")?.content;
|
|
2752
|
+
if (userMessage) {
|
|
2753
|
+
const embedding = await this.similarity.embed(userMessage);
|
|
2754
|
+
const results = await this.store.query(embedding, {
|
|
2755
|
+
topK: 1,
|
|
2756
|
+
minSimilarity: 0.92
|
|
2757
|
+
});
|
|
2758
|
+
if (results.entries.length > 0) {
|
|
2759
|
+
const entry2 = results.entries[0];
|
|
2760
|
+
const stream = this.deserializeStream(entry2.response.content);
|
|
2761
|
+
if (stream && entry2.score >= 0.92) {
|
|
2762
|
+
this.stats.totalHits++;
|
|
2763
|
+
this.updateHitRate();
|
|
2764
|
+
const latencyMs2 = performance.now() - startTime;
|
|
2765
|
+
this.replayLatencies.push(latencyMs2);
|
|
2766
|
+
const result = {
|
|
2767
|
+
hit: true,
|
|
2768
|
+
stream,
|
|
2769
|
+
similarity: entry2.score,
|
|
2770
|
+
source: "semantic",
|
|
2771
|
+
latencyMs: latencyMs2
|
|
2772
|
+
};
|
|
2773
|
+
this.emit("hit", result);
|
|
2774
|
+
return result;
|
|
2775
|
+
}
|
|
2776
|
+
}
|
|
2777
|
+
}
|
|
2778
|
+
}
|
|
2779
|
+
this.stats.totalMisses++;
|
|
2780
|
+
this.updateHitRate();
|
|
2781
|
+
const latencyMs = performance.now() - startTime;
|
|
2782
|
+
this.emit("miss", key);
|
|
2783
|
+
return {
|
|
2784
|
+
hit: false,
|
|
2785
|
+
source: "miss",
|
|
2786
|
+
latencyMs
|
|
2787
|
+
};
|
|
2788
|
+
} catch (error) {
|
|
2789
|
+
this.emit("error", error);
|
|
2790
|
+
return {
|
|
2791
|
+
hit: false,
|
|
2792
|
+
source: "miss",
|
|
2793
|
+
latencyMs: performance.now() - startTime
|
|
2794
|
+
};
|
|
2795
|
+
}
|
|
2796
|
+
}
|
|
2797
|
+
/**
|
|
2798
|
+
* Cache a recorded stream
|
|
2799
|
+
*/
|
|
2800
|
+
async cache(stream, embedding) {
|
|
2801
|
+
if (!stream.complete && !this.config.cacheIncomplete) {
|
|
2802
|
+
return;
|
|
2803
|
+
}
|
|
2804
|
+
if (stream.totalChars < this.config.minLengthToCache) {
|
|
2805
|
+
return;
|
|
2806
|
+
}
|
|
2807
|
+
const key = this.generateStreamKey(stream.model, stream.messages);
|
|
2808
|
+
try {
|
|
2809
|
+
const cacheMessages = stream.messages.map((m) => ({
|
|
2810
|
+
role: m.role,
|
|
2811
|
+
content: m.content
|
|
2812
|
+
}));
|
|
2813
|
+
await this.store.set(key, {
|
|
2814
|
+
id: generateId(),
|
|
2815
|
+
key,
|
|
2816
|
+
request: {
|
|
2817
|
+
messages: cacheMessages,
|
|
2818
|
+
model: stream.model
|
|
2819
|
+
},
|
|
2820
|
+
response: {
|
|
2821
|
+
content: this.serializeStream(stream),
|
|
2822
|
+
model: stream.model,
|
|
2823
|
+
finishReason: stream.complete ? "stop" : "error",
|
|
2824
|
+
usage: {
|
|
2825
|
+
promptTokens: stream.tokenUsage?.prompt ?? 0,
|
|
2826
|
+
completionTokens: stream.tokenUsage?.completion ?? 0,
|
|
2827
|
+
totalTokens: stream.tokenUsage?.total ?? 0
|
|
2828
|
+
}
|
|
2829
|
+
},
|
|
2830
|
+
embedding,
|
|
2831
|
+
metadata: {
|
|
2832
|
+
createdAt: now(),
|
|
2833
|
+
accessedAt: now(),
|
|
2834
|
+
accessCount: 1,
|
|
2835
|
+
hitCount: 0,
|
|
2836
|
+
ttl: this.config.streamTtl
|
|
2837
|
+
}
|
|
2838
|
+
});
|
|
2839
|
+
this.stats.totalStreamsCached++;
|
|
2840
|
+
this.stats.totalBytesCached += this.estimateStreamSize(stream);
|
|
2841
|
+
this.streamDurations.push(stream.durationMs);
|
|
2842
|
+
this.updateAvgStreamDuration();
|
|
2843
|
+
this.emit("record", stream);
|
|
2844
|
+
} catch (error) {
|
|
2845
|
+
this.emit("error", error);
|
|
2846
|
+
}
|
|
2847
|
+
}
|
|
2848
|
+
/**
|
|
2849
|
+
* Wrap a streaming function with caching
|
|
2850
|
+
*/
|
|
2851
|
+
async *wrapStream(model, messages, streamFn, options) {
|
|
2852
|
+
const lookupResult = await this.lookup(model, messages);
|
|
2853
|
+
if (lookupResult.hit && lookupResult.stream) {
|
|
2854
|
+
for await (const chunk of this.replayer.replay(lookupResult.stream)) {
|
|
2855
|
+
yield { content: chunk.content };
|
|
2856
|
+
}
|
|
2857
|
+
return;
|
|
2858
|
+
}
|
|
2859
|
+
const key = this.generateStreamKey(model, messages);
|
|
2860
|
+
this.recorder.start(model, messages, key);
|
|
2861
|
+
try {
|
|
2862
|
+
for await (const chunk of streamFn()) {
|
|
2863
|
+
if (chunk.content) {
|
|
2864
|
+
this.recorder.recordText(chunk.content);
|
|
2865
|
+
}
|
|
2866
|
+
yield chunk;
|
|
2867
|
+
}
|
|
2868
|
+
const stream = this.recorder.complete();
|
|
2869
|
+
await this.cache(stream, options?.embedding);
|
|
2870
|
+
} catch (error) {
|
|
2871
|
+
if (this.recorder.isRecording()) {
|
|
2872
|
+
this.recorder.abort(error.message);
|
|
2873
|
+
}
|
|
2874
|
+
throw error;
|
|
2875
|
+
}
|
|
2876
|
+
}
|
|
2877
|
+
/**
|
|
2878
|
+
* Replay a cached stream
|
|
2879
|
+
*/
|
|
2880
|
+
async *replay(stream) {
|
|
2881
|
+
for await (const chunk of this.replayer.replay(stream)) {
|
|
2882
|
+
yield chunk;
|
|
2883
|
+
}
|
|
2884
|
+
}
|
|
2885
|
+
/**
|
|
2886
|
+
* Get cache statistics
|
|
2887
|
+
*/
|
|
2888
|
+
getStats() {
|
|
2889
|
+
return { ...this.stats };
|
|
2890
|
+
}
|
|
2891
|
+
/**
|
|
2892
|
+
* Clear the stream cache
|
|
2893
|
+
*/
|
|
2894
|
+
async clear() {
|
|
2895
|
+
await this.store.clear();
|
|
2896
|
+
this.stats = {
|
|
2897
|
+
totalLookups: 0,
|
|
2898
|
+
totalHits: 0,
|
|
2899
|
+
totalMisses: 0,
|
|
2900
|
+
hitRate: 0,
|
|
2901
|
+
avgReplayLatencyMs: 0,
|
|
2902
|
+
totalStreamsCached: 0,
|
|
2903
|
+
totalBytesCached: 0,
|
|
2904
|
+
avgStreamDurationMs: 0
|
|
2905
|
+
};
|
|
2906
|
+
this.replayLatencies = [];
|
|
2907
|
+
this.streamDurations = [];
|
|
2908
|
+
}
|
|
2909
|
+
/**
|
|
2910
|
+
* Destroy the stream cache
|
|
2911
|
+
*/
|
|
2912
|
+
destroy() {
|
|
2913
|
+
this.recorder.destroy();
|
|
2914
|
+
this.replayer.stop();
|
|
2915
|
+
this.removeAllListeners();
|
|
2916
|
+
}
|
|
2917
|
+
generateStreamKey(model, messages) {
|
|
2918
|
+
const cacheMessages = messages.map((m) => ({
|
|
2919
|
+
role: m.role,
|
|
2920
|
+
content: m.content
|
|
2921
|
+
}));
|
|
2922
|
+
return generateCacheKey(model, cacheMessages);
|
|
2923
|
+
}
|
|
2924
|
+
serializeStream(stream) {
|
|
2925
|
+
return JSON.stringify(stream);
|
|
2926
|
+
}
|
|
2927
|
+
deserializeStream(content) {
|
|
2928
|
+
try {
|
|
2929
|
+
return JSON.parse(content);
|
|
2930
|
+
} catch {
|
|
2931
|
+
return null;
|
|
2932
|
+
}
|
|
2933
|
+
}
|
|
2934
|
+
estimateStreamSize(stream) {
|
|
2935
|
+
return JSON.stringify(stream).length;
|
|
2936
|
+
}
|
|
2937
|
+
updateHitRate() {
|
|
2938
|
+
if (this.stats.totalLookups > 0) {
|
|
2939
|
+
this.stats.hitRate = this.stats.totalHits / this.stats.totalLookups * 100;
|
|
2940
|
+
}
|
|
2941
|
+
}
|
|
2942
|
+
updateAvgStreamDuration() {
|
|
2943
|
+
if (this.streamDurations.length > 0) {
|
|
2944
|
+
this.stats.avgStreamDurationMs = this.streamDurations.reduce((a, b) => a + b, 0) / this.streamDurations.length;
|
|
2945
|
+
}
|
|
2946
|
+
}
|
|
2947
|
+
};
|
|
2948
|
+
/**
 * Factory wrapper around the StreamCache constructor.
 *
 * @param {object} store - Backing cache store.
 * @param {object} [config] - Stream cache options.
 * @param {object} [similarity] - Optional similarity engine.
 * @returns {StreamCache} The newly constructed cache.
 */
function createStreamCache(store, config, similarity) {
  const streamCache = new StreamCache(store, config, similarity);
  return streamCache;
}
|
|
2951
|
+
// Default InvalidationManager options. The constructor shallow-merges the
// caller's config over this object, so a caller-supplied `ttl` object replaces
// the one below entirely.
var DEFAULT_CONFIG13 = {
  // Selects which routine run() dispatches to: "ttl", "lru" or "smart";
  // any other value falls back to TTL invalidation.
  strategy: "ttl",
  ttl: {
    // Fallback TTL returned by getTTL() when no per-model or per-namespace
    // TTL applies. Compared against entry age in seconds.
    defaultTtl: 3600,
    // When true and a `gracePeriod` is also configured, expired entries are
    // kept until their age reaches ttl + gracePeriod.
    softTtl: false
  },
  // When true, emitEvent() emits "invalidate" and calls `onInvalidate`
  // whenever at least one key was removed.
  emitEvents: true
};
|
|
2959
|
+
var InvalidationManager = class extends EventEmitter2 {
|
|
2960
|
+
store;
|
|
2961
|
+
config;
|
|
2962
|
+
autoInterval = null;
|
|
2963
|
+
stats = {
|
|
2964
|
+
totalInvalidations: 0,
|
|
2965
|
+
ttlRemovals: 0,
|
|
2966
|
+
lruRemovals: 0,
|
|
2967
|
+
eventRemovals: 0,
|
|
2968
|
+
smartRemovals: 0,
|
|
2969
|
+
manualRemovals: 0,
|
|
2970
|
+
totalBytesFreed: 0
|
|
2971
|
+
};
|
|
2972
|
+
accessTimes = /* @__PURE__ */ new Map();
|
|
2973
|
+
constructor(store, config) {
|
|
2974
|
+
super();
|
|
2975
|
+
this.store = store;
|
|
2976
|
+
this.config = { ...DEFAULT_CONFIG13, ...config };
|
|
2977
|
+
}
|
|
2978
|
+
/**
|
|
2979
|
+
* Run invalidation based on configured strategy
|
|
2980
|
+
*/
|
|
2981
|
+
async run() {
|
|
2982
|
+
switch (this.config.strategy) {
|
|
2983
|
+
case "ttl":
|
|
2984
|
+
return this.runTTLInvalidation();
|
|
2985
|
+
case "lru":
|
|
2986
|
+
return this.runLRUInvalidation();
|
|
2987
|
+
case "smart":
|
|
2988
|
+
return this.runSmartInvalidation();
|
|
2989
|
+
default:
|
|
2990
|
+
return this.runTTLInvalidation();
|
|
2991
|
+
}
|
|
2992
|
+
}
|
|
2993
|
+
/**
|
|
2994
|
+
* Run TTL-based invalidation
|
|
2995
|
+
*/
|
|
2996
|
+
async runTTLInvalidation() {
|
|
2997
|
+
const startTime = performance.now();
|
|
2998
|
+
const invalidatedKeys = [];
|
|
2999
|
+
let bytesFreed = 0;
|
|
3000
|
+
const currentTime = now();
|
|
3001
|
+
const keys = await this.store.keys();
|
|
3002
|
+
for (const key of keys) {
|
|
3003
|
+
const entry = await this.store.get(key);
|
|
3004
|
+
if (!entry) continue;
|
|
3005
|
+
const ttl = entry.metadata.ttl ?? this.getTTL(entry.request.model, entry.metadata.namespace);
|
|
3006
|
+
const age = (currentTime - entry.metadata.createdAt) / 1e3;
|
|
3007
|
+
if (age >= ttl) {
|
|
3008
|
+
if (this.config.ttl?.softTtl && this.config.ttl?.gracePeriod) {
|
|
3009
|
+
if (age < ttl + this.config.ttl.gracePeriod) {
|
|
3010
|
+
continue;
|
|
3011
|
+
}
|
|
3012
|
+
}
|
|
3013
|
+
const deleted = await this.store.delete(key);
|
|
3014
|
+
if (deleted) {
|
|
3015
|
+
invalidatedKeys.push(key);
|
|
3016
|
+
bytesFreed += this.estimateEntrySize(entry);
|
|
3017
|
+
}
|
|
3018
|
+
}
|
|
3019
|
+
}
|
|
3020
|
+
this.stats.totalInvalidations++;
|
|
3021
|
+
this.stats.ttlRemovals += invalidatedKeys.length;
|
|
3022
|
+
this.stats.totalBytesFreed += bytesFreed;
|
|
3023
|
+
this.stats.lastInvalidationAt = now();
|
|
3024
|
+
const result = {
|
|
3025
|
+
invalidatedKeys,
|
|
3026
|
+
entriesRemoved: invalidatedKeys.length,
|
|
3027
|
+
bytesFreed,
|
|
3028
|
+
durationMs: performance.now() - startTime
|
|
3029
|
+
};
|
|
3030
|
+
this.emitEvent("ttl", invalidatedKeys, bytesFreed);
|
|
3031
|
+
return result;
|
|
3032
|
+
}
|
|
3033
|
+
/**
|
|
3034
|
+
* Run LRU-based invalidation
|
|
3035
|
+
*/
|
|
3036
|
+
async runLRUInvalidation() {
|
|
3037
|
+
const startTime = performance.now();
|
|
3038
|
+
const invalidatedKeys = [];
|
|
3039
|
+
let bytesFreed = 0;
|
|
3040
|
+
const maxEntries = this.config.lru?.maxEntries ?? 1e3;
|
|
3041
|
+
this.config.lru?.maxSizeBytes ?? Infinity;
|
|
3042
|
+
const batchSize = this.config.lru?.evictionBatchSize ?? 10;
|
|
3043
|
+
const minAge = this.config.lru?.minAge ?? 0;
|
|
3044
|
+
const currentSize = await this.store.size();
|
|
3045
|
+
if (currentSize <= maxEntries) {
|
|
3046
|
+
return {
|
|
3047
|
+
invalidatedKeys: [],
|
|
3048
|
+
entriesRemoved: 0,
|
|
3049
|
+
bytesFreed: 0,
|
|
3050
|
+
durationMs: performance.now() - startTime
|
|
3051
|
+
};
|
|
3052
|
+
}
|
|
3053
|
+
const keys = await this.store.keys();
|
|
3054
|
+
const entriesWithAccess = [];
|
|
3055
|
+
for (const key of keys) {
|
|
3056
|
+
const entry = await this.store.get(key);
|
|
3057
|
+
if (entry) {
|
|
3058
|
+
entriesWithAccess.push({
|
|
3059
|
+
key,
|
|
3060
|
+
accessedAt: entry.metadata.accessedAt,
|
|
3061
|
+
size: this.estimateEntrySize(entry)
|
|
3062
|
+
});
|
|
3063
|
+
}
|
|
3064
|
+
}
|
|
3065
|
+
entriesWithAccess.sort((a, b) => a.accessedAt - b.accessedAt);
|
|
3066
|
+
const toRemove = Math.min(currentSize - maxEntries, batchSize);
|
|
3067
|
+
const currentTime = now();
|
|
3068
|
+
for (let i = 0; i < toRemove && i < entriesWithAccess.length; i++) {
|
|
3069
|
+
const { key, accessedAt, size } = entriesWithAccess[i];
|
|
3070
|
+
const age = (currentTime - accessedAt) / 1e3;
|
|
3071
|
+
if (age < minAge) continue;
|
|
3072
|
+
const deleted = await this.store.delete(key);
|
|
3073
|
+
if (deleted) {
|
|
3074
|
+
invalidatedKeys.push(key);
|
|
3075
|
+
bytesFreed += size;
|
|
3076
|
+
}
|
|
3077
|
+
}
|
|
3078
|
+
this.stats.totalInvalidations++;
|
|
3079
|
+
this.stats.lruRemovals += invalidatedKeys.length;
|
|
3080
|
+
this.stats.totalBytesFreed += bytesFreed;
|
|
3081
|
+
this.stats.lastInvalidationAt = now();
|
|
3082
|
+
const result = {
|
|
3083
|
+
invalidatedKeys,
|
|
3084
|
+
entriesRemoved: invalidatedKeys.length,
|
|
3085
|
+
bytesFreed,
|
|
3086
|
+
durationMs: performance.now() - startTime
|
|
3087
|
+
};
|
|
3088
|
+
this.emitEvent("lru", invalidatedKeys, bytesFreed);
|
|
3089
|
+
return result;
|
|
3090
|
+
}
|
|
3091
|
+
/**
|
|
3092
|
+
* Run smart invalidation (combines TTL + LRU + hit rate analysis)
|
|
3093
|
+
*/
|
|
3094
|
+
async runSmartInvalidation() {
|
|
3095
|
+
const startTime = performance.now();
|
|
3096
|
+
const invalidatedKeys = [];
|
|
3097
|
+
let bytesFreed = 0;
|
|
3098
|
+
const minHitRate = this.config.smart?.minHitRate ?? 0.1;
|
|
3099
|
+
const currentTime = now();
|
|
3100
|
+
const keys = await this.store.keys();
|
|
3101
|
+
for (const key of keys) {
|
|
3102
|
+
const entry = await this.store.get(key);
|
|
3103
|
+
if (!entry) continue;
|
|
3104
|
+
let shouldInvalidate = false;
|
|
3105
|
+
const ttl = entry.metadata.ttl ?? this.getTTL(entry.request.model, entry.metadata.namespace);
|
|
3106
|
+
const age = (currentTime - entry.metadata.createdAt) / 1e3;
|
|
3107
|
+
if (age >= ttl) {
|
|
3108
|
+
shouldInvalidate = true;
|
|
3109
|
+
}
|
|
3110
|
+
if (this.config.smart?.analyzeHitRate && entry.metadata.accessCount > 0) {
|
|
3111
|
+
const accessRate = entry.metadata.accessCount / Math.max(age / 3600, 1);
|
|
3112
|
+
if (accessRate < minHitRate) {
|
|
3113
|
+
shouldInvalidate = true;
|
|
3114
|
+
}
|
|
3115
|
+
}
|
|
3116
|
+
if (shouldInvalidate) {
|
|
3117
|
+
const deleted = await this.store.delete(key);
|
|
3118
|
+
if (deleted) {
|
|
3119
|
+
invalidatedKeys.push(key);
|
|
3120
|
+
bytesFreed += this.estimateEntrySize(entry);
|
|
3121
|
+
}
|
|
3122
|
+
}
|
|
3123
|
+
}
|
|
3124
|
+
this.stats.totalInvalidations++;
|
|
3125
|
+
this.stats.smartRemovals += invalidatedKeys.length;
|
|
3126
|
+
this.stats.totalBytesFreed += bytesFreed;
|
|
3127
|
+
this.stats.lastInvalidationAt = now();
|
|
3128
|
+
const result = {
|
|
3129
|
+
invalidatedKeys,
|
|
3130
|
+
entriesRemoved: invalidatedKeys.length,
|
|
3131
|
+
bytesFreed,
|
|
3132
|
+
durationMs: performance.now() - startTime
|
|
3133
|
+
};
|
|
3134
|
+
this.emitEvent("smart", invalidatedKeys, bytesFreed);
|
|
3135
|
+
return result;
|
|
3136
|
+
}
|
|
3137
|
+
/**
|
|
3138
|
+
* Manually invalidate specific keys
|
|
3139
|
+
*/
|
|
3140
|
+
async invalidateKeys(keys) {
|
|
3141
|
+
const startTime = performance.now();
|
|
3142
|
+
const invalidatedKeys = [];
|
|
3143
|
+
let bytesFreed = 0;
|
|
3144
|
+
for (const key of keys) {
|
|
3145
|
+
const entry = await this.store.get(key);
|
|
3146
|
+
if (entry) {
|
|
3147
|
+
const size = this.estimateEntrySize(entry);
|
|
3148
|
+
const deleted = await this.store.delete(key);
|
|
3149
|
+
if (deleted) {
|
|
3150
|
+
invalidatedKeys.push(key);
|
|
3151
|
+
bytesFreed += size;
|
|
3152
|
+
}
|
|
3153
|
+
}
|
|
3154
|
+
}
|
|
3155
|
+
this.stats.totalInvalidations++;
|
|
3156
|
+
this.stats.manualRemovals += invalidatedKeys.length;
|
|
3157
|
+
this.stats.totalBytesFreed += bytesFreed;
|
|
3158
|
+
this.stats.lastInvalidationAt = now();
|
|
3159
|
+
const result = {
|
|
3160
|
+
invalidatedKeys,
|
|
3161
|
+
entriesRemoved: invalidatedKeys.length,
|
|
3162
|
+
bytesFreed,
|
|
3163
|
+
durationMs: performance.now() - startTime
|
|
3164
|
+
};
|
|
3165
|
+
this.emitEvent("manual", invalidatedKeys, bytesFreed);
|
|
3166
|
+
return result;
|
|
3167
|
+
}
|
|
3168
|
+
/**
|
|
3169
|
+
* Invalidate by pattern (e.g., namespace or model)
|
|
3170
|
+
*/
|
|
3171
|
+
async invalidateByPattern(options) {
|
|
3172
|
+
const startTime = performance.now();
|
|
3173
|
+
const invalidatedKeys = [];
|
|
3174
|
+
let bytesFreed = 0;
|
|
3175
|
+
const currentTime = now();
|
|
3176
|
+
const keys = await this.store.keys();
|
|
3177
|
+
for (const key of keys) {
|
|
3178
|
+
const entry = await this.store.get(key);
|
|
3179
|
+
if (!entry) continue;
|
|
3180
|
+
let matches = true;
|
|
3181
|
+
if (options.namespace && entry.metadata.namespace !== options.namespace) {
|
|
3182
|
+
matches = false;
|
|
3183
|
+
}
|
|
3184
|
+
if (options.model && entry.request.model !== options.model) {
|
|
3185
|
+
matches = false;
|
|
3186
|
+
}
|
|
3187
|
+
if (options.olderThan) {
|
|
3188
|
+
const age = (currentTime - entry.metadata.createdAt) / 1e3;
|
|
3189
|
+
if (age < options.olderThan) {
|
|
3190
|
+
matches = false;
|
|
3191
|
+
}
|
|
3192
|
+
}
|
|
3193
|
+
if (matches) {
|
|
3194
|
+
const size = this.estimateEntrySize(entry);
|
|
3195
|
+
const deleted = await this.store.delete(key);
|
|
3196
|
+
if (deleted) {
|
|
3197
|
+
invalidatedKeys.push(key);
|
|
3198
|
+
bytesFreed += size;
|
|
3199
|
+
}
|
|
3200
|
+
}
|
|
3201
|
+
}
|
|
3202
|
+
this.stats.totalInvalidations++;
|
|
3203
|
+
this.stats.manualRemovals += invalidatedKeys.length;
|
|
3204
|
+
this.stats.totalBytesFreed += bytesFreed;
|
|
3205
|
+
this.stats.lastInvalidationAt = now();
|
|
3206
|
+
const result = {
|
|
3207
|
+
invalidatedKeys,
|
|
3208
|
+
entriesRemoved: invalidatedKeys.length,
|
|
3209
|
+
bytesFreed,
|
|
3210
|
+
durationMs: performance.now() - startTime
|
|
3211
|
+
};
|
|
3212
|
+
this.emitEvent("manual", invalidatedKeys, bytesFreed);
|
|
3213
|
+
return result;
|
|
3214
|
+
}
|
|
3215
|
+
/**
|
|
3216
|
+
* Start automatic invalidation
|
|
3217
|
+
*/
|
|
3218
|
+
startAuto(intervalMs = 6e4) {
|
|
3219
|
+
if (this.autoInterval) {
|
|
3220
|
+
this.stopAuto();
|
|
3221
|
+
}
|
|
3222
|
+
this.autoInterval = setInterval(() => {
|
|
3223
|
+
void (async () => {
|
|
3224
|
+
try {
|
|
3225
|
+
await this.run();
|
|
3226
|
+
} catch (error) {
|
|
3227
|
+
this.emit("error", error);
|
|
3228
|
+
}
|
|
3229
|
+
})();
|
|
3230
|
+
}, intervalMs);
|
|
3231
|
+
}
|
|
3232
|
+
/**
|
|
3233
|
+
* Stop automatic invalidation
|
|
3234
|
+
*/
|
|
3235
|
+
stopAuto() {
|
|
3236
|
+
if (this.autoInterval) {
|
|
3237
|
+
clearInterval(this.autoInterval);
|
|
3238
|
+
this.autoInterval = null;
|
|
3239
|
+
}
|
|
3240
|
+
}
|
|
3241
|
+
/**
|
|
3242
|
+
* Get invalidation statistics
|
|
3243
|
+
*/
|
|
3244
|
+
getStats() {
|
|
3245
|
+
return { ...this.stats };
|
|
3246
|
+
}
|
|
3247
|
+
/**
|
|
3248
|
+
* Reset statistics
|
|
3249
|
+
*/
|
|
3250
|
+
resetStats() {
|
|
3251
|
+
this.stats = {
|
|
3252
|
+
totalInvalidations: 0,
|
|
3253
|
+
ttlRemovals: 0,
|
|
3254
|
+
lruRemovals: 0,
|
|
3255
|
+
eventRemovals: 0,
|
|
3256
|
+
smartRemovals: 0,
|
|
3257
|
+
manualRemovals: 0,
|
|
3258
|
+
totalBytesFreed: 0
|
|
3259
|
+
};
|
|
3260
|
+
}
|
|
3261
|
+
/**
|
|
3262
|
+
* Destroy the manager
|
|
3263
|
+
*/
|
|
3264
|
+
destroy() {
|
|
3265
|
+
this.stopAuto();
|
|
3266
|
+
this.removeAllListeners();
|
|
3267
|
+
this.accessTimes.clear();
|
|
3268
|
+
}
|
|
3269
|
+
getTTL(model, namespace) {
|
|
3270
|
+
if (this.config.ttl?.modelTtls?.[model]) {
|
|
3271
|
+
return this.config.ttl.modelTtls[model];
|
|
3272
|
+
}
|
|
3273
|
+
if (namespace && this.config.ttl?.namespaceTtls?.[namespace]) {
|
|
3274
|
+
return this.config.ttl.namespaceTtls[namespace];
|
|
3275
|
+
}
|
|
3276
|
+
return this.config.ttl?.defaultTtl ?? 3600;
|
|
3277
|
+
}
|
|
3278
|
+
estimateEntrySize(entry) {
|
|
3279
|
+
return entry.response.content.length * 2 + 200;
|
|
3280
|
+
}
|
|
3281
|
+
emitEvent(reason, keys, bytesFreed) {
|
|
3282
|
+
if (this.config.emitEvents && keys.length > 0) {
|
|
3283
|
+
const event = {
|
|
3284
|
+
timestamp: now(),
|
|
3285
|
+
keys,
|
|
3286
|
+
reason,
|
|
3287
|
+
entriesRemoved: keys.length,
|
|
3288
|
+
bytesFreed
|
|
3289
|
+
};
|
|
3290
|
+
this.emit("invalidate", event);
|
|
3291
|
+
this.config.onInvalidate?.(event);
|
|
3292
|
+
}
|
|
3293
|
+
}
|
|
3294
|
+
};
|
|
3295
|
+
/**
 * Factory wrapper around the InvalidationManager constructor.
 *
 * @param {object} store - Cache store to invalidate entries in.
 * @param {object} [config] - Invalidation options.
 * @returns {InvalidationManager} The newly constructed manager.
 */
function createInvalidationManager(store, config) {
  const manager = new InvalidationManager(store, config);
  return manager;
}
|
|
3298
|
+
|
|
3299
|
+
export { BaseCacheStore, BaseMatchStrategy, CacheAnalytics, ChunkBuffer, ExactMatchStrategy, HybridMatchStrategy, InvalidationManager, MemoryCacheStore, PineconeCacheStore, RedisCacheStore, SQLiteCacheStore, SemanticCache, SemanticMatchStrategy, SimilarityEngine, StreamCache, StreamRecorder, StreamReplayer, TieredCacheStore, cosineSimilarity, createCacheAnalytics, createChunkBuffer, createExactMatchStrategy, createHybridMatchStrategy, createInvalidationManager, createMemoryCacheStore, createPineconeCacheStore, createRedisCacheStore, createSQLiteCacheStore, createSemanticCache, createSemanticMatchStrategy, createSimilarityEngine, createStreamCache, createStreamRecorder, createStreamReplayer, createTieredCacheStore, distanceToSimilarity, dotProduct, estimateEntrySize, euclideanDistance, extractSystemPrompt, extractUserMessage, generateCacheKey, generateConversationFingerprint, generateId, generateSemanticKey, isExpired, magnitude, manhattanDistance, normalize, normalizeWhitespace, now };
|
|
3300
|
+
//# sourceMappingURL=index.js.map
|
|
3301
|
+
//# sourceMappingURL=index.js.map
|