@lov3kaizen/agentsea-cache 0.5.1

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3301 @@
1
+ import EventEmitter2, { EventEmitter } from 'eventemitter3';
2
+ import { nanoid } from 'nanoid';
3
+ import murmurhash from 'murmurhash';
4
+ import { LRUCache } from 'lru-cache';
5
+
6
+ // src/core/SemanticCache.ts
7
+ function generateId(prefix) {
8
+ const id = nanoid(16);
9
+ return prefix ? `${prefix}_${id}` : id;
10
+ }
11
+ function now() {
12
+ return Date.now();
13
+ }
14
+ function isExpired(createdAt, ttlSeconds) {
15
+ if (ttlSeconds <= 0) return false;
16
+ return now() > createdAt + ttlSeconds * 1e3;
17
+ }
18
+ function estimateEntrySize(entry) {
19
+ const vectorSize = (entry.embedding?.length ?? 0) * 4;
20
+ const messageSize = entry.request.messages.reduce(
21
+ (acc, m) => acc + (m.content?.length ?? 0) * 2,
22
+ 0
23
+ );
24
+ const responseSize = (entry.response.content?.length ?? 0) * 2;
25
+ const overheadSize = 500;
26
+ return vectorSize + messageSize + responseSize + overheadSize;
27
+ }
28
+ function percentile(values, p) {
29
+ if (values.length === 0) return 0;
30
+ const sorted = [...values].sort((a, b) => a - b);
31
+ const index = Math.ceil(p / 100 * sorted.length) - 1;
32
+ return sorted[Math.max(0, index)];
33
+ }
34
+ function mean(values) {
35
+ if (values.length === 0) return 0;
36
+ return values.reduce((a, b) => a + b, 0) / values.length;
37
+ }
38
+
39
+ // src/analytics/CacheAnalytics.ts
40
+ var DEFAULT_MODEL_PRICING = {
41
+ "gpt-4o": { inputPer1K: 25e-4, outputPer1K: 0.01 },
42
+ "gpt-4o-mini": { inputPer1K: 15e-5, outputPer1K: 6e-4 },
43
+ "gpt-4-turbo": { inputPer1K: 0.01, outputPer1K: 0.03 },
44
+ "gpt-4": { inputPer1K: 0.03, outputPer1K: 0.06 },
45
+ "gpt-3.5-turbo": { inputPer1K: 5e-4, outputPer1K: 15e-4 },
46
+ "claude-3-opus": { inputPer1K: 0.015, outputPer1K: 0.075 },
47
+ "claude-sonnet-4-20250514": { inputPer1K: 3e-3, outputPer1K: 0.015 },
48
+ "claude-3-5-sonnet": { inputPer1K: 3e-3, outputPer1K: 0.015 },
49
+ "claude-3-haiku": { inputPer1K: 25e-5, outputPer1K: 125e-5 },
50
+ default: { inputPer1K: 5e-3, outputPer1K: 0.015 }
51
+ };
52
+ var DEFAULT_CONFIG = {
53
+ enabled: true,
54
+ sampleRate: 1,
55
+ retentionSeconds: 86400 * 7,
56
+ // 7 days
57
+ flushIntervalMs: 6e4,
58
+ // 1 minute
59
+ modelPricing: DEFAULT_MODEL_PRICING
60
+ };
61
+ var CacheAnalytics = class {
62
+ config;
63
+ hits = 0;
64
+ misses = 0;
65
+ exactHits = 0;
66
+ semanticHits = 0;
67
+ tokensSaved = 0;
68
+ inputTokensSaved = 0;
69
+ outputTokensSaved = 0;
70
+ latencies = [];
71
+ modelHits = /* @__PURE__ */ new Map();
72
+ namespaceHits = /* @__PURE__ */ new Map();
73
+ hitEvents = [];
74
+ missEvents = [];
75
+ setCount = 0;
76
+ constructor(config) {
77
+ this.config = { ...DEFAULT_CONFIG, ...config };
78
+ }
79
+ /**
80
+ * Record a cache hit
81
+ */
82
+ recordHit(entry, type, latencyMs) {
83
+ if (!this.config.enabled) return;
84
+ if (Math.random() > this.config.sampleRate) return;
85
+ this.hits++;
86
+ if (type === "exact") {
87
+ this.exactHits++;
88
+ } else {
89
+ this.semanticHits++;
90
+ }
91
+ this.latencies.push(latencyMs);
92
+ const usage = entry.response.usage;
93
+ this.tokensSaved += usage.totalTokens;
94
+ this.inputTokensSaved += usage.promptTokens;
95
+ this.outputTokensSaved += usage.completionTokens;
96
+ const modelCount = this.modelHits.get(entry.request.model) ?? 0;
97
+ this.modelHits.set(entry.request.model, modelCount + 1);
98
+ if (entry.metadata.namespace) {
99
+ const nsCount = this.namespaceHits.get(entry.metadata.namespace) ?? 0;
100
+ this.namespaceHits.set(entry.metadata.namespace, nsCount + 1);
101
+ }
102
+ this.hitEvents.push({
103
+ timestamp: Date.now(),
104
+ type,
105
+ model: entry.request.model,
106
+ namespace: entry.metadata.namespace,
107
+ similarity: entry.metadata.similarity,
108
+ latencyMs,
109
+ tokensSaved: usage.totalTokens
110
+ });
111
+ this.trimEvents();
112
+ }
113
+ /**
114
+ * Record a cache miss
115
+ */
116
+ recordMiss(latencyMs, reason = "not_found") {
117
+ if (!this.config.enabled) return;
118
+ if (Math.random() > this.config.sampleRate) return;
119
+ this.misses++;
120
+ this.latencies.push(latencyMs);
121
+ this.missEvents.push({
122
+ timestamp: Date.now(),
123
+ model: "unknown",
124
+ latencyMs,
125
+ reason
126
+ });
127
+ this.trimEvents();
128
+ }
129
+ /**
130
+ * Record a cache set operation
131
+ */
132
+ recordSet(_entry) {
133
+ if (!this.config.enabled) return;
134
+ this.setCount++;
135
+ }
136
+ /**
137
+ * Get analytics summary
138
+ */
139
+ getSummary() {
140
+ const total = this.hits + this.misses;
141
+ return {
142
+ totalHits: this.hits,
143
+ totalMisses: this.misses,
144
+ exactHits: this.exactHits,
145
+ semanticHits: this.semanticHits,
146
+ hitRate: total > 0 ? this.hits / total : 0,
147
+ avgLatencyMs: mean(this.latencies),
148
+ p50LatencyMs: percentile(this.latencies, 50),
149
+ p95LatencyMs: percentile(this.latencies, 95),
150
+ p99LatencyMs: percentile(this.latencies, 99),
151
+ totalTokensSaved: this.tokensSaved,
152
+ estimatedCostSavingsUSD: this.calculateCostSavings(),
153
+ topModels: this.getTopModels(5),
154
+ topNamespaces: this.getTopNamespaces(5),
155
+ hourlyStats: this.getHourlyStats()
156
+ };
157
+ }
158
+ /**
159
+ * Get cost savings report
160
+ */
161
+ getCostSavingsReport(periodLabel = "all-time") {
162
+ const total = this.hits + this.misses;
163
+ const costSaved = this.calculateCostSavings();
164
+ const avgCostPerRequest = costSaved / (this.hits || 1);
165
+ const estimatedCostWithoutCache = avgCostPerRequest * total;
166
+ return {
167
+ period: periodLabel,
168
+ totalRequests: total,
169
+ cachedRequests: this.hits,
170
+ hitRate: total > 0 ? this.hits / total : 0,
171
+ inputTokensSaved: this.inputTokensSaved,
172
+ outputTokensSaved: this.outputTokensSaved,
173
+ totalTokensSaved: this.tokensSaved,
174
+ estimatedCostWithoutCache,
175
+ actualCostWithCache: estimatedCostWithoutCache - costSaved,
176
+ costSaved,
177
+ reductionPercent: estimatedCostWithoutCache > 0 ? costSaved / estimatedCostWithoutCache * 100 : 0
178
+ };
179
+ }
180
+ /**
181
+ * Get performance metrics
182
+ */
183
+ getPerformanceMetrics() {
184
+ const total = this.hits + this.misses;
185
+ return {
186
+ avgLookupMs: mean(this.latencies),
187
+ avgEmbeddingMs: 0,
188
+ // Would need to track separately
189
+ avgStoreReadMs: mean(this.latencies),
190
+ avgStoreWriteMs: 0,
191
+ // Would need to track separately
192
+ p50LatencyMs: percentile(this.latencies, 50),
193
+ p95LatencyMs: percentile(this.latencies, 95),
194
+ p99LatencyMs: percentile(this.latencies, 99),
195
+ totalOperations: total + this.setCount,
196
+ failedOperations: 0,
197
+ // Would need to track separately
198
+ errorRate: 0
199
+ };
200
+ }
201
+ /**
202
+ * Reset all analytics
203
+ */
204
+ reset() {
205
+ this.hits = 0;
206
+ this.misses = 0;
207
+ this.exactHits = 0;
208
+ this.semanticHits = 0;
209
+ this.tokensSaved = 0;
210
+ this.inputTokensSaved = 0;
211
+ this.outputTokensSaved = 0;
212
+ this.latencies = [];
213
+ this.modelHits.clear();
214
+ this.namespaceHits.clear();
215
+ this.hitEvents = [];
216
+ this.missEvents = [];
217
+ this.setCount = 0;
218
+ }
219
+ /**
220
+ * Export analytics data
221
+ */
222
+ export(format = "json") {
223
+ const data = this.getSummary();
224
+ if (format === "csv") {
225
+ const headers = Object.keys(data).filter(
226
+ (k) => typeof data[k] !== "object"
227
+ );
228
+ const values = headers.map((h) => data[h]);
229
+ return `${headers.join(",")}
230
+ ${values.join(",")}`;
231
+ }
232
+ return JSON.stringify(data, null, 2);
233
+ }
234
+ calculateCostSavings() {
235
+ let savings = 0;
236
+ for (const [model, hits] of this.modelHits) {
237
+ const pricing = this.config.modelPricing[model] ?? this.config.modelPricing["default"];
238
+ const avgInputTokens = this.inputTokensSaved / (this.hits || 1);
239
+ const avgOutputTokens = this.outputTokensSaved / (this.hits || 1);
240
+ const inputCost = avgInputTokens / 1e3 * pricing.inputPer1K * hits;
241
+ const outputCost = avgOutputTokens / 1e3 * pricing.outputPer1K * hits;
242
+ savings += inputCost + outputCost;
243
+ }
244
+ if (savings === 0 && this.tokensSaved > 0) {
245
+ const defaultPricing = this.config.modelPricing["default"];
246
+ savings = this.inputTokensSaved / 1e3 * defaultPricing.inputPer1K + this.outputTokensSaved / 1e3 * defaultPricing.outputPer1K;
247
+ }
248
+ return savings;
249
+ }
250
+ getTopModels(n) {
251
+ return Array.from(this.modelHits.entries()).map(([model, hits]) => ({ model, hits })).sort((a, b) => b.hits - a.hits).slice(0, n);
252
+ }
253
+ getTopNamespaces(n) {
254
+ return Array.from(this.namespaceHits.entries()).map(([namespace, hits]) => ({ namespace, hits })).sort((a, b) => b.hits - a.hits).slice(0, n);
255
+ }
256
+ getHourlyStats() {
257
+ const hourlyData = /* @__PURE__ */ new Map();
258
+ for (const event of this.hitEvents) {
259
+ const hour = new Date(event.timestamp).getHours();
260
+ const data = hourlyData.get(hour) ?? {
261
+ hits: 0,
262
+ misses: 0,
263
+ latencies: []
264
+ };
265
+ data.hits++;
266
+ data.latencies.push(event.latencyMs);
267
+ hourlyData.set(hour, data);
268
+ }
269
+ for (const event of this.missEvents) {
270
+ const hour = new Date(event.timestamp).getHours();
271
+ const data = hourlyData.get(hour) ?? {
272
+ hits: 0,
273
+ misses: 0,
274
+ latencies: []
275
+ };
276
+ data.misses++;
277
+ data.latencies.push(event.latencyMs);
278
+ hourlyData.set(hour, data);
279
+ }
280
+ return Array.from(hourlyData.entries()).map(([hour, data]) => ({
281
+ hour,
282
+ hits: data.hits,
283
+ misses: data.misses,
284
+ avgLatencyMs: mean(data.latencies)
285
+ })).sort((a, b) => a.hour - b.hour);
286
+ }
287
+ trimEvents() {
288
+ const cutoff = Date.now() - this.config.retentionSeconds * 1e3;
289
+ this.hitEvents = this.hitEvents.filter((e) => e.timestamp > cutoff);
290
+ this.missEvents = this.missEvents.filter((e) => e.timestamp > cutoff);
291
+ }
292
+ };
293
+ function createCacheAnalytics(config) {
294
+ return new CacheAnalytics(config);
295
+ }
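
For orientation, a minimal sketch of the analytics API defined above, using the createCacheAnalytics factory from this bundle. The entry literal contains only the fields recordHit actually reads, and the model/namespace values are illustrative.

    const analytics = createCacheAnalytics({ sampleRate: 1 });

    // recordHit only reads request.model, response.usage and metadata.namespace/similarity,
    // so a minimal entry literal is enough for illustration.
    analytics.recordHit(
      {
        request: { model: 'gpt-4o-mini', messages: [] },
        response: { usage: { promptTokens: 120, completionTokens: 80, totalTokens: 200 } },
        metadata: { namespace: 'docs' },
      },
      'exact',
      3.2
    );
    analytics.recordMiss(45);

    console.log(analytics.getSummary().hitRate);          // 0.5 after one hit and one miss
    console.log(analytics.getCostSavingsReport('today')); // per-period savings estimate
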
296
+ var DEFAULT_KEY_OPTIONS = {
297
+ includeTemperature: false,
298
+ includeTools: false,
299
+ normalizeWhitespace: true,
300
+ extractUserMessage: false
301
+ };
302
+ function generateCacheKey(model, messages, options = {}) {
303
+ const opts = { ...DEFAULT_KEY_OPTIONS, ...options };
304
+ const normalized = normalizeRequest(model, messages, opts);
305
+ const hash = murmurhash.v3(JSON.stringify(normalized)).toString(16);
306
+ return `cache:${model}:${hash}`;
307
+ }
308
+ function normalizeRequest(model, messages, options = {}) {
309
+ const normalizedMessages = messages.map((m) => ({
310
+ role: m.role,
311
+ content: options.normalizeWhitespace ? normalizeWhitespace(m.content) : m.content
312
+ }));
313
+ return {
314
+ model,
315
+ messages: options.extractUserMessage ? extractUserMessage(normalizedMessages) : normalizedMessages
316
+ };
317
+ }
318
+ function normalizeWhitespace(text) {
319
+ return text.trim().replace(/\r\n/g, "\n").replace(/\s+/g, " ");
320
+ }
321
+ function extractUserMessage(messages) {
322
+ for (let i = messages.length - 1; i >= 0; i--) {
323
+ if (messages[i].role === "user") {
324
+ return messages[i].content;
325
+ }
326
+ }
327
+ return "";
328
+ }
329
+ function extractSystemPrompt(messages) {
330
+ const systemMessage = messages.find((m) => m.role === "system");
331
+ return systemMessage?.content;
332
+ }
333
+ function generateSemanticKey(model, messages) {
334
+ const userMessage = extractUserMessage(messages);
335
+ const normalized = normalizeWhitespace(userMessage);
336
+ return `${model}:${normalized}`;
337
+ }
338
+ function generateConversationFingerprint(messages) {
339
+ const pattern = messages.map((m) => `${m.role}:${m.content.length}`).join("|");
340
+ return murmurhash.v3(pattern).toString(16);
341
+ }
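
A short sketch of the key helpers above (whether they are re-exported from the package entry point is not visible in this file); the messages array is illustrative.

    const messages = [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'What   is\r\nsemantic caching?' },
    ];

    // Whitespace is collapsed before hashing, so trivially reformatted prompts map to the same key.
    const key = generateCacheKey('gpt-4o-mini', messages, { normalizeWhitespace: true });
    // -> "cache:gpt-4o-mini:<murmur3 hash in hex>"

    // Fingerprint of the conversation shape (roles and message lengths only).
    const fingerprint = generateConversationFingerprint(messages);
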
342
+
343
+ // src/core/SemanticCache.ts
344
+ var DEFAULT_CONFIG2 = {
345
+ defaultTTL: 3600,
346
+ similarityThreshold: 0.92,
347
+ maxEntries: 1e4,
348
+ maxSizeBytes: 1024 * 1024 * 1024,
349
+ // 1GB
350
+ keyPrefix: "llm-cache",
351
+ matchStrategy: "hybrid",
352
+ analyticsEnabled: true,
353
+ namespace: "default",
354
+ cacheKeyFields: ["model", "messages"],
355
+ normalizeWhitespace: true
356
+ };
357
+ var SemanticCache = class extends EventEmitter {
358
+ config;
359
+ store;
360
+ strategy;
361
+ similarity;
362
+ analytics;
363
+ stats;
364
+ constructor(config, store, strategy, similarity) {
365
+ super();
366
+ this.config = { ...DEFAULT_CONFIG2, ...config };
367
+ this.store = store;
368
+ this.strategy = strategy;
369
+ this.similarity = similarity;
370
+ this.analytics = new CacheAnalytics({
371
+ enabled: this.config.analyticsEnabled
372
+ });
373
+ this.stats = this.createInitialStats();
374
+ }
375
+ createInitialStats() {
376
+ return {
377
+ entries: 0,
378
+ sizeBytes: 0,
379
+ hits: 0,
380
+ misses: 0,
381
+ hitRate: 0,
382
+ exactHits: 0,
383
+ semanticHits: 0,
384
+ avgSimilarity: 0,
385
+ avgLatencyMs: 0,
386
+ costSavingsUSD: 0,
387
+ tokensSaved: 0
388
+ };
389
+ }
390
+ /**
391
+ * Wrap an LLM call with caching
392
+ *
393
+ * @param request - The LLM request
394
+ * @param fn - Function to call on cache miss
395
+ * @param options - Cache options
396
+ * @returns The response (cached or fresh)
397
+ */
398
+ async wrap(request, fn, options) {
399
+ const startTime = performance.now();
400
+ if (options?.skipCache) {
401
+ const response = await fn(request);
402
+ return { ...response, _cache: { hit: false } };
403
+ }
404
+ const lookupResult = await this.get(request, options);
405
+ if (lookupResult.hit && lookupResult.entry && !options?.forceRefresh) {
406
+ this.emit("hit", lookupResult.entry, lookupResult.similarity ?? 1);
407
+ this.stats.hits++;
408
+ this.updateStats("hit", lookupResult);
409
+ if (this.config.analyticsEnabled) {
410
+ this.analytics.recordHit(
411
+ lookupResult.entry,
412
+ lookupResult.source === "exact" ? "exact" : "semantic",
413
+ lookupResult.latencyMs
414
+ );
415
+ }
416
+ return {
417
+ ...lookupResult.entry.response,
418
+ _cache: {
419
+ hit: true,
420
+ similarity: lookupResult.similarity
421
+ }
422
+ };
423
+ }
424
+ const key = generateCacheKey(request.model, request.messages, {
425
+ normalizeWhitespace: this.config.normalizeWhitespace
426
+ });
427
+ this.emit("miss", key, lookupResult.source);
428
+ this.stats.misses++;
429
+ if (this.config.analyticsEnabled) {
430
+ this.analytics.recordMiss(performance.now() - startTime);
431
+ }
432
+ try {
433
+ const response = await fn(request);
434
+ await this.set(request, response, options);
435
+ return { ...response, _cache: { hit: false } };
436
+ } catch (error) {
437
+ this.emit("error", error, "wrap");
438
+ throw error;
439
+ }
440
+ }
441
+ /**
442
+ * Get an entry from cache
443
+ *
444
+ * @param request - The request to look up
445
+ * @param options - Lookup options
446
+ * @returns The lookup result
447
+ */
448
+ async get(request, options) {
449
+ const startTime = performance.now();
450
+ try {
451
+ const result = await this.strategy.match(
452
+ {
453
+ model: request.model,
454
+ messages: request.messages,
455
+ temperature: request.temperature
456
+ },
457
+ this.store,
458
+ this.similarity,
459
+ {
460
+ threshold: this.config.similarityThreshold,
461
+ namespace: options?.namespace ?? this.config.namespace
462
+ }
463
+ );
464
+ if (result.hit && result.entry) {
465
+ if (isExpired(result.entry.metadata.createdAt, result.entry.metadata.ttl)) {
466
+ await this.store.delete(result.entry.key);
467
+ return {
468
+ hit: false,
469
+ latencyMs: performance.now() - startTime,
470
+ source: "miss"
471
+ };
472
+ }
473
+ }
474
+ return {
475
+ ...result,
476
+ latencyMs: performance.now() - startTime
477
+ };
478
+ } catch (error) {
479
+ this.emit("error", error, "get");
480
+ return {
481
+ hit: false,
482
+ latencyMs: performance.now() - startTime,
483
+ source: "miss"
484
+ };
485
+ }
486
+ }
487
+ /**
488
+ * Set an entry in cache
489
+ *
490
+ * @param request - The request
491
+ * @param response - The response to cache
492
+ * @param options - Cache options
493
+ */
494
+ async set(request, response, options) {
495
+ const namespace = options?.namespace ?? this.config.namespace;
496
+ const baseKey = generateCacheKey(request.model, request.messages, {
497
+ normalizeWhitespace: this.config.normalizeWhitespace
498
+ });
499
+ const tempSuffix = request.temperature !== void 0 ? `:t:${request.temperature}` : "";
500
+ const key = namespace && namespace !== "default" ? `${baseKey}${tempSuffix}:ns:${namespace}` : `${baseKey}${tempSuffix}`;
501
+ let embedding;
502
+ if (this.similarity && this.config.matchStrategy !== "exact") {
503
+ try {
504
+ const userMessage = extractUserMessage(request.messages);
505
+ if (userMessage) {
506
+ embedding = await this.similarity.embed(userMessage);
507
+ }
508
+ } catch (error) {
509
+ this.emit("error", error, "embedding");
510
+ }
511
+ }
512
+ const entry = {
513
+ id: generateId("entry"),
514
+ key,
515
+ embedding,
516
+ request: {
517
+ model: request.model,
518
+ messages: request.messages,
519
+ temperature: request.temperature,
520
+ maxTokens: request.maxTokens,
521
+ tools: request.tools
522
+ },
523
+ response: {
524
+ content: response.content,
525
+ model: response.model ?? request.model,
526
+ usage: response.usage ?? {
527
+ promptTokens: 0,
528
+ completionTokens: 0,
529
+ totalTokens: 0
530
+ },
531
+ finishReason: response.finishReason ?? "stop",
532
+ toolCalls: response.toolCalls
533
+ },
534
+ metadata: {
535
+ createdAt: now(),
536
+ accessedAt: now(),
537
+ accessCount: 0,
538
+ ttl: options?.ttl ?? this.config.defaultTTL,
539
+ hitCount: 0,
540
+ tags: options?.tags,
541
+ namespace,
542
+ userId: options?.userId,
543
+ agentId: options?.agentId
544
+ }
545
+ };
546
+ try {
547
+ await this.store.set(key, entry);
548
+ this.emit("set", entry);
549
+ this.stats.entries++;
550
+ if (this.config.analyticsEnabled) {
551
+ this.analytics.recordSet(entry);
552
+ }
553
+ } catch (error) {
554
+ this.emit("error", error, "set");
555
+ throw error;
556
+ }
557
+ }
558
+ /**
559
+ * Delete an entry from cache
560
+ *
561
+ * @param key - The cache key to delete
562
+ * @returns Whether the entry was deleted
563
+ */
564
+ async delete(key) {
565
+ const deleted = await this.store.delete(key);
566
+ if (deleted) {
567
+ this.emit("delete", key);
568
+ this.stats.entries = Math.max(0, this.stats.entries - 1);
569
+ }
570
+ return deleted;
571
+ }
572
+ /**
573
+ * Clear all entries from cache
574
+ */
575
+ async clear() {
576
+ await this.store.clear();
577
+ this.stats = this.createInitialStats();
578
+ this.analytics.reset();
579
+ }
580
+ /**
581
+ * Invalidate entries by pattern
582
+ *
583
+ * @param pattern - Regex pattern to match keys
584
+ * @returns Number of entries invalidated
585
+ */
586
+ async invalidateByPattern(pattern) {
587
+ const keys = await this.store.keys();
588
+ let count = 0;
589
+ for (const key of keys) {
590
+ if (pattern.test(key)) {
591
+ await this.store.delete(key);
592
+ count++;
593
+ }
594
+ }
595
+ this.stats.entries = Math.max(0, this.stats.entries - count);
596
+ return count;
597
+ }
598
+ /**
599
+ * Invalidate entries by tags
600
+ *
601
+ * @param tags - Tags to match
602
+ * @returns Number of entries invalidated
603
+ */
604
+ async invalidateByTags(tags) {
605
+ const keys = await this.store.keys();
606
+ let count = 0;
607
+ for (const key of keys) {
608
+ const entry = await this.store.get(key);
609
+ if (entry?.metadata.tags?.some((t) => tags.includes(t))) {
610
+ await this.store.delete(key);
611
+ count++;
612
+ }
613
+ }
614
+ this.stats.entries = Math.max(0, this.stats.entries - count);
615
+ return count;
616
+ }
617
+ /**
618
+ * Get cache statistics
619
+ */
620
+ getStats() {
621
+ this.updateHitRate();
622
+ return { ...this.stats };
623
+ }
624
+ /**
625
+ * Get analytics instance
626
+ */
627
+ getAnalytics() {
628
+ return this.analytics;
629
+ }
630
+ /**
631
+ * Get configuration
632
+ */
633
+ getConfig() {
634
+ return { ...this.config };
635
+ }
636
+ /**
637
+ * Check store health
638
+ */
639
+ async checkHealth() {
640
+ return this.store.checkHealth();
641
+ }
642
+ /**
643
+ * Close the cache and release resources
644
+ */
645
+ async close() {
646
+ await this.store.close();
647
+ this.removeAllListeners();
648
+ }
649
+ updateStats(type, result) {
650
+ if (type === "hit" && result) {
651
+ if (result.source === "exact") {
652
+ this.stats.exactHits++;
653
+ } else if (result.source === "semantic") {
654
+ this.stats.semanticHits++;
655
+ if (result.similarity) {
656
+ const total = this.stats.exactHits + this.stats.semanticHits;
657
+ this.stats.avgSimilarity = (this.stats.avgSimilarity * (total - 1) + result.similarity) / total;
658
+ }
659
+ }
660
+ if (result.entry?.response.usage) {
661
+ this.stats.tokensSaved += result.entry.response.usage.totalTokens;
662
+ this.stats.costSavingsUSD += result.entry.response.usage.totalTokens / 1e3 * 0.01;
663
+ }
664
+ }
665
+ this.updateHitRate();
666
+ }
667
+ updateHitRate() {
668
+ const total = this.stats.hits + this.stats.misses;
669
+ this.stats.hitRate = total > 0 ? this.stats.hits / total : 0;
670
+ }
671
+ };
672
+ function createSemanticCache(config, store, strategy, similarity) {
673
+ return new SemanticCache(config, store, strategy, similarity);
674
+ }
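
A minimal usage sketch of the wrap() flow above, assuming the factories below are exported from the package entry point; callMyLLM is a hypothetical completion helper.

    import {
      createSemanticCache,
      createMemoryCacheStore,
      createExactMatchStrategy,
    } from '@lov3kaizen/agentsea-cache'; // assumed exports

    const cache = createSemanticCache(
      { defaultTTL: 600, namespace: 'support-bot' },
      createMemoryCacheStore(),
      createExactMatchStrategy() // exact matching needs no embedding provider
    );

    const response = await cache.wrap(
      { model: 'gpt-4o-mini', messages: [{ role: 'user', content: 'Hello!' }] },
      (req) => callMyLLM(req) // hypothetical; invoked only on a cache miss
    );
    console.log(response._cache.hit, cache.getStats().hitRate);

Identical requests served within defaultTTL seconds return the stored response with _cache.hit set to true.
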
675
+
676
+ // src/stores/BaseCacheStore.ts
677
+ var BaseCacheStore = class {
678
+ /** Store configuration */
679
+ config;
680
+ /** Store metrics */
681
+ metrics = {
682
+ gets: 0,
683
+ sets: 0,
684
+ deletes: 0,
685
+ hits: 0,
686
+ misses: 0
687
+ };
688
+ constructor(config) {
689
+ this.config = {
690
+ namespace: config.namespace ?? "default",
691
+ ...config
692
+ };
693
+ }
694
+ /**
695
+ * Get the store namespace
696
+ */
697
+ get namespace() {
698
+ return this.config.namespace ?? "default";
699
+ }
700
+ /**
701
+ * Get store metrics
702
+ */
703
+ getMetrics() {
704
+ return { ...this.metrics };
705
+ }
706
+ /**
707
+ * Reset store metrics
708
+ */
709
+ resetMetrics() {
710
+ this.metrics = {
711
+ gets: 0,
712
+ sets: 0,
713
+ deletes: 0,
714
+ hits: 0,
715
+ misses: 0
716
+ };
717
+ }
718
+ /**
719
+ * Increment a metric counter
720
+ */
721
+ incrementMetric(metric, amount = 1) {
722
+ if (typeof this.metrics[metric] === "number") {
723
+ this.metrics[metric] += amount;
724
+ }
725
+ }
726
+ };
727
+ var DEFAULT_CONFIG3 = {
728
+ maxEntries: 1e4,
729
+ maxSizeBytes: 1024 * 1024 * 1024,
730
+ // 1GB
731
+ evictionPolicy: "lru"
732
+ };
733
+ var MemoryCacheStore = class extends BaseCacheStore {
734
+ storeType = "memory";
735
+ cache;
736
+ vectors = /* @__PURE__ */ new Map();
737
+ memoryConfig;
738
+ closed = false;
739
+ constructor(config = { type: "memory" }) {
740
+ super(config);
741
+ this.memoryConfig = { ...DEFAULT_CONFIG3, ...config };
742
+ this.cache = new LRUCache({
743
+ max: this.memoryConfig.maxEntries ?? 1e4,
744
+ maxSize: this.memoryConfig.maxSizeBytes ?? 1024 * 1024 * 1024,
745
+ sizeCalculation: (entry) => estimateEntrySize(entry),
746
+ ttl: 0,
747
+ // TTL handled per-entry
748
+ updateAgeOnGet: true,
749
+ allowStale: false
750
+ });
751
+ }
752
+ get(key) {
753
+ this.incrementMetric("gets");
754
+ const entry = this.cache.get(key);
755
+ if (entry) {
756
+ this.incrementMetric("hits");
757
+ entry.metadata.accessedAt = now();
758
+ entry.metadata.accessCount++;
759
+ return Promise.resolve(entry);
760
+ }
761
+ this.incrementMetric("misses");
762
+ return Promise.resolve(void 0);
763
+ }
764
+ set(key, entry) {
765
+ const startTime = performance.now();
766
+ this.incrementMetric("sets");
767
+ const ttlMs = entry.metadata.ttl > 0 ? entry.metadata.ttl * 1e3 : void 0;
768
+ this.cache.set(key, entry, { ttl: ttlMs });
769
+ if (entry.embedding && entry.embedding.length > 0) {
770
+ this.vectors.set(key, {
771
+ id: entry.id,
772
+ vector: entry.embedding
773
+ });
774
+ }
775
+ return Promise.resolve({
776
+ success: true,
777
+ id: entry.id,
778
+ durationMs: performance.now() - startTime
779
+ });
780
+ }
781
+ has(key) {
782
+ return Promise.resolve(this.cache.has(key));
783
+ }
784
+ delete(key) {
785
+ this.incrementMetric("deletes");
786
+ const existed = this.cache.has(key);
787
+ this.cache.delete(key);
788
+ this.vectors.delete(key);
789
+ return Promise.resolve(existed);
790
+ }
791
+ clear() {
792
+ this.cache.clear();
793
+ this.vectors.clear();
794
+ return Promise.resolve();
795
+ }
796
+ size() {
797
+ return Promise.resolve(this.cache.size);
798
+ }
799
+ keys() {
800
+ return Promise.resolve(Array.from(this.cache.keys()));
801
+ }
802
+ query(vector, options) {
803
+ const startTime = performance.now();
804
+ const topK = options?.topK ?? 10;
805
+ const minSimilarity = options?.minSimilarity ?? 0;
806
+ const results = [];
807
+ for (const [key, stored] of this.vectors) {
808
+ const entry = this.cache.get(key);
809
+ if (!entry) continue;
810
+ if (options?.namespace && entry.metadata.namespace !== options.namespace) {
811
+ continue;
812
+ }
813
+ const similarity = this.cosineSimilarity(vector, stored.vector);
814
+ if (similarity >= minSimilarity) {
815
+ results.push({ ...entry, score: similarity });
816
+ }
817
+ }
818
+ results.sort((a, b) => b.score - a.score);
819
+ return Promise.resolve({
820
+ entries: results.slice(0, topK),
821
+ durationMs: performance.now() - startTime
822
+ });
823
+ }
824
+ checkHealth() {
825
+ return Promise.resolve({
826
+ healthy: !this.closed,
827
+ latencyMs: 0,
828
+ lastCheck: now(),
829
+ error: this.closed ? "Store is closed" : void 0
830
+ });
831
+ }
832
+ close() {
833
+ this.closed = true;
834
+ this.cache.clear();
835
+ this.vectors.clear();
836
+ return Promise.resolve();
837
+ }
838
+ /**
839
+ * Compute cosine similarity between two vectors
840
+ */
841
+ cosineSimilarity(a, b) {
842
+ if (a.length !== b.length) return 0;
843
+ let dotProduct2 = 0;
844
+ let normA = 0;
845
+ let normB = 0;
846
+ for (let i = 0; i < a.length; i++) {
847
+ dotProduct2 += a[i] * b[i];
848
+ normA += a[i] * a[i];
849
+ normB += b[i] * b[i];
850
+ }
851
+ const denominator = Math.sqrt(normA) * Math.sqrt(normB);
852
+ if (denominator === 0) return 0;
853
+ return dotProduct2 / denominator;
854
+ }
855
+ /**
856
+ * Get memory usage information
857
+ */
858
+ getMemoryInfo() {
859
+ return {
860
+ entries: this.cache.size,
861
+ calculatedSize: this.cache.calculatedSize ?? 0,
862
+ maxSize: this.memoryConfig.maxSizeBytes ?? 0,
863
+ vectorCount: this.vectors.size
864
+ };
865
+ }
866
+ /**
867
+ * Prune expired entries
868
+ */
869
+ prune() {
870
+ this.cache.purgeStale();
871
+ let pruned = 0;
872
+ for (const key of this.vectors.keys()) {
873
+ if (!this.cache.has(key)) {
874
+ this.vectors.delete(key);
875
+ pruned++;
876
+ }
877
+ }
878
+ return Promise.resolve(pruned);
879
+ }
880
+ };
881
+ function createMemoryCacheStore(config) {
882
+ return new MemoryCacheStore({ type: "memory", ...config });
883
+ }
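
A brief sketch of the in-memory store on its own, using the createMemoryCacheStore factory above; entries are normally written through SemanticCache.set(), so only the maintenance helpers are shown.

    const store = createMemoryCacheStore({
      maxEntries: 5000,
      maxSizeBytes: 256 * 1024 * 1024, // 256 MB budget for the LRU size calculation
    });

    await store.has('cache:gpt-4o-mini:abc123'); // false on a fresh store
    await store.prune();                          // drop stale entries and orphaned vectors
    console.log(store.getMemoryInfo());           // { entries, calculatedSize, maxSize, vectorCount }
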
884
+
885
+ // src/similarity/metrics/SimilarityMetrics.ts
886
+ function cosineSimilarity(a, b) {
887
+ if (a.length !== b.length) return 0;
888
+ let dotProduct2 = 0;
889
+ let normA = 0;
890
+ let normB = 0;
891
+ for (let i = 0; i < a.length; i++) {
892
+ dotProduct2 += a[i] * b[i];
893
+ normA += a[i] * a[i];
894
+ normB += b[i] * b[i];
895
+ }
896
+ const denominator = Math.sqrt(normA) * Math.sqrt(normB);
897
+ if (denominator === 0) return 0;
898
+ return dotProduct2 / denominator;
899
+ }
900
+ function euclideanDistance(a, b) {
901
+ if (a.length !== b.length) return Infinity;
902
+ let sum = 0;
903
+ for (let i = 0; i < a.length; i++) {
904
+ const diff = a[i] - b[i];
905
+ sum += diff * diff;
906
+ }
907
+ return Math.sqrt(sum);
908
+ }
909
+ function dotProduct(a, b) {
910
+ if (a.length !== b.length) return 0;
911
+ let sum = 0;
912
+ for (let i = 0; i < a.length; i++) {
913
+ sum += a[i] * b[i];
914
+ }
915
+ return sum;
916
+ }
917
+ function manhattanDistance(a, b) {
918
+ if (a.length !== b.length) return Infinity;
919
+ let sum = 0;
920
+ for (let i = 0; i < a.length; i++) {
921
+ sum += Math.abs(a[i] - b[i]);
922
+ }
923
+ return sum;
924
+ }
925
+ function distanceToSimilarity(distance) {
926
+ return 1 / (1 + distance);
927
+ }
928
+ function normalize(vector) {
929
+ const magnitude2 = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
930
+ if (magnitude2 === 0) return vector;
931
+ return vector.map((v) => v / magnitude2);
932
+ }
933
+ function magnitude(vector) {
934
+ return Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
935
+ }
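
For reference, how the vector helpers above behave (whether they are re-exported from the package entry point is not visible in this file):

    const a = normalize([3, 4]); // [0.6, 0.8]
    const b = normalize([4, 3]); // [0.8, 0.6]

    cosineSimilarity(a, b);                        // 0.96 — nearly parallel vectors
    distanceToSimilarity(euclideanDistance(a, b)); // maps any distance into (0, 1]
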
936
+
937
+ // src/stores/RedisCacheStore.ts
938
+ var DEFAULT_CONFIG4 = {
939
+ host: "localhost",
940
+ port: 6379,
941
+ db: 0,
942
+ keyPrefix: "llm-cache",
943
+ connectTimeout: 1e4
944
+ };
945
+ var RedisCacheStore = class extends BaseCacheStore {
946
+ storeType = "redis";
947
+ client = null;
948
+ redisConfig;
949
+ connected = false;
950
+ constructor(config) {
951
+ super(config);
952
+ this.redisConfig = { ...DEFAULT_CONFIG4, ...config };
953
+ }
954
+ /**
955
+ * Connect to Redis
956
+ */
957
+ async connect() {
958
+ if (this.connected) return;
959
+ try {
960
+ const { Redis } = await import('ioredis');
961
+ if (this.redisConfig.url) {
962
+ this.client = new Redis(this.redisConfig.url, {
963
+ connectTimeout: this.redisConfig.connectTimeout ?? 1e4,
964
+ lazyConnect: false,
965
+ tls: this.redisConfig.tls ? {} : void 0
966
+ });
967
+ } else {
968
+ this.client = new Redis({
969
+ host: this.redisConfig.host ?? "localhost",
970
+ port: this.redisConfig.port ?? 6379,
971
+ password: this.redisConfig.password,
972
+ db: this.redisConfig.db ?? 0,
973
+ connectTimeout: this.redisConfig.connectTimeout ?? 1e4,
974
+ tls: this.redisConfig.tls ? {} : void 0
975
+ });
976
+ }
977
+ await this.client.ping();
978
+ this.connected = true;
979
+ } catch (error) {
980
+ throw new Error(
981
+ `Failed to connect to Redis: ${error.message}`
982
+ );
983
+ }
984
+ }
985
+ async ensureConnected() {
986
+ if (!this.connected || !this.client) {
987
+ await this.connect();
988
+ }
989
+ if (!this.client) {
990
+ throw new Error("Redis client not initialized");
991
+ }
992
+ return this.client;
993
+ }
994
+ prefixKey(key) {
995
+ const prefix = this.redisConfig.keyPrefix ?? "llm-cache";
996
+ return `${prefix}:${this.namespace}:${key}`;
997
+ }
998
+ async get(key) {
999
+ this.incrementMetric("gets");
1000
+ const client = await this.ensureConnected();
1001
+ const data = await client.get(this.prefixKey(key));
1002
+ if (!data) {
1003
+ this.incrementMetric("misses");
1004
+ return void 0;
1005
+ }
1006
+ this.incrementMetric("hits");
1007
+ try {
1008
+ const entry = JSON.parse(data);
1009
+ entry.metadata.accessedAt = now();
1010
+ entry.metadata.accessCount++;
1011
+ client.set(this.prefixKey(key), JSON.stringify(entry)).catch(() => {
1012
+ });
1013
+ return entry;
1014
+ } catch {
1015
+ return void 0;
1016
+ }
1017
+ }
1018
+ async set(key, entry) {
1019
+ const startTime = performance.now();
1020
+ this.incrementMetric("sets");
1021
+ const client = await this.ensureConnected();
1022
+ await client.set(this.prefixKey(key), JSON.stringify(entry));
1023
+ if (entry.metadata.ttl > 0) {
1024
+ await client.expire(this.prefixKey(key), entry.metadata.ttl);
1025
+ }
1026
+ return {
1027
+ success: true,
1028
+ id: entry.id,
1029
+ durationMs: performance.now() - startTime
1030
+ };
1031
+ }
1032
+ async has(key) {
1033
+ const client = await this.ensureConnected();
1034
+ return await client.exists(this.prefixKey(key)) > 0;
1035
+ }
1036
+ async delete(key) {
1037
+ this.incrementMetric("deletes");
1038
+ const client = await this.ensureConnected();
1039
+ return await client.del(this.prefixKey(key)) > 0;
1040
+ }
1041
+ async clear() {
1042
+ const client = await this.ensureConnected();
1043
+ const pattern = this.prefixKey("*");
1044
+ const keys = await client.keys(pattern);
1045
+ if (keys.length > 0) {
1046
+ await client.del(...keys);
1047
+ }
1048
+ }
1049
+ async size() {
1050
+ const client = await this.ensureConnected();
1051
+ const keys = await client.keys(this.prefixKey("*"));
1052
+ return keys.length;
1053
+ }
1054
+ async keys() {
1055
+ const client = await this.ensureConnected();
1056
+ const keys = await client.keys(this.prefixKey("*"));
1057
+ const prefix = this.prefixKey("");
1058
+ return keys.map((k) => k.slice(prefix.length));
1059
+ }
1060
+ async query(vector, options) {
1061
+ const startTime = performance.now();
1062
+ const allKeys = await this.keys();
1063
+ const entries = [];
1064
+ const keysToProcess = allKeys.slice(0, 1e3);
1065
+ for (const key of keysToProcess) {
1066
+ const entry = await this.get(key);
1067
+ if (entry?.embedding) {
1068
+ const score = cosineSimilarity(vector, entry.embedding);
1069
+ if (score >= (options?.minSimilarity ?? 0)) {
1070
+ if (options?.namespace && entry.metadata.namespace !== options.namespace) {
1071
+ continue;
1072
+ }
1073
+ entries.push({ ...entry, score });
1074
+ }
1075
+ }
1076
+ }
1077
+ entries.sort((a, b) => b.score - a.score);
1078
+ return {
1079
+ entries: entries.slice(0, options?.topK ?? 10),
1080
+ durationMs: performance.now() - startTime
1081
+ };
1082
+ }
1083
+ async checkHealth() {
1084
+ const startTime = performance.now();
1085
+ try {
1086
+ const client = await this.ensureConnected();
1087
+ await client.ping();
1088
+ return {
1089
+ healthy: true,
1090
+ latencyMs: performance.now() - startTime,
1091
+ lastCheck: now()
1092
+ };
1093
+ } catch (error) {
1094
+ return {
1095
+ healthy: false,
1096
+ latencyMs: performance.now() - startTime,
1097
+ lastCheck: now(),
1098
+ error: error.message
1099
+ };
1100
+ }
1101
+ }
1102
+ async close() {
1103
+ if (this.client) {
1104
+ await this.client.quit();
1105
+ this.client = null;
1106
+ this.connected = false;
1107
+ }
1108
+ }
1109
+ /**
1110
+ * Check if connected to Redis
1111
+ */
1112
+ isConnected() {
1113
+ return this.connected;
1114
+ }
1115
+ };
1116
+ function createRedisCacheStore(config) {
1117
+ return new RedisCacheStore(config);
1118
+ }
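
A connection sketch for the Redis store, using the createRedisCacheStore factory above and assuming the optional ioredis dependency is installed; the URL is illustrative.

    const store = createRedisCacheStore({
      type: 'redis',
      url: 'redis://localhost:6379', // or host/port/password/db/tls
      keyPrefix: 'llm-cache',
      namespace: 'prod',
    });

    await store.connect();                  // get/set also connect lazily via ensureConnected()
    console.log(await store.checkHealth()); // { healthy, latencyMs, lastCheck }
    await store.close();
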
1119
+
1120
+ // src/stores/SQLiteCacheStore.ts
1121
+ var DEFAULT_CONFIG5 = {
1122
+ dbPath: "cache.db",
1123
+ inMemory: false,
1124
+ enableVector: false
1125
+ };
1126
+ var SQLiteCacheStore = class extends BaseCacheStore {
1127
+ storeType = "sqlite";
1128
+ db = null;
1129
+ sqliteConfig;
1130
+ initialized = false;
1131
+ constructor(config) {
1132
+ super(config);
1133
+ this.sqliteConfig = { ...DEFAULT_CONFIG5, ...config };
1134
+ }
1135
+ /**
1136
+ * Initialize the database
1137
+ */
1138
+ async init() {
1139
+ if (this.initialized) return;
1140
+ try {
1141
+ const BetterSqlite3 = (await import('better-sqlite3')).default;
1142
+ const db = new BetterSqlite3(
1143
+ this.sqliteConfig.inMemory ? ":memory:" : this.sqliteConfig.dbPath ?? "cache.db"
1144
+ );
1145
+ this.db = db;
1146
+ db.exec(`
1147
+ CREATE TABLE IF NOT EXISTS cache_entries (
1148
+ key TEXT PRIMARY KEY,
1149
+ id TEXT NOT NULL,
1150
+ data TEXT NOT NULL,
1151
+ embedding BLOB,
1152
+ model TEXT NOT NULL,
1153
+ namespace TEXT,
1154
+ created_at INTEGER NOT NULL,
1155
+ accessed_at INTEGER NOT NULL,
1156
+ ttl INTEGER DEFAULT 0
1157
+ );
1158
+
1159
+ CREATE INDEX IF NOT EXISTS idx_namespace ON cache_entries(namespace);
1160
+ CREATE INDEX IF NOT EXISTS idx_model ON cache_entries(model);
1161
+ CREATE INDEX IF NOT EXISTS idx_created_at ON cache_entries(created_at);
1162
+ CREATE INDEX IF NOT EXISTS idx_accessed_at ON cache_entries(accessed_at);
1163
+ `);
1164
+ this.initialized = true;
1165
+ } catch (error) {
1166
+ throw new Error(
1167
+ `Failed to initialize SQLite database: ${error.message}`
1168
+ );
1169
+ }
1170
+ }
1171
+ ensureInitialized() {
1172
+ if (!this.initialized || !this.db) {
1173
+ throw new Error("SQLite store not initialized. Call init() first.");
1174
+ }
1175
+ return this.db;
1176
+ }
1177
+ get(key) {
1178
+ this.incrementMetric("gets");
1179
+ const db = this.ensureInitialized();
1180
+ const row = db.prepare("SELECT data FROM cache_entries WHERE key = ?").get(key);
1181
+ if (!row) {
1182
+ this.incrementMetric("misses");
1183
+ return Promise.resolve(void 0);
1184
+ }
1185
+ this.incrementMetric("hits");
1186
+ try {
1187
+ const entry = JSON.parse(row.data);
1188
+ entry.metadata.accessedAt = now();
1189
+ entry.metadata.accessCount++;
1190
+ db.prepare(
1191
+ "UPDATE cache_entries SET accessed_at = ?, data = ? WHERE key = ?"
1192
+ ).run(now(), JSON.stringify(entry), key);
1193
+ return Promise.resolve(entry);
1194
+ } catch {
1195
+ return Promise.resolve(void 0);
1196
+ }
1197
+ }
1198
+ set(key, entry) {
1199
+ const startTime = performance.now();
1200
+ this.incrementMetric("sets");
1201
+ const db = this.ensureInitialized();
1202
+ const embedding = entry.embedding ? Buffer.from(new Float32Array(entry.embedding).buffer) : null;
1203
+ db.prepare(
1204
+ `
1205
+ INSERT OR REPLACE INTO cache_entries
1206
+ (key, id, data, embedding, model, namespace, created_at, accessed_at, ttl)
1207
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1208
+ `
1209
+ ).run(
1210
+ key,
1211
+ entry.id,
1212
+ JSON.stringify(entry),
1213
+ embedding,
1214
+ entry.request.model,
1215
+ entry.metadata.namespace ?? null,
1216
+ entry.metadata.createdAt,
1217
+ entry.metadata.accessedAt,
1218
+ entry.metadata.ttl
1219
+ );
1220
+ return Promise.resolve({
1221
+ success: true,
1222
+ id: entry.id,
1223
+ durationMs: performance.now() - startTime
1224
+ });
1225
+ }
1226
+ has(key) {
1227
+ const db = this.ensureInitialized();
1228
+ const row = db.prepare("SELECT 1 FROM cache_entries WHERE key = ?").get(key);
1229
+ return Promise.resolve(!!row);
1230
+ }
1231
+ delete(key) {
1232
+ this.incrementMetric("deletes");
1233
+ const db = this.ensureInitialized();
1234
+ const result = db.prepare("DELETE FROM cache_entries WHERE key = ?").run(key);
1235
+ return Promise.resolve(result.changes > 0);
1236
+ }
1237
+ clear() {
1238
+ const db = this.ensureInitialized();
1239
+ if (this.namespace === "default") {
1240
+ db.prepare("DELETE FROM cache_entries").run();
1241
+ } else {
1242
+ db.prepare("DELETE FROM cache_entries WHERE namespace = ?").run(
1243
+ this.namespace
1244
+ );
1245
+ }
1246
+ return Promise.resolve();
1247
+ }
1248
+ size() {
1249
+ const db = this.ensureInitialized();
1250
+ const row = db.prepare("SELECT COUNT(*) as count FROM cache_entries").get();
1251
+ return Promise.resolve(row.count);
1252
+ }
1253
+ keys() {
1254
+ const db = this.ensureInitialized();
1255
+ const rows = db.prepare("SELECT key FROM cache_entries").all();
1256
+ return Promise.resolve(rows.map((r) => r.key));
1257
+ }
1258
+ query(vector, options) {
1259
+ const startTime = performance.now();
1260
+ const db = this.ensureInitialized();
1261
+ let sql = `
1262
+ SELECT key, data, embedding FROM cache_entries
1263
+ WHERE embedding IS NOT NULL
1264
+ `;
1265
+ const params = [];
1266
+ if (options?.namespace) {
1267
+ sql += " AND namespace = ?";
1268
+ params.push(options.namespace);
1269
+ }
1270
+ const rows = db.prepare(sql).all(...params);
1271
+ const results = [];
1272
+ for (const row of rows) {
1273
+ const stored = new Float32Array(
1274
+ row.embedding.buffer,
1275
+ row.embedding.byteOffset,
1276
+ row.embedding.length / 4
1277
+ );
1278
+ const similarity = cosineSimilarity(vector, Array.from(stored));
1279
+ if (similarity >= (options?.minSimilarity ?? 0)) {
1280
+ try {
1281
+ const entry = JSON.parse(row.data);
1282
+ results.push({ ...entry, score: similarity });
1283
+ } catch {
1284
+ }
1285
+ }
1286
+ }
1287
+ results.sort((a, b) => b.score - a.score);
1288
+ return Promise.resolve({
1289
+ entries: results.slice(0, options?.topK ?? 10),
1290
+ durationMs: performance.now() - startTime
1291
+ });
1292
+ }
1293
+ checkHealth() {
1294
+ const startTime = performance.now();
1295
+ try {
1296
+ this.ensureInitialized();
1297
+ return Promise.resolve({
1298
+ healthy: true,
1299
+ latencyMs: performance.now() - startTime,
1300
+ lastCheck: now()
1301
+ });
1302
+ } catch (error) {
1303
+ return Promise.resolve({
1304
+ healthy: false,
1305
+ latencyMs: performance.now() - startTime,
1306
+ lastCheck: now(),
1307
+ error: error.message
1308
+ });
1309
+ }
1310
+ }
1311
+ close() {
1312
+ if (this.db) {
1313
+ this.db.close();
1314
+ this.db = null;
1315
+ this.initialized = false;
1316
+ }
1317
+ return Promise.resolve();
1318
+ }
1319
+ /**
1320
+ * Prune expired entries
1321
+ */
1322
+ pruneExpired() {
1323
+ const db = this.ensureInitialized();
1324
+ const currentTime = now();
1325
+ const result = db.prepare(
1326
+ `
1327
+ DELETE FROM cache_entries
1328
+ WHERE ttl > 0 AND (created_at + (ttl * 1000)) < ?
1329
+ `
1330
+ ).run(currentTime);
1331
+ return Promise.resolve(result.changes);
1332
+ }
1333
+ /**
1334
+ * Get database file size (for non-memory databases)
1335
+ */
1336
+ async getDbSize() {
1337
+ if (this.sqliteConfig.inMemory) return null;
1338
+ try {
1339
+ const { statSync } = await import('fs');
1340
+ const stats = statSync(this.sqliteConfig.dbPath ?? "cache.db");
1341
+ return stats.size;
1342
+ } catch {
1343
+ return null;
1344
+ }
1345
+ }
1346
+ /**
1347
+ * Check if database is initialized
1348
+ */
1349
+ isInitialized() {
1350
+ return this.initialized;
1351
+ }
1352
+ };
1353
+ function createSQLiteCacheStore(config) {
1354
+ return new SQLiteCacheStore(config);
1355
+ }
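
A sketch for the SQLite store, using the createSQLiteCacheStore factory above and assuming the optional better-sqlite3 dependency is installed; note that init() must run before any read or write.

    const store = createSQLiteCacheStore({ type: 'sqlite', dbPath: './llm-cache.db' });
    await store.init();                      // creates the cache_entries table and indexes

    console.log(await store.size());         // number of cached entries
    console.log(await store.pruneExpired()); // rows removed by the TTL sweep
    await store.close();
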
1356
+
1357
+ // src/stores/TieredCacheStore.ts
1358
+ var TieredCacheStore = class extends BaseCacheStore {
1359
+ storeType = "tiered";
1360
+ tiers;
1361
+ accessCounts = /* @__PURE__ */ new Map();
1362
+ constructor(config) {
1363
+ super(config);
1364
+ const validTiers = config.tiers.filter(
1365
+ (t) => t.store !== void 0
1366
+ );
1367
+ if (validTiers.length === 0) {
1368
+ throw new Error(
1369
+ "TieredCacheStore requires at least one tier with a store"
1370
+ );
1371
+ }
1372
+ this.tiers = validTiers.sort((a, b) => a.priority - b.priority);
1373
+ }
1374
+ async get(key) {
1375
+ this.incrementMetric("gets");
1376
+ for (let i = 0; i < this.tiers.length; i++) {
1377
+ const tier = this.tiers[i];
1378
+ const entry = await tier.store.get(key);
1379
+ if (entry) {
1380
+ this.incrementMetric("hits");
1381
+ const accessCount = (this.accessCounts.get(key) ?? 0) + 1;
1382
+ this.accessCounts.set(key, accessCount);
1383
+ if (i > 0) {
1384
+ await this.checkPromotion(key, entry, i, accessCount);
1385
+ }
1386
+ return entry;
1387
+ }
1388
+ }
1389
+ this.incrementMetric("misses");
1390
+ return void 0;
1391
+ }
1392
+ async set(key, entry) {
1393
+ const startTime = performance.now();
1394
+ this.incrementMetric("sets");
1395
+ const result = await this.tiers[0].store.set(key, entry);
1396
+ this.accessCounts.set(key, 0);
1397
+ await this.checkDemotion(0);
1398
+ return {
1399
+ ...result,
1400
+ durationMs: performance.now() - startTime
1401
+ };
1402
+ }
1403
+ async has(key) {
1404
+ for (const tier of this.tiers) {
1405
+ if (await tier.store.has(key)) {
1406
+ return true;
1407
+ }
1408
+ }
1409
+ return false;
1410
+ }
1411
+ async delete(key) {
1412
+ this.incrementMetric("deletes");
1413
+ let deleted = false;
1414
+ for (const tier of this.tiers) {
1415
+ if (await tier.store.delete(key)) {
1416
+ deleted = true;
1417
+ }
1418
+ }
1419
+ this.accessCounts.delete(key);
1420
+ return deleted;
1421
+ }
1422
+ async clear() {
1423
+ for (const tier of this.tiers) {
1424
+ await tier.store.clear();
1425
+ }
1426
+ this.accessCounts.clear();
1427
+ }
1428
+ async size() {
1429
+ const allKeys = /* @__PURE__ */ new Set();
1430
+ for (const tier of this.tiers) {
1431
+ const keys = await tier.store.keys();
1432
+ keys.forEach((k) => allKeys.add(k));
1433
+ }
1434
+ return allKeys.size;
1435
+ }
1436
+ async keys() {
1437
+ const allKeys = /* @__PURE__ */ new Set();
1438
+ for (const tier of this.tiers) {
1439
+ const keys = await tier.store.keys();
1440
+ keys.forEach((k) => allKeys.add(k));
1441
+ }
1442
+ return Array.from(allKeys);
1443
+ }
1444
+ async query(vector, options) {
1445
+ const startTime = performance.now();
1446
+ const entriesMap = /* @__PURE__ */ new Map();
1447
+ for (const tier of this.tiers) {
1448
+ const result = await tier.store.query(vector, options);
1449
+ for (const entry of result.entries) {
1450
+ const existing = entriesMap.get(entry.key);
1451
+ if (!existing || entry.score > existing.score) {
1452
+ entriesMap.set(entry.key, entry);
1453
+ }
1454
+ }
1455
+ }
1456
+ const entries = Array.from(entriesMap.values()).sort((a, b) => b.score - a.score).slice(0, options?.topK ?? 10);
1457
+ return {
1458
+ entries,
1459
+ durationMs: performance.now() - startTime
1460
+ };
1461
+ }
1462
+ async checkHealth() {
1463
+ const startTime = performance.now();
1464
+ const tierHealths = [];
1465
+ for (const tier of this.tiers) {
1466
+ const health = await tier.store.checkHealth();
1467
+ tierHealths.push({ name: tier.name, healthy: health.healthy });
1468
+ }
1469
+ const allHealthy = tierHealths.every((t) => t.healthy);
1470
+ return {
1471
+ healthy: allHealthy,
1472
+ latencyMs: performance.now() - startTime,
1473
+ lastCheck: now(),
1474
+ error: allHealthy ? void 0 : `Unhealthy tiers: ${tierHealths.filter((t) => !t.healthy).map((t) => t.name).join(", ")}`
1475
+ };
1476
+ }
1477
+ async close() {
1478
+ for (const tier of this.tiers) {
1479
+ await tier.store.close();
1480
+ }
1481
+ }
1482
+ /**
1483
+ * Get tier statistics
1484
+ */
1485
+ async getTierStats() {
1486
+ const stats = [];
1487
+ for (const tier of this.tiers) {
1488
+ stats.push({
1489
+ name: tier.name,
1490
+ priority: tier.priority,
1491
+ size: await tier.store.size(),
1492
+ maxSize: tier.maxSize
1493
+ });
1494
+ }
1495
+ return stats;
1496
+ }
1497
+ /**
1498
+ * Manually promote an entry to a higher tier
1499
+ */
1500
+ async promote(key, targetTierIndex = 0) {
1501
+ for (let i = targetTierIndex + 1; i < this.tiers.length; i++) {
1502
+ const entry = await this.tiers[i].store.get(key);
1503
+ if (entry) {
1504
+ await this.tiers[targetTierIndex].store.set(key, entry);
1505
+ await this.tiers[i].store.delete(key);
1506
+ return true;
1507
+ }
1508
+ }
1509
+ return false;
1510
+ }
1511
+ /**
1512
+ * Manually demote an entry to a lower tier
1513
+ */
1514
+ async demote(key, targetTierIndex) {
1515
+ for (let i = 0; i < this.tiers.length - 1; i++) {
1516
+ const entry = await this.tiers[i].store.get(key);
1517
+ if (entry) {
1518
+ const target = targetTierIndex ?? i + 1;
1519
+ if (target >= this.tiers.length) return false;
1520
+ await this.tiers[target].store.set(key, entry);
1521
+ await this.tiers[i].store.delete(key);
1522
+ return true;
1523
+ }
1524
+ }
1525
+ return false;
1526
+ }
1527
+ async checkPromotion(key, entry, currentTierIndex, accessCount) {
1528
+ for (let i = currentTierIndex - 1; i >= 0; i--) {
1529
+ const tier = this.tiers[i];
1530
+ const threshold = tier.promotionThreshold ?? 3;
1531
+ if (accessCount >= threshold) {
1532
+ await tier.store.set(key, entry);
1533
+ await this.tiers[currentTierIndex].store.delete(key);
1534
+ break;
1535
+ }
1536
+ }
1537
+ }
1538
+ async checkDemotion(tierIndex) {
1539
+ const tier = this.tiers[tierIndex];
1540
+ if (!tier.maxSize) return;
1541
+ const size = await tier.store.size();
1542
+ if (size <= tier.maxSize) return;
1543
+ const demotionTarget = tier.demotionTarget ?? 0.9;
1544
+ const targetSize = Math.floor(tier.maxSize * demotionTarget);
1545
+ const toRemove = size - targetSize;
1546
+ if (toRemove <= 0) return;
1547
+ const keys = await tier.store.keys();
1548
+ const keysByAccess = keys.map((k) => ({ key: k, count: this.accessCounts.get(k) ?? 0 })).sort((a, b) => a.count - b.count);
1549
+ const nextTierIndex = tierIndex + 1;
1550
+ if (nextTierIndex >= this.tiers.length) {
1551
+ for (let i = 0; i < toRemove && i < keysByAccess.length; i++) {
1552
+ await tier.store.delete(keysByAccess[i].key);
1553
+ this.accessCounts.delete(keysByAccess[i].key);
1554
+ }
1555
+ } else {
1556
+ for (let i = 0; i < toRemove && i < keysByAccess.length; i++) {
1557
+ const key = keysByAccess[i].key;
1558
+ const entry = await tier.store.get(key);
1559
+ if (entry) {
1560
+ await this.tiers[nextTierIndex].store.set(key, entry);
1561
+ await tier.store.delete(key);
1562
+ }
1563
+ }
1564
+ }
1565
+ }
1566
+ };
1567
+ function createTieredCacheStore(config) {
1568
+ return new TieredCacheStore(config);
1569
+ }
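
A sketch of a two-tier setup (memory in front of SQLite) using the factories above; the tier field names (name, priority, store, maxSize, promotionThreshold) are as read from this bundle.

    const cold = createSQLiteCacheStore({ type: 'sqlite', dbPath: './cache.db' });
    await cold.init();

    const store = createTieredCacheStore({
      type: 'tiered',
      tiers: [
        // priority 0 is checked first; entries are promoted after 3 accesses by default
        { name: 'hot', priority: 0, store: createMemoryCacheStore(), maxSize: 1000, promotionThreshold: 3 },
        { name: 'cold', priority: 1, store: cold },
      ],
    });

    console.log(await store.getTierStats());
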
1570
+
1571
+ // src/stores/PineconeCacheStore.ts
1572
+ var PineconeCacheStore = class extends BaseCacheStore {
1573
+ storeType = "pinecone";
1574
+ client = null;
1575
+ index = null;
1576
+ ns = null;
1577
+ pineconeConfig;
1578
+ connected = false;
1579
+ constructor(config) {
1580
+ super(config);
1581
+ this.pineconeConfig = config;
1582
+ }
1583
+ /**
1584
+ * Connect to Pinecone
1585
+ */
1586
+ async connect() {
1587
+ if (this.connected) return;
1588
+ try {
1589
+ const { Pinecone } = await import('@pinecone-database/pinecone');
1590
+ this.client = new Pinecone({
1591
+ apiKey: this.pineconeConfig.apiKey
1592
+ });
1593
+ this.index = this.client.Index(this.pineconeConfig.index);
1594
+ this.ns = this.index.namespace(this.namespace);
1595
+ this.connected = true;
1596
+ } catch (error) {
1597
+ throw new Error(
1598
+ `Failed to connect to Pinecone: ${error.message}`
1599
+ );
1600
+ }
1601
+ }
1602
+ async ensureConnected() {
1603
+ if (!this.connected || !this.ns) {
1604
+ await this.connect();
1605
+ }
1606
+ if (!this.ns) {
1607
+ throw new Error("Pinecone namespace not initialized");
1608
+ }
1609
+ return this.ns;
1610
+ }
1611
+ async get(key) {
1612
+ this.incrementMetric("gets");
1613
+ const ns = await this.ensureConnected();
1614
+ try {
1615
+ const result = await ns.fetch([key]);
1616
+ if (!result.records[key]) {
1617
+ this.incrementMetric("misses");
1618
+ return void 0;
1619
+ }
1620
+ this.incrementMetric("hits");
1621
+ const record = result.records[key];
1622
+ const metadata = record.metadata;
1623
+ const entry = JSON.parse(metadata.entryData);
1624
+ entry.metadata.accessedAt = now();
1625
+ entry.metadata.accessCount++;
1626
+ this.updateAccessMetadata(key, record.values, metadata).catch(() => {
1627
+ });
1628
+ return entry;
1629
+ } catch {
1630
+ this.incrementMetric("misses");
1631
+ return void 0;
1632
+ }
1633
+ }
1634
+ async updateAccessMetadata(key, values, metadata) {
1635
+ const ns = await this.ensureConnected();
1636
+ const updatedEntry = JSON.parse(metadata.entryData);
1637
+ updatedEntry.metadata.accessedAt = now();
1638
+ updatedEntry.metadata.accessCount++;
1639
+ await ns.upsert([
1640
+ {
1641
+ id: key,
1642
+ values,
1643
+ metadata: {
1644
+ ...metadata,
1645
+ accessedAt: now(),
1646
+ accessCount: metadata.accessCount + 1,
1647
+ entryData: JSON.stringify(updatedEntry)
1648
+ }
1649
+ }
1650
+ ]);
1651
+ }
1652
+ async set(key, entry) {
1653
+ const startTime = performance.now();
1654
+ this.incrementMetric("sets");
1655
+ const ns = await this.ensureConnected();
1656
+ if (!entry.embedding || entry.embedding.length === 0) {
1657
+ return {
1658
+ success: false,
1659
+ id: entry.id,
1660
+ durationMs: performance.now() - startTime
1661
+ };
1662
+ }
1663
+ const metadata = {
1664
+ key,
1665
+ model: entry.request.model,
1666
+ content: entry.response.content.substring(0, 3e4),
1667
+ // Pinecone metadata limit
1668
+ createdAt: entry.metadata.createdAt,
1669
+ accessedAt: entry.metadata.accessedAt,
1670
+ accessCount: entry.metadata.accessCount,
1671
+ hitCount: entry.metadata.hitCount,
1672
+ ttl: entry.metadata.ttl,
1673
+ namespace: entry.metadata.namespace ?? this.namespace,
1674
+ tags: entry.metadata.tags ?? [],
1675
+ entryData: JSON.stringify(entry)
1676
+ };
1677
+ try {
1678
+ await ns.upsert([
1679
+ {
1680
+ id: key,
1681
+ values: entry.embedding,
1682
+ metadata
1683
+ }
1684
+ ]);
1685
+ return {
1686
+ success: true,
1687
+ id: entry.id,
1688
+ durationMs: performance.now() - startTime
1689
+ };
1690
+ } catch (error) {
1691
+ return {
1692
+ success: false,
1693
+ id: entry.id,
1694
+ durationMs: performance.now() - startTime
1695
+ };
1696
+ }
1697
+ }
1698
+ async has(key) {
1699
+ const ns = await this.ensureConnected();
1700
+ try {
1701
+ const result = await ns.fetch([key]);
1702
+ return !!result.records[key];
1703
+ } catch {
1704
+ return false;
1705
+ }
1706
+ }
1707
+ async delete(key) {
1708
+ this.incrementMetric("deletes");
1709
+ const ns = await this.ensureConnected();
1710
+ try {
1711
+ await ns.deleteOne(key);
1712
+ return true;
1713
+ } catch {
1714
+ return false;
1715
+ }
1716
+ }
1717
+ async clear() {
1718
+ const ns = await this.ensureConnected();
1719
+ await ns.deleteAll();
1720
+ }
1721
+ async size() {
1722
+ if (!this.index) {
1723
+ await this.connect();
1724
+ }
1725
+ try {
1726
+ const stats = await this.index.describeIndexStats();
1727
+ return stats.namespaces[this.namespace]?.recordCount ?? 0;
1728
+ } catch {
1729
+ return 0;
1730
+ }
1731
+ }
1732
+ async keys() {
1733
+ const ns = await this.ensureConnected();
1734
+ try {
1735
+ const result = await ns.listPaginated({ limit: 1e4 });
1736
+ return result.vectors.map((v) => v.id);
1737
+ } catch {
1738
+ return [];
1739
+ }
1740
+ }
1741
+ async query(vector, options) {
1742
+ const startTime = performance.now();
1743
+ const ns = await this.ensureConnected();
1744
+ const queryOptions = {
1745
+ vector,
1746
+ topK: options?.topK ?? 10,
1747
+ includeMetadata: true,
1748
+ includeValues: options?.includeEmbedding ?? false
1749
+ };
1750
+ if (options?.filter) {
1751
+ queryOptions.filter = options.filter;
1752
+ }
1753
+ try {
1754
+ const result = await ns.query(queryOptions);
1755
+ const entries = [];
1756
+ for (const match of result.matches) {
1757
+ if (options?.minSimilarity && match.score < options.minSimilarity) {
1758
+ continue;
1759
+ }
1760
+ const metadata = match.metadata;
1761
+ if (metadata?.entryData) {
1762
+ try {
1763
+ const entry = JSON.parse(metadata.entryData);
1764
+ if (options?.includeEmbedding && match.values) {
1765
+ entry.embedding = match.values;
1766
+ }
1767
+ entries.push({
1768
+ ...entry,
1769
+ score: match.score
1770
+ });
1771
+ } catch {
1772
+ }
1773
+ }
1774
+ }
1775
+ return {
1776
+ entries,
1777
+ durationMs: performance.now() - startTime
1778
+ };
1779
+ } catch (error) {
1780
+ return {
1781
+ entries: [],
1782
+ durationMs: performance.now() - startTime
1783
+ };
1784
+ }
1785
+ }
1786
+ async checkHealth() {
1787
+ const startTime = performance.now();
1788
+ try {
1789
+ if (!this.index) {
1790
+ await this.connect();
1791
+ }
1792
+ await this.index.describeIndexStats();
1793
+ return {
1794
+ healthy: true,
1795
+ latencyMs: performance.now() - startTime,
1796
+ lastCheck: now()
1797
+ };
1798
+ } catch (error) {
1799
+ return {
1800
+ healthy: false,
1801
+ latencyMs: performance.now() - startTime,
1802
+ lastCheck: now(),
1803
+ error: error.message
1804
+ };
1805
+ }
1806
+ }
1807
+ close() {
1808
+ this.client = null;
1809
+ this.index = null;
1810
+ this.ns = null;
1811
+ this.connected = false;
1812
+ return Promise.resolve();
1813
+ }
1814
+ /**
1815
+ * Check if connected to Pinecone
1816
+ */
1817
+ isConnected() {
1818
+ return this.connected;
1819
+ }
1820
+ /**
1821
+ * Get index stats
1822
+ */
1823
+ async getIndexStats() {
1824
+ if (!this.index) {
1825
+ await this.connect();
1826
+ }
1827
+ try {
1828
+ return await this.index.describeIndexStats();
1829
+ } catch {
1830
+ return null;
1831
+ }
1832
+ }
1833
+ };
1834
+ function createPineconeCacheStore(config) {
1835
+ return new PineconeCacheStore(config);
1836
+ }
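
A sketch for the Pinecone-backed store, using the createPineconeCacheStore factory above and assuming @pinecone-database/pinecone is installed and the index dimension matches your embedding vectors.

    const store = createPineconeCacheStore({
      type: 'pinecone',
      apiKey: process.env.PINECONE_API_KEY,
      index: 'llm-cache',
      namespace: 'prod',
    });

    await store.connect();
    // Entries without an embedding are rejected: set() resolves with { success: false }.
    console.log(await store.checkHealth());
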
1837
+
1838
+ // src/strategies/BaseMatchStrategy.ts
1839
+ var BaseMatchStrategy = class {
1840
+ };
1841
+
1842
+ // src/strategies/ExactMatchStrategy.ts
1843
+ var DEFAULT_CONFIG6 = {
1844
+ normalizeWhitespace: true,
1845
+ hashFields: ["model", "messages"]
1846
+ };
1847
+ var ExactMatchStrategy = class extends BaseMatchStrategy {
1848
+ name = "exact";
1849
+ config;
1850
+ constructor(config) {
1851
+ super();
1852
+ this.config = { ...DEFAULT_CONFIG6, ...config };
1853
+ }
1854
+ async match(request, store, _similarity, options) {
1855
+ const startTime = performance.now();
1856
+ const namespace = options?.namespace;
1857
+ const baseKey = generateCacheKey(request.model, request.messages, {
1858
+ normalizeWhitespace: this.config.normalizeWhitespace,
1859
+ includeTemperature: this.config.hashFields?.includes("temperature")
1860
+ });
1861
+ const tempSuffix = request.temperature !== void 0 ? `:t:${request.temperature}` : "";
1862
+ const key = namespace && namespace !== "default" ? `${baseKey}${tempSuffix}:ns:${namespace}` : `${baseKey}${tempSuffix}`;
1863
+ const entry = await store.get(key);
1864
+ if (entry) {
1865
+ if (namespace && entry.metadata.namespace && entry.metadata.namespace !== namespace) {
1866
+ return {
1867
+ hit: false,
1868
+ latencyMs: performance.now() - startTime,
1869
+ source: "miss"
1870
+ };
1871
+ }
1872
+ return {
1873
+ hit: true,
1874
+ entry,
1875
+ similarity: 1,
1876
+ // Exact match = 100% similarity
1877
+ latencyMs: performance.now() - startTime,
1878
+ source: "exact"
1879
+ };
1880
+ }
1881
+ return {
1882
+ hit: false,
1883
+ latencyMs: performance.now() - startTime,
1884
+ source: "miss"
1885
+ };
1886
+ }
1887
+ };
1888
+ function createExactMatchStrategy(config) {
1889
+ return new ExactMatchStrategy(config);
1890
+ }
1891
+
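+ // Minimal sketch (editorial): exercising ExactMatchStrategy against a cache
+ // store from this bundle. The request literal mirrors the fields match() reads
+ // above (model, messages, optional temperature); `store` is assumed to expose
+ // the get/set interface used throughout this file.
+ async function exampleExactMatch(store) {
+   const strategy = createExactMatchStrategy({ normalizeWhitespace: true });
+   const request = {
+     model: "gpt-4o-mini",
+     messages: [{ role: "user", content: "What is semantic caching?" }]
+   };
+   // Exact matching needs no similarity engine, so undefined is passed for it.
+   const result = await strategy.match(request, store, undefined, { namespace: "docs" });
+   return result.hit ? result.entry : null;
+ }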
1892
+ // src/strategies/SemanticMatchStrategy.ts
1893
+ var DEFAULT_CONFIG7 = {
1894
+ threshold: 0.92,
1895
+ matchModel: true,
1896
+ topK: 5
1897
+ };
1898
+ var SemanticMatchStrategy = class extends BaseMatchStrategy {
1899
+ name = "semantic";
1900
+ config;
1901
+ constructor(config) {
1902
+ super();
1903
+ this.config = { ...DEFAULT_CONFIG7, ...config };
1904
+ }
1905
+ async match(request, store, similarity, options) {
1906
+ const startTime = performance.now();
1907
+ if (!similarity) {
1908
+ return {
1909
+ hit: false,
1910
+ latencyMs: performance.now() - startTime,
1911
+ source: "miss"
1912
+ };
1913
+ }
1914
+ const userMessage = extractUserMessage(request.messages);
1915
+ if (!userMessage) {
1916
+ return {
1917
+ hit: false,
1918
+ latencyMs: performance.now() - startTime,
1919
+ source: "miss"
1920
+ };
1921
+ }
1922
+ try {
1923
+ const queryEmbedding = await similarity.embed(userMessage);
1924
+ const threshold = options?.threshold ?? this.config.threshold ?? 0.92;
1925
+ const topK = options?.topK ?? this.config.topK ?? 5;
1926
+ const results = await store.query(queryEmbedding, {
1927
+ topK,
1928
+ minSimilarity: threshold,
1929
+ namespace: options?.namespace
1930
+ });
1931
+ if (results.entries.length > 0) {
1932
+ let bestMatch = results.entries[0];
1933
+ if (this.config.matchModel) {
1934
+ const modelMatch = results.entries.find(
1935
+ (e) => e.request.model === request.model
1936
+ );
1937
+ if (modelMatch) {
1938
+ bestMatch = modelMatch;
1939
+ }
1940
+ }
1941
+ if (bestMatch && bestMatch.score >= threshold) {
1942
+ return {
1943
+ hit: true,
1944
+ entry: bestMatch,
1945
+ similarity: bestMatch.score,
1946
+ latencyMs: performance.now() - startTime,
1947
+ source: "semantic"
1948
+ };
1949
+ }
1950
+ }
1951
+ return {
1952
+ hit: false,
1953
+ latencyMs: performance.now() - startTime,
1954
+ source: "miss"
1955
+ };
1956
+ } catch (error) {
1957
+ console.error("Semantic match error:", error);
1958
+ return {
1959
+ hit: false,
1960
+ latencyMs: performance.now() - startTime,
1961
+ source: "miss"
1962
+ };
1963
+ }
1964
+ }
1965
+ };
1966
+ function createSemanticMatchStrategy(config) {
1967
+ return new SemanticMatchStrategy(config);
1968
+ }
1969
+
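+ // Minimal sketch (editorial): wiring SemanticMatchStrategy to a
+ // SimilarityEngine. The embedding provider below is a stand-in that returns a
+ // fixed 3-dimensional vector; a real provider would call an embedding API.
+ async function exampleSemanticMatch(store) {
+   const similarity = createSimilarityEngine({
+     provider: { dimensions: 3, embed: async () => [0.1, 0.2, 0.3] },
+     cacheEmbeddings: true
+   });
+   const strategy = createSemanticMatchStrategy({ threshold: 0.9, topK: 3 });
+   const request = {
+     model: "gpt-4o-mini",
+     messages: [{ role: "user", content: "Explain semantic caching briefly." }]
+   };
+   return strategy.match(request, store, similarity, { namespace: "docs" });
+ }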
1970
+ // src/strategies/HybridMatchStrategy.ts
1971
+ var DEFAULT_CONFIG8 = {
1972
+ exact: {
1973
+ normalizeWhitespace: true,
1974
+ hashFields: ["model", "messages"]
1975
+ },
1976
+ semantic: {
1977
+ threshold: 0.92,
1978
+ matchModel: true,
1979
+ topK: 5
1980
+ }
1981
+ };
1982
+ var HybridMatchStrategy = class extends BaseMatchStrategy {
1983
+ name = "hybrid";
1984
+ exact;
1985
+ semantic;
1986
+ config;
1987
+ constructor(config) {
1988
+ super();
1989
+ this.config = { ...DEFAULT_CONFIG8, ...config };
1990
+ this.exact = new ExactMatchStrategy(this.config.exact);
1991
+ this.semantic = new SemanticMatchStrategy(this.config.semantic);
1992
+ }
1993
+ async match(request, store, similarity, options) {
1994
+ const startTime = performance.now();
1995
+ if (this.shouldUseExactOnly(request)) {
1996
+ const result = await this.exact.match(
1997
+ request,
1998
+ store,
1999
+ similarity,
2000
+ options
2001
+ );
2002
+ return {
2003
+ ...result,
2004
+ latencyMs: performance.now() - startTime
2005
+ };
2006
+ }
2007
+ const exactResult = await this.exact.match(
2008
+ request,
2009
+ store,
2010
+ similarity,
2011
+ options
2012
+ );
2013
+ if (exactResult.hit) {
2014
+ return {
2015
+ ...exactResult,
2016
+ latencyMs: performance.now() - startTime
2017
+ };
2018
+ }
2019
+ if (!this.shouldUseSemantic(request)) {
2020
+ return {
2021
+ hit: false,
2022
+ latencyMs: performance.now() - startTime,
2023
+ source: "miss"
2024
+ };
2025
+ }
2026
+ if (similarity) {
2027
+ const semanticResult = await this.semantic.match(
2028
+ request,
2029
+ store,
2030
+ similarity,
2031
+ options
2032
+ );
2033
+ return {
2034
+ ...semanticResult,
2035
+ latencyMs: performance.now() - startTime
2036
+ };
2037
+ }
2038
+ return {
2039
+ hit: false,
2040
+ latencyMs: performance.now() - startTime,
2041
+ source: "miss"
2042
+ };
2043
+ }
2044
+ /**
2045
+ * Check if request should use exact-only matching
2046
+ */
2047
+ shouldUseExactOnly(request) {
2048
+ if (!this.config.exactOnlyPatterns) return false;
2049
+ const userMessage = this.extractUserMessage(request);
2050
+ return this.config.exactOnlyPatterns.some(
2051
+ (pattern) => pattern.test(userMessage)
2052
+ );
2053
+ }
2054
+ /**
2055
+ * Check if semantic matching should be used
2056
+ */
2057
+ shouldUseSemantic(request) {
2058
+ if (!this.config.semanticPatterns) return true;
2059
+ const userMessage = this.extractUserMessage(request);
2060
+ return this.config.semanticPatterns.some(
2061
+ (pattern) => pattern.test(userMessage)
2062
+ );
2063
+ }
2064
+ /**
2065
+ * Extract user message from request
2066
+ */
2067
+ extractUserMessage(request) {
2068
+ for (let i = request.messages.length - 1; i >= 0; i--) {
2069
+ if (request.messages[i].role === "user") {
2070
+ return request.messages[i].content ?? "";
2071
+ }
2072
+ }
2073
+ return "";
2074
+ }
2075
+ };
2076
+ function createHybridMatchStrategy(config) {
2077
+ return new HybridMatchStrategy(config);
2078
+ }
2079
+
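+ // Minimal sketch (editorial): the hybrid strategy tries an exact lookup first
+ // and falls back to a semantic one. The regex below is a hypothetical example
+ // of routing arithmetic-style prompts to exact-only matching.
+ async function exampleHybridMatch(store, similarity) {
+   const strategy = createHybridMatchStrategy({
+     exactOnlyPatterns: [/^\d+\s*[-+*\/]\s*\d+$/],
+     semantic: { threshold: 0.93, matchModel: true, topK: 5 }
+   });
+   const request = {
+     model: "gpt-4o",
+     messages: [{ role: "user", content: "Summarize the benefits of caching." }]
+   };
+   return strategy.match(request, store, similarity, {});
+ }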
2080
+ // src/similarity/SimilarityEngine.ts
2081
+ var SimilarityEngine = class {
2082
+ provider;
2083
+ metric;
2084
+ embeddingCache;
2085
+ maxCacheSize;
2086
+ constructor(config) {
2087
+ this.provider = config.provider;
2088
+ this.metric = config.metric ?? "cosine";
2089
+ this.maxCacheSize = config.maxCacheSize ?? 1e4;
2090
+ if (config.cacheEmbeddings) {
2091
+ this.embeddingCache = /* @__PURE__ */ new Map();
2092
+ }
2093
+ }
2094
+ /**
2095
+ * Generate embedding for text
2096
+ */
2097
+ async embed(text) {
2098
+ if (this.embeddingCache?.has(text)) {
2099
+ return this.embeddingCache.get(text);
2100
+ }
2101
+ const embedding = await this.provider.embed(text);
2102
+ if (this.embeddingCache) {
2103
+ if (this.embeddingCache.size >= this.maxCacheSize) {
2104
+ const firstKey = this.embeddingCache.keys().next().value;
2105
+ if (firstKey) {
2106
+ this.embeddingCache.delete(firstKey);
2107
+ }
2108
+ }
2109
+ this.embeddingCache.set(text, embedding);
2110
+ }
2111
+ return embedding;
2112
+ }
2113
+ /**
2114
+ * Generate embeddings for multiple texts
2115
+ */
2116
+ async embedBatch(texts) {
2117
+ if (this.provider.embedBatch) {
2118
+ return this.provider.embedBatch(texts);
2119
+ }
2120
+ return Promise.all(texts.map((text) => this.embed(text)));
2121
+ }
2122
+ /**
2123
+ * Compute similarity between two vectors
2124
+ */
2125
+ computeSimilarity(a, b) {
2126
+ switch (this.metric) {
2127
+ case "cosine":
2128
+ return this.cosineSimilarity(a, b);
2129
+ case "euclidean": {
2130
+ const dist = this.euclideanDistance(a, b);
2131
+ return 1 / (1 + dist);
2132
+ }
2133
+ case "dot_product":
2134
+ return this.dotProduct(a, b);
2135
+ default:
2136
+ return this.cosineSimilarity(a, b);
2137
+ }
2138
+ }
2139
+ /**
2140
+ * Find most similar vectors from candidates
2141
+ */
2142
+ findMostSimilar(query, candidates, minSimilarity = 0) {
2143
+ const results = candidates.map((c) => ({
2144
+ id: c.id,
2145
+ similarity: this.computeSimilarity(query, c.vector)
2146
+ })).filter((r) => r.similarity >= minSimilarity).sort((a, b) => b.similarity - a.similarity);
2147
+ return results;
2148
+ }
2149
+ /**
2150
+ * Get embedding dimensions
2151
+ */
2152
+ get dimensions() {
2153
+ return this.provider.dimensions ?? 0;
2154
+ }
2155
+ /**
2156
+ * Clear embedding cache
2157
+ */
2158
+ clearCache() {
2159
+ this.embeddingCache?.clear();
2160
+ }
2161
+ /**
2162
+ * Get cache statistics
2163
+ */
2164
+ getCacheStats() {
2165
+ if (!this.embeddingCache) return null;
2166
+ return {
2167
+ size: this.embeddingCache.size,
2168
+ maxSize: this.maxCacheSize
2169
+ };
2170
+ }
2171
+ cosineSimilarity(a, b) {
2172
+ if (a.length !== b.length) return 0;
2173
+ let dotProduct2 = 0;
2174
+ let normA = 0;
2175
+ let normB = 0;
2176
+ for (let i = 0; i < a.length; i++) {
2177
+ dotProduct2 += a[i] * b[i];
2178
+ normA += a[i] * a[i];
2179
+ normB += b[i] * b[i];
2180
+ }
2181
+ const denominator = Math.sqrt(normA) * Math.sqrt(normB);
2182
+ if (denominator === 0) return 0;
2183
+ return dotProduct2 / denominator;
2184
+ }
2185
+ euclideanDistance(a, b) {
2186
+ if (a.length !== b.length) return Infinity;
2187
+ let sum = 0;
2188
+ for (let i = 0; i < a.length; i++) {
2189
+ const diff = a[i] - b[i];
2190
+ sum += diff * diff;
2191
+ }
2192
+ return Math.sqrt(sum);
2193
+ }
2194
+ dotProduct(a, b) {
2195
+ if (a.length !== b.length) return 0;
2196
+ let sum = 0;
2197
+ for (let i = 0; i < a.length; i++) {
2198
+ sum += a[i] * b[i];
2199
+ }
2200
+ return sum;
2201
+ }
2202
+ };
2203
+ function createSimilarityEngine(config) {
2204
+ return new SimilarityEngine(config);
2205
+ }
2206
+
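+ // Minimal sketch (editorial): using the engine's vector math directly. The toy
+ // 2-dimensional vectors make the cosine results easy to verify by hand.
+ function exampleSimilarityMath() {
+   const engine = createSimilarityEngine({
+     provider: { dimensions: 2, embed: async () => [1, 0] },
+     metric: "cosine"
+   });
+   const same = engine.computeSimilarity([1, 0], [2, 0]);  // 1: same direction
+   const ortho = engine.computeSimilarity([1, 0], [0, 1]); // 0: orthogonal
+   const ranked = engine.findMostSimilar([1, 0], [
+     { id: "a", vector: [0.9, 0.1] },
+     { id: "b", vector: [0, 1] }
+   ], 0.5); // only "a" clears the 0.5 similarity floor
+   return { same, ortho, ranked };
+ }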
2207
+ // src/streaming/ChunkBuffer.ts
2208
+ var DEFAULT_CONFIG9 = {
2209
+ maxChunks: 100,
2210
+ maxBytes: 64 * 1024,
2211
+ // 64KB
2212
+ flushIntervalMs: 1e3,
2213
+ preserveOrder: true
2214
+ };
2215
+ var ChunkBuffer = class {
2216
+ chunks = [];
2217
+ currentBytes = 0;
2218
+ config;
2219
+ flushTimer = null;
2220
+ onFlush;
2221
+ constructor(config, onFlush) {
2222
+ this.config = { ...DEFAULT_CONFIG9, ...config };
2223
+ this.onFlush = onFlush;
2224
+ if (this.config.flushIntervalMs > 0) {
2225
+ this.startFlushTimer();
2226
+ }
2227
+ }
2228
+ /**
2229
+ * Add a chunk to the buffer
2230
+ */
2231
+ add(chunk) {
2232
+ if (this.config.preserveOrder) {
2233
+ if (chunk.index === void 0) {
2234
+ chunk.index = this.chunks.length;
2235
+ }
2236
+ }
2237
+ this.chunks.push(chunk);
2238
+ this.currentBytes += this.estimateChunkSize(chunk);
2239
+ if (this.shouldFlush()) {
2240
+ this.flush();
2241
+ }
2242
+ }
2243
+ /**
2244
+ * Add multiple chunks
2245
+ */
2246
+ addAll(chunks) {
2247
+ for (const chunk of chunks) {
2248
+ this.add(chunk);
2249
+ }
2250
+ }
2251
+ /**
2252
+ * Flush all buffered chunks
2253
+ */
2254
+ flush() {
2255
+ const flushed = this.chunks;
2256
+ if (this.config.preserveOrder) {
2257
+ flushed.sort((a, b) => a.index - b.index);
2258
+ }
2259
+ this.chunks = [];
2260
+ this.currentBytes = 0;
2261
+ if (this.onFlush && flushed.length > 0) {
2262
+ this.onFlush(flushed);
2263
+ }
2264
+ return flushed;
2265
+ }
2266
+ /**
2267
+ * Get current buffer size
2268
+ */
2269
+ size() {
2270
+ return this.chunks.length;
2271
+ }
2272
+ /**
2273
+ * Get current buffer bytes
2274
+ */
2275
+ bytes() {
2276
+ return this.currentBytes;
2277
+ }
2278
+ /**
2279
+ * Check if buffer is empty
2280
+ */
2281
+ isEmpty() {
2282
+ return this.chunks.length === 0;
2283
+ }
2284
+ /**
2285
+ * Peek at buffered chunks without flushing
2286
+ */
2287
+ peek() {
2288
+ return this.chunks;
2289
+ }
2290
+ /**
2291
+ * Clear the buffer without flushing
2292
+ */
2293
+ clear() {
2294
+ this.chunks = [];
2295
+ this.currentBytes = 0;
2296
+ }
2297
+ /**
2298
+ * Stop the flush timer
2299
+ */
2300
+ stop() {
2301
+ if (this.flushTimer) {
2302
+ clearInterval(this.flushTimer);
2303
+ this.flushTimer = null;
2304
+ }
2305
+ }
2306
+ /**
2307
+ * Destroy the buffer
2308
+ */
2309
+ destroy() {
2310
+ this.stop();
2311
+ this.clear();
2312
+ }
2313
+ shouldFlush() {
2314
+ return this.chunks.length >= this.config.maxChunks || this.currentBytes >= this.config.maxBytes;
2315
+ }
2316
+ startFlushTimer() {
2317
+ this.flushTimer = setInterval(() => {
2318
+ if (!this.isEmpty()) {
2319
+ this.flush();
2320
+ }
2321
+ }, this.config.flushIntervalMs);
2322
+ }
2323
+ estimateChunkSize(chunk) {
2324
+ let size = 0;
2325
+ if (chunk.content) {
2326
+ size += chunk.content.length * 2;
2327
+ }
2328
+ if (chunk.toolCall) {
2329
+ size += JSON.stringify(chunk.toolCall).length;
2330
+ }
2331
+ if (chunk.toolResult) {
2332
+ size += JSON.stringify(chunk.toolResult).length;
2333
+ }
2334
+ if (chunk.metadata) {
2335
+ size += JSON.stringify(chunk.metadata).length;
2336
+ }
2337
+ return size + 50;
2338
+ }
2339
+ };
2340
+ function createChunkBuffer(config, onFlush) {
2341
+ return new ChunkBuffer(config, onFlush);
2342
+ }
2343
+
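+ // Minimal sketch (editorial): a buffer that flushes as soon as two chunks
+ // accumulate; the onFlush callback just collects each flushed batch.
+ function exampleChunkBuffer() {
+   const batches = [];
+   const buffer = createChunkBuffer(
+     { maxChunks: 2, flushIntervalMs: 0 },
+     (chunks) => batches.push(chunks)
+   );
+   buffer.add({ type: "text", content: "Hello, ", timestamp: now() });
+   buffer.add({ type: "text", content: "world", timestamp: now() }); // triggers flush
+   buffer.destroy();
+   return batches;
+ }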
2344
+ // src/streaming/StreamRecorder.ts
2345
+ var DEFAULT_CONFIG10 = {
2346
+ buffer: {
2347
+ maxChunks: 100,
2348
+ maxBytes: 64 * 1024,
2349
+ flushIntervalMs: 0,
2350
+ // No auto-flush during recording
2351
+ preserveOrder: true
2352
+ },
2353
+ captureToolCalls: true,
2354
+ captureMetadata: true,
2355
+ maxDurationMs: 3e5,
2356
+ // 5 minutes
2357
+ maxChunks: 1e4
2358
+ };
2359
+ var StreamRecorder = class {
2360
+ config;
2361
+ buffer;
2362
+ recording = false;
2363
+ startTime = 0;
2364
+ model = "";
2365
+ messages = [];
2366
+ key = "";
2367
+ totalChars = 0;
2368
+ chunkIndex = 0;
2369
+ timeoutId = null;
2370
+ constructor(config) {
2371
+ this.config = {
2372
+ ...DEFAULT_CONFIG10,
2373
+ ...config,
2374
+ buffer: { ...DEFAULT_CONFIG10.buffer, ...config?.buffer }
2375
+ };
2376
+ this.buffer = new ChunkBuffer(this.config.buffer);
2377
+ }
2378
+ /**
2379
+ * Start recording a new stream
2380
+ */
2381
+ start(model, messages, key) {
2382
+ if (this.recording) {
2383
+ throw new Error("Recording already in progress");
2384
+ }
2385
+ this.recording = true;
2386
+ this.startTime = now();
2387
+ this.model = model;
2388
+ this.messages = messages;
2389
+ this.key = key ?? generateId();
2390
+ this.totalChars = 0;
2391
+ this.chunkIndex = 0;
2392
+ this.buffer.clear();
2393
+ if (this.config.maxDurationMs > 0) {
2394
+ this.timeoutId = setTimeout(() => {
2395
+ if (this.recording) {
2396
+ this.abort("Recording exceeded maximum duration");
2397
+ }
2398
+ }, this.config.maxDurationMs);
2399
+ }
2400
+ }
2401
+ /**
2402
+ * Record a text chunk
2403
+ */
2404
+ recordText(content, metadata) {
2405
+ this.recordChunk({
2406
+ type: "text",
2407
+ content,
2408
+ metadata: this.config.captureMetadata ? metadata : void 0,
2409
+ timestamp: now(),
2410
+ index: this.chunkIndex++
2411
+ });
2412
+ this.totalChars += content.length;
2413
+ }
2414
+ /**
2415
+ * Record a tool call chunk
2416
+ */
2417
+ recordToolCall(id, name, args, metadata) {
2418
+ if (!this.config.captureToolCalls) return;
2419
+ this.recordChunk({
2420
+ type: "tool_call",
2421
+ toolCall: { id, name, arguments: args },
2422
+ metadata: this.config.captureMetadata ? metadata : void 0,
2423
+ timestamp: now(),
2424
+ index: this.chunkIndex++
2425
+ });
2426
+ }
2427
+ /**
2428
+ * Record a tool result chunk
2429
+ */
2430
+ recordToolResult(callId, content, metadata) {
2431
+ if (!this.config.captureToolCalls) return;
2432
+ this.recordChunk({
2433
+ type: "tool_result",
2434
+ toolResult: { callId, content },
2435
+ metadata: this.config.captureMetadata ? metadata : void 0,
2436
+ timestamp: now(),
2437
+ index: this.chunkIndex++
2438
+ });
2439
+ }
2440
+ /**
2441
+ * Record metadata
2442
+ */
2443
+ recordMetadata(metadata) {
2444
+ if (!this.config.captureMetadata) return;
2445
+ this.recordChunk({
2446
+ type: "metadata",
2447
+ metadata,
2448
+ timestamp: now(),
2449
+ index: this.chunkIndex++
2450
+ });
2451
+ }
2452
+ /**
2453
+ * Record a generic chunk
2454
+ */
2455
+ recordChunk(chunk) {
2456
+ if (!this.recording) {
2457
+ throw new Error("Not currently recording");
2458
+ }
2459
+ if (chunk.index >= this.config.maxChunks) {
2460
+ throw new Error("Maximum chunks exceeded");
2461
+ }
2462
+ this.buffer.add(chunk);
2463
+ }
2464
+ /**
2465
+ * Complete the recording and return the recorded stream
2466
+ */
2467
+ complete(tokenUsage) {
2468
+ if (!this.recording) {
2469
+ throw new Error("Not currently recording");
2470
+ }
2471
+ this.clearTimeout();
2472
+ const endTime = now();
2473
+ const chunks = this.buffer.flush();
2474
+ const stream = {
2475
+ id: generateId(),
2476
+ key: this.key,
2477
+ chunks,
2478
+ model: this.model,
2479
+ messages: this.messages,
2480
+ startTime: this.startTime,
2481
+ endTime,
2482
+ durationMs: endTime - this.startTime,
2483
+ totalChars: this.totalChars,
2484
+ tokenUsage,
2485
+ complete: true
2486
+ };
2487
+ this.reset();
2488
+ return stream;
2489
+ }
2490
+ /**
2491
+ * Abort the recording
2492
+ */
2493
+ abort(reason) {
2494
+ if (!this.recording) {
2495
+ throw new Error("Not currently recording");
2496
+ }
2497
+ this.clearTimeout();
2498
+ const endTime = now();
2499
+ const chunks = this.buffer.flush();
2500
+ const stream = {
2501
+ id: generateId(),
2502
+ key: this.key,
2503
+ chunks,
2504
+ model: this.model,
2505
+ messages: this.messages,
2506
+ startTime: this.startTime,
2507
+ endTime,
2508
+ durationMs: endTime - this.startTime,
2509
+ totalChars: this.totalChars,
2510
+ complete: false,
2511
+ error: reason ?? "Recording aborted"
2512
+ };
2513
+ this.reset();
2514
+ return stream;
2515
+ }
2516
+ /**
2517
+ * Check if currently recording
2518
+ */
2519
+ isRecording() {
2520
+ return this.recording;
2521
+ }
2522
+ /**
2523
+ * Get current chunk count
2524
+ */
2525
+ getChunkCount() {
2526
+ return this.chunkIndex;
2527
+ }
2528
+ /**
2529
+ * Get current recording duration in ms
2530
+ */
2531
+ getDuration() {
2532
+ if (!this.recording) return 0;
2533
+ return now() - this.startTime;
2534
+ }
2535
+ /**
2536
+ * Destroy the recorder
2537
+ */
2538
+ destroy() {
2539
+ this.clearTimeout();
2540
+ this.buffer.destroy();
2541
+ this.reset();
2542
+ }
2543
+ reset() {
2544
+ this.recording = false;
2545
+ this.startTime = 0;
2546
+ this.model = "";
2547
+ this.messages = [];
2548
+ this.key = "";
2549
+ this.totalChars = 0;
2550
+ this.chunkIndex = 0;
2551
+ }
2552
+ clearTimeout() {
2553
+ if (this.timeoutId) {
2554
+ clearTimeout(this.timeoutId);
2555
+ this.timeoutId = null;
2556
+ }
2557
+ }
2558
+ };
2559
+ function createStreamRecorder(config) {
2560
+ return new StreamRecorder(config);
2561
+ }
2562
+
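+ // Minimal sketch (editorial): recording a short stream by hand. In real use
+ // the recordText calls would be driven by a model's streaming response.
+ function exampleStreamRecorder() {
+   const recorder = createStreamRecorder({ maxDurationMs: 0 });
+   recorder.start("gpt-4o-mini", [{ role: "user", content: "Say hi" }]);
+   recorder.recordText("Hi ");
+   recorder.recordText("there!");
+   const recorded = recorder.complete({ prompt: 5, completion: 3, total: 8 });
+   recorder.destroy();
+   return recorded; // { chunks, totalChars, durationMs, complete: true, ... }
+ }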
2563
+ // src/streaming/StreamReplayer.ts
2564
+ var DEFAULT_CONFIG11 = {
2565
+ speedMultiplier: 1,
2566
+ minDelayMs: 0,
2567
+ maxDelayMs: 100,
2568
+ simulateTiming: false,
2569
+ onChunk: () => {
2570
+ },
2571
+ onComplete: () => {
2572
+ },
2573
+ onError: () => {
2574
+ }
2575
+ };
2576
+ var StreamReplayer = class {
2577
+ config;
2578
+ abortController = null;
2579
+ constructor(config) {
2580
+ this.config = { ...DEFAULT_CONFIG11, ...config };
2581
+ }
2582
+ /**
2583
+ * Replay a recorded stream as an async iterable
2584
+ */
2585
+ async *replay(stream) {
2586
+ this.abortController = new AbortController();
2587
+ const signal = this.abortController.signal;
2588
+ try {
2589
+ const chunks = [...stream.chunks].sort((a, b) => a.index - b.index);
2590
+ let lastTimestamp = chunks[0]?.timestamp ?? 0;
2591
+ for (let i = 0; i < chunks.length; i++) {
2592
+ if (signal.aborted) {
2593
+ break;
2594
+ }
2595
+ const chunk = chunks[i];
2596
+ if (this.config.simulateTiming && i > 0) {
2597
+ const timeDiff = chunk.timestamp - lastTimestamp;
2598
+ const delay = Math.min(
2599
+ Math.max(
2600
+ timeDiff / this.config.speedMultiplier,
2601
+ this.config.minDelayMs
2602
+ ),
2603
+ this.config.maxDelayMs
2604
+ );
2605
+ if (delay > 0) {
2606
+ await this.delay(delay, signal);
2607
+ }
2608
+ }
2609
+ lastTimestamp = chunk.timestamp;
2610
+ this.config.onChunk(chunk);
2611
+ yield chunk;
2612
+ }
2613
+ this.config.onComplete(stream);
2614
+ } catch (error) {
2615
+ if (error.name !== "AbortError") {
2616
+ this.config.onError(error);
2617
+ throw error;
2618
+ }
2619
+ } finally {
2620
+ this.abortController = null;
2621
+ }
2622
+ }
2623
+ /**
2624
+ * Replay as an async iterable of text content only
2625
+ */
2626
+ async *replayText(stream) {
2627
+ for await (const chunk of this.replay(stream)) {
2628
+ if (chunk.type === "text" && chunk.content) {
2629
+ yield chunk.content;
2630
+ }
2631
+ }
2632
+ }
2633
+ /**
2634
+ * Replay synchronously (no timing simulation)
2635
+ */
2636
+ *replaySync(stream) {
2637
+ const chunks = [...stream.chunks].sort((a, b) => a.index - b.index);
2638
+ for (const chunk of chunks) {
2639
+ this.config.onChunk(chunk);
2640
+ yield chunk;
2641
+ }
2642
+ this.config.onComplete(stream);
2643
+ }
2644
+ /**
2645
+ * Get all chunks at once
2646
+ */
2647
+ getAllChunks(stream) {
2648
+ return [...stream.chunks].sort((a, b) => a.index - b.index);
2649
+ }
2650
+ /**
2651
+ * Get full text content from stream
2652
+ */
2653
+ getFullText(stream) {
2654
+ return stream.chunks.filter((c) => c.type === "text" && c.content).sort((a, b) => a.index - b.index).map((c) => c.content).join("");
2655
+ }
2656
+ /**
2657
+ * Get tool calls from stream
2658
+ */
2659
+ getToolCalls(stream) {
2660
+ return stream.chunks.filter((c) => c.type === "tool_call" && c.toolCall).sort((a, b) => a.index - b.index).map((c) => c.toolCall);
2661
+ }
2662
+ /**
2663
+ * Stop current replay
2664
+ */
2665
+ stop() {
2666
+ if (this.abortController) {
2667
+ this.abortController.abort();
2668
+ }
2669
+ }
2670
+ /**
2671
+ * Update configuration
2672
+ */
2673
+ configure(config) {
2674
+ this.config = { ...this.config, ...config };
2675
+ }
2676
+ delay(ms, signal) {
2677
+ return new Promise((resolve, reject) => {
2678
+ const timeoutId = setTimeout(resolve, ms);
2679
+ signal.addEventListener("abort", () => {
2680
+ clearTimeout(timeoutId);
2681
+ reject(new DOMException("Aborted", "AbortError"));
2682
+ });
2683
+ });
2684
+ }
2685
+ };
2686
+ function createStreamReplayer(config) {
2687
+ return new StreamReplayer(config);
2688
+ }
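+ // Minimal sketch (editorial): replaying a previously recorded stream without
+ // timing simulation and reassembling its text.
+ async function exampleStreamReplay(recordedStream) {
+   const replayer = createStreamReplayer({ simulateTiming: false });
+   let text = "";
+   for await (const piece of replayer.replayText(recordedStream)) {
+     text += piece;
+   }
+   // replayer.getFullText(recordedStream) yields the same string synchronously.
+   return text;
+ }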
2689
+ var DEFAULT_CONFIG12 = {
2690
+ recorder: {},
2691
+ replayer: {},
2692
+ cacheIncomplete: false,
2693
+ minLengthToCache: 10,
2694
+ streamTtl: 3600
2695
+ // 1 hour
2696
+ };
2697
+ var StreamCache = class extends EventEmitter2 {
2698
+ store;
2699
+ similarity;
2700
+ config;
2701
+ recorder;
2702
+ replayer;
2703
+ stats = {
2704
+ totalLookups: 0,
2705
+ totalHits: 0,
2706
+ totalMisses: 0,
2707
+ hitRate: 0,
2708
+ avgReplayLatencyMs: 0,
2709
+ totalStreamsCached: 0,
2710
+ totalBytesCached: 0,
2711
+ avgStreamDurationMs: 0
2712
+ };
2713
+ replayLatencies = [];
2714
+ streamDurations = [];
2715
+ constructor(store, config, similarity) {
2716
+ super();
2717
+ this.store = store;
2718
+ this.similarity = similarity;
2719
+ this.config = { ...DEFAULT_CONFIG12, ...config };
2720
+ this.recorder = new StreamRecorder(this.config.recorder);
2721
+ this.replayer = new StreamReplayer(this.config.replayer);
2722
+ }
2723
+ /**
2724
+ * Look up a cached stream
2725
+ */
2726
+ async lookup(model, messages) {
2727
+ const startTime = performance.now();
2728
+ this.stats.totalLookups++;
2729
+ const key = this.generateStreamKey(model, messages);
2730
+ try {
2731
+ const entry = await this.store.get(key);
2732
+ if (entry) {
2733
+ const stream = this.deserializeStream(entry.response.content);
2734
+ if (stream) {
2735
+ this.stats.totalHits++;
2736
+ this.updateHitRate();
2737
+ const latencyMs2 = performance.now() - startTime;
2738
+ this.replayLatencies.push(latencyMs2);
2739
+ const result = {
2740
+ hit: true,
2741
+ stream,
2742
+ similarity: 1,
2743
+ source: "exact",
2744
+ latencyMs: latencyMs2
2745
+ };
2746
+ this.emit("hit", result);
2747
+ return result;
2748
+ }
2749
+ }
2750
+ if (this.similarity) {
2751
+ const userMessage = messages.find((m) => m.role === "user")?.content;
2752
+ if (userMessage) {
2753
+ const embedding = await this.similarity.embed(userMessage);
2754
+ const results = await this.store.query(embedding, {
2755
+ topK: 1,
2756
+ minSimilarity: 0.92
2757
+ });
2758
+ if (results.entries.length > 0) {
2759
+ const entry2 = results.entries[0];
2760
+ const stream = this.deserializeStream(entry2.response.content);
2761
+ if (stream && entry2.score >= 0.92) {
2762
+ this.stats.totalHits++;
2763
+ this.updateHitRate();
2764
+ const latencyMs2 = performance.now() - startTime;
2765
+ this.replayLatencies.push(latencyMs2);
2766
+ const result = {
2767
+ hit: true,
2768
+ stream,
2769
+ similarity: entry2.score,
2770
+ source: "semantic",
2771
+ latencyMs: latencyMs2
2772
+ };
2773
+ this.emit("hit", result);
2774
+ return result;
2775
+ }
2776
+ }
2777
+ }
2778
+ }
2779
+ this.stats.totalMisses++;
2780
+ this.updateHitRate();
2781
+ const latencyMs = performance.now() - startTime;
2782
+ this.emit("miss", key);
2783
+ return {
2784
+ hit: false,
2785
+ source: "miss",
2786
+ latencyMs
2787
+ };
2788
+ } catch (error) {
2789
+ this.emit("error", error);
2790
+ return {
2791
+ hit: false,
2792
+ source: "miss",
2793
+ latencyMs: performance.now() - startTime
2794
+ };
2795
+ }
2796
+ }
2797
+ /**
2798
+ * Cache a recorded stream
2799
+ */
2800
+ async cache(stream, embedding) {
2801
+ if (!stream.complete && !this.config.cacheIncomplete) {
2802
+ return;
2803
+ }
2804
+ if (stream.totalChars < this.config.minLengthToCache) {
2805
+ return;
2806
+ }
2807
+ const key = this.generateStreamKey(stream.model, stream.messages);
2808
+ try {
2809
+ const cacheMessages = stream.messages.map((m) => ({
2810
+ role: m.role,
2811
+ content: m.content
2812
+ }));
2813
+ await this.store.set(key, {
2814
+ id: generateId(),
2815
+ key,
2816
+ request: {
2817
+ messages: cacheMessages,
2818
+ model: stream.model
2819
+ },
2820
+ response: {
2821
+ content: this.serializeStream(stream),
2822
+ model: stream.model,
2823
+ finishReason: stream.complete ? "stop" : "error",
2824
+ usage: {
2825
+ promptTokens: stream.tokenUsage?.prompt ?? 0,
2826
+ completionTokens: stream.tokenUsage?.completion ?? 0,
2827
+ totalTokens: stream.tokenUsage?.total ?? 0
2828
+ }
2829
+ },
2830
+ embedding,
2831
+ metadata: {
2832
+ createdAt: now(),
2833
+ accessedAt: now(),
2834
+ accessCount: 1,
2835
+ hitCount: 0,
2836
+ ttl: this.config.streamTtl
2837
+ }
2838
+ });
2839
+ this.stats.totalStreamsCached++;
2840
+ this.stats.totalBytesCached += this.estimateStreamSize(stream);
2841
+ this.streamDurations.push(stream.durationMs);
2842
+ this.updateAvgStreamDuration();
2843
+ this.emit("record", stream);
2844
+ } catch (error) {
2845
+ this.emit("error", error);
2846
+ }
2847
+ }
2848
+ /**
2849
+ * Wrap a streaming function with caching
2850
+ */
2851
+ async *wrapStream(model, messages, streamFn, options) {
2852
+ const lookupResult = await this.lookup(model, messages);
2853
+ if (lookupResult.hit && lookupResult.stream) {
2854
+ for await (const chunk of this.replayer.replay(lookupResult.stream)) {
2855
+ yield { content: chunk.content };
2856
+ }
2857
+ return;
2858
+ }
2859
+ const key = this.generateStreamKey(model, messages);
2860
+ this.recorder.start(model, messages, key);
2861
+ try {
2862
+ for await (const chunk of streamFn()) {
2863
+ if (chunk.content) {
2864
+ this.recorder.recordText(chunk.content);
2865
+ }
2866
+ yield chunk;
2867
+ }
2868
+ const stream = this.recorder.complete();
2869
+ await this.cache(stream, options?.embedding);
2870
+ } catch (error) {
2871
+ if (this.recorder.isRecording()) {
2872
+ this.recorder.abort(error.message);
2873
+ }
2874
+ throw error;
2875
+ }
2876
+ }
2877
+ /**
2878
+ * Replay a cached stream
2879
+ */
2880
+ async *replay(stream) {
2881
+ for await (const chunk of this.replayer.replay(stream)) {
2882
+ yield chunk;
2883
+ }
2884
+ }
2885
+ /**
2886
+ * Get cache statistics
2887
+ */
2888
+ getStats() {
2889
+ return { ...this.stats };
2890
+ }
2891
+ /**
2892
+ * Clear the stream cache
2893
+ */
2894
+ async clear() {
2895
+ await this.store.clear();
2896
+ this.stats = {
2897
+ totalLookups: 0,
2898
+ totalHits: 0,
2899
+ totalMisses: 0,
2900
+ hitRate: 0,
2901
+ avgReplayLatencyMs: 0,
2902
+ totalStreamsCached: 0,
2903
+ totalBytesCached: 0,
2904
+ avgStreamDurationMs: 0
2905
+ };
2906
+ this.replayLatencies = [];
2907
+ this.streamDurations = [];
2908
+ }
2909
+ /**
2910
+ * Destroy the stream cache
2911
+ */
2912
+ destroy() {
2913
+ this.recorder.destroy();
2914
+ this.replayer.stop();
2915
+ this.removeAllListeners();
2916
+ }
2917
+ generateStreamKey(model, messages) {
2918
+ const cacheMessages = messages.map((m) => ({
2919
+ role: m.role,
2920
+ content: m.content
2921
+ }));
2922
+ return generateCacheKey(model, cacheMessages);
2923
+ }
2924
+ serializeStream(stream) {
2925
+ return JSON.stringify(stream);
2926
+ }
2927
+ deserializeStream(content) {
2928
+ try {
2929
+ return JSON.parse(content);
2930
+ } catch {
2931
+ return null;
2932
+ }
2933
+ }
2934
+ estimateStreamSize(stream) {
2935
+ return JSON.stringify(stream).length;
2936
+ }
2937
+ updateHitRate() {
2938
+ if (this.stats.totalLookups > 0) {
2939
+ this.stats.hitRate = this.stats.totalHits / this.stats.totalLookups * 100;
2940
+ }
2941
+ }
2942
+ updateAvgStreamDuration() {
2943
+ if (this.streamDurations.length > 0) {
2944
+ this.stats.avgStreamDurationMs = this.streamDurations.reduce((a, b) => a + b, 0) / this.streamDurations.length;
2945
+ }
2946
+ }
2947
+ };
2948
+ function createStreamCache(store, config, similarity) {
2949
+ return new StreamCache(store, config, similarity);
2950
+ }
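+ // Minimal sketch (editorial): wrapping a streaming call so a repeat of the same
+ // prompt replays from the cache. The async generator stands in for a real
+ // model stream; `store` is assumed to be one of this bundle's cache stores.
+ async function exampleStreamCache(store) {
+   const streamCache = createStreamCache(store, { minLengthToCache: 1 });
+   const fakeModelStream = async function* () {
+     yield { content: "Cached " };
+     yield { content: "stream." };
+   };
+   const messages = [{ role: "user", content: "Stream me something" }];
+   let output = "";
+   for await (const chunk of streamCache.wrapStream("gpt-4o-mini", messages, fakeModelStream)) {
+     if (chunk.content) output += chunk.content;
+   }
+   streamCache.destroy();
+   return output;
+ }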
2951
+ var DEFAULT_CONFIG13 = {
2952
+ strategy: "ttl",
2953
+ ttl: {
2954
+ defaultTtl: 3600,
2955
+ softTtl: false
2956
+ },
2957
+ emitEvents: true
2958
+ };
2959
+ var InvalidationManager = class extends EventEmitter2 {
2960
+ store;
2961
+ config;
2962
+ autoInterval = null;
2963
+ stats = {
2964
+ totalInvalidations: 0,
2965
+ ttlRemovals: 0,
2966
+ lruRemovals: 0,
2967
+ eventRemovals: 0,
2968
+ smartRemovals: 0,
2969
+ manualRemovals: 0,
2970
+ totalBytesFreed: 0
2971
+ };
2972
+ accessTimes = /* @__PURE__ */ new Map();
2973
+ constructor(store, config) {
2974
+ super();
2975
+ this.store = store;
2976
+ this.config = { ...DEFAULT_CONFIG13, ...config };
2977
+ }
2978
+ /**
2979
+ * Run invalidation based on configured strategy
2980
+ */
2981
+ async run() {
2982
+ switch (this.config.strategy) {
2983
+ case "ttl":
2984
+ return this.runTTLInvalidation();
2985
+ case "lru":
2986
+ return this.runLRUInvalidation();
2987
+ case "smart":
2988
+ return this.runSmartInvalidation();
2989
+ default:
2990
+ return this.runTTLInvalidation();
2991
+ }
2992
+ }
2993
+ /**
2994
+ * Run TTL-based invalidation
2995
+ */
2996
+ async runTTLInvalidation() {
2997
+ const startTime = performance.now();
2998
+ const invalidatedKeys = [];
2999
+ let bytesFreed = 0;
3000
+ const currentTime = now();
3001
+ const keys = await this.store.keys();
3002
+ for (const key of keys) {
3003
+ const entry = await this.store.get(key);
3004
+ if (!entry) continue;
3005
+ const ttl = entry.metadata.ttl ?? this.getTTL(entry.request.model, entry.metadata.namespace);
3006
+ const age = (currentTime - entry.metadata.createdAt) / 1e3;
3007
+ if (age >= ttl) {
3008
+ if (this.config.ttl?.softTtl && this.config.ttl?.gracePeriod) {
3009
+ if (age < ttl + this.config.ttl.gracePeriod) {
3010
+ continue;
3011
+ }
3012
+ }
3013
+ const deleted = await this.store.delete(key);
3014
+ if (deleted) {
3015
+ invalidatedKeys.push(key);
3016
+ bytesFreed += this.estimateEntrySize(entry);
3017
+ }
3018
+ }
3019
+ }
3020
+ this.stats.totalInvalidations++;
3021
+ this.stats.ttlRemovals += invalidatedKeys.length;
3022
+ this.stats.totalBytesFreed += bytesFreed;
3023
+ this.stats.lastInvalidationAt = now();
3024
+ const result = {
3025
+ invalidatedKeys,
3026
+ entriesRemoved: invalidatedKeys.length,
3027
+ bytesFreed,
3028
+ durationMs: performance.now() - startTime
3029
+ };
3030
+ this.emitEvent("ttl", invalidatedKeys, bytesFreed);
3031
+ return result;
3032
+ }
3033
+ /**
3034
+ * Run LRU-based invalidation
3035
+ */
3036
+ async runLRUInvalidation() {
3037
+ const startTime = performance.now();
3038
+ const invalidatedKeys = [];
3039
+ let bytesFreed = 0;
3040
+ const maxEntries = this.config.lru?.maxEntries ?? 1e3;
3041
+ // Note: lru.maxSizeBytes is accepted in the config but not enforced by this sweep (entry count only).
3042
+ const batchSize = this.config.lru?.evictionBatchSize ?? 10;
3043
+ const minAge = this.config.lru?.minAge ?? 0;
3044
+ const currentSize = await this.store.size();
3045
+ if (currentSize <= maxEntries) {
3046
+ return {
3047
+ invalidatedKeys: [],
3048
+ entriesRemoved: 0,
3049
+ bytesFreed: 0,
3050
+ durationMs: performance.now() - startTime
3051
+ };
3052
+ }
3053
+ const keys = await this.store.keys();
3054
+ const entriesWithAccess = [];
3055
+ for (const key of keys) {
3056
+ const entry = await this.store.get(key);
3057
+ if (entry) {
3058
+ entriesWithAccess.push({
3059
+ key,
3060
+ accessedAt: entry.metadata.accessedAt,
3061
+ size: this.estimateEntrySize(entry)
3062
+ });
3063
+ }
3064
+ }
3065
+ entriesWithAccess.sort((a, b) => a.accessedAt - b.accessedAt);
3066
+ const toRemove = Math.min(currentSize - maxEntries, batchSize);
3067
+ const currentTime = now();
3068
+ for (let i = 0; i < toRemove && i < entriesWithAccess.length; i++) {
3069
+ const { key, accessedAt, size } = entriesWithAccess[i];
3070
+ const age = (currentTime - accessedAt) / 1e3;
3071
+ if (age < minAge) continue;
3072
+ const deleted = await this.store.delete(key);
3073
+ if (deleted) {
3074
+ invalidatedKeys.push(key);
3075
+ bytesFreed += size;
3076
+ }
3077
+ }
3078
+ this.stats.totalInvalidations++;
3079
+ this.stats.lruRemovals += invalidatedKeys.length;
3080
+ this.stats.totalBytesFreed += bytesFreed;
3081
+ this.stats.lastInvalidationAt = now();
3082
+ const result = {
3083
+ invalidatedKeys,
3084
+ entriesRemoved: invalidatedKeys.length,
3085
+ bytesFreed,
3086
+ durationMs: performance.now() - startTime
3087
+ };
3088
+ this.emitEvent("lru", invalidatedKeys, bytesFreed);
3089
+ return result;
3090
+ }
3091
+ /**
3092
+ * Run smart invalidation (combines TTL expiry with access-rate analysis)
3093
+ */
3094
+ async runSmartInvalidation() {
3095
+ const startTime = performance.now();
3096
+ const invalidatedKeys = [];
3097
+ let bytesFreed = 0;
3098
+ const minHitRate = this.config.smart?.minHitRate ?? 0.1;
3099
+ const currentTime = now();
3100
+ const keys = await this.store.keys();
3101
+ for (const key of keys) {
3102
+ const entry = await this.store.get(key);
3103
+ if (!entry) continue;
3104
+ let shouldInvalidate = false;
3105
+ const ttl = entry.metadata.ttl ?? this.getTTL(entry.request.model, entry.metadata.namespace);
3106
+ const age = (currentTime - entry.metadata.createdAt) / 1e3;
3107
+ if (age >= ttl) {
3108
+ shouldInvalidate = true;
3109
+ }
3110
+ if (this.config.smart?.analyzeHitRate && entry.metadata.accessCount > 0) {
3111
+ const accessRate = entry.metadata.accessCount / Math.max(age / 3600, 1);
3112
+ if (accessRate < minHitRate) {
3113
+ shouldInvalidate = true;
3114
+ }
3115
+ }
3116
+ if (shouldInvalidate) {
3117
+ const deleted = await this.store.delete(key);
3118
+ if (deleted) {
3119
+ invalidatedKeys.push(key);
3120
+ bytesFreed += this.estimateEntrySize(entry);
3121
+ }
3122
+ }
3123
+ }
3124
+ this.stats.totalInvalidations++;
3125
+ this.stats.smartRemovals += invalidatedKeys.length;
3126
+ this.stats.totalBytesFreed += bytesFreed;
3127
+ this.stats.lastInvalidationAt = now();
3128
+ const result = {
3129
+ invalidatedKeys,
3130
+ entriesRemoved: invalidatedKeys.length,
3131
+ bytesFreed,
3132
+ durationMs: performance.now() - startTime
3133
+ };
3134
+ this.emitEvent("smart", invalidatedKeys, bytesFreed);
3135
+ return result;
3136
+ }
3137
+ /**
3138
+ * Manually invalidate specific keys
3139
+ */
3140
+ async invalidateKeys(keys) {
3141
+ const startTime = performance.now();
3142
+ const invalidatedKeys = [];
3143
+ let bytesFreed = 0;
3144
+ for (const key of keys) {
3145
+ const entry = await this.store.get(key);
3146
+ if (entry) {
3147
+ const size = this.estimateEntrySize(entry);
3148
+ const deleted = await this.store.delete(key);
3149
+ if (deleted) {
3150
+ invalidatedKeys.push(key);
3151
+ bytesFreed += size;
3152
+ }
3153
+ }
3154
+ }
3155
+ this.stats.totalInvalidations++;
3156
+ this.stats.manualRemovals += invalidatedKeys.length;
3157
+ this.stats.totalBytesFreed += bytesFreed;
3158
+ this.stats.lastInvalidationAt = now();
3159
+ const result = {
3160
+ invalidatedKeys,
3161
+ entriesRemoved: invalidatedKeys.length,
3162
+ bytesFreed,
3163
+ durationMs: performance.now() - startTime
3164
+ };
3165
+ this.emitEvent("manual", invalidatedKeys, bytesFreed);
3166
+ return result;
3167
+ }
3168
+ /**
3169
+ * Invalidate by pattern (e.g., namespace or model)
3170
+ */
3171
+ async invalidateByPattern(options) {
3172
+ const startTime = performance.now();
3173
+ const invalidatedKeys = [];
3174
+ let bytesFreed = 0;
3175
+ const currentTime = now();
3176
+ const keys = await this.store.keys();
3177
+ for (const key of keys) {
3178
+ const entry = await this.store.get(key);
3179
+ if (!entry) continue;
3180
+ let matches = true;
3181
+ if (options.namespace && entry.metadata.namespace !== options.namespace) {
3182
+ matches = false;
3183
+ }
3184
+ if (options.model && entry.request.model !== options.model) {
3185
+ matches = false;
3186
+ }
3187
+ if (options.olderThan) {
3188
+ const age = (currentTime - entry.metadata.createdAt) / 1e3;
3189
+ if (age < options.olderThan) {
3190
+ matches = false;
3191
+ }
3192
+ }
3193
+ if (matches) {
3194
+ const size = this.estimateEntrySize(entry);
3195
+ const deleted = await this.store.delete(key);
3196
+ if (deleted) {
3197
+ invalidatedKeys.push(key);
3198
+ bytesFreed += size;
3199
+ }
3200
+ }
3201
+ }
3202
+ this.stats.totalInvalidations++;
3203
+ this.stats.manualRemovals += invalidatedKeys.length;
3204
+ this.stats.totalBytesFreed += bytesFreed;
3205
+ this.stats.lastInvalidationAt = now();
3206
+ const result = {
3207
+ invalidatedKeys,
3208
+ entriesRemoved: invalidatedKeys.length,
3209
+ bytesFreed,
3210
+ durationMs: performance.now() - startTime
3211
+ };
3212
+ this.emitEvent("manual", invalidatedKeys, bytesFreed);
3213
+ return result;
3214
+ }
3215
+ /**
3216
+ * Start automatic invalidation
3217
+ */
3218
+ startAuto(intervalMs = 6e4) {
3219
+ if (this.autoInterval) {
3220
+ this.stopAuto();
3221
+ }
3222
+ this.autoInterval = setInterval(() => {
3223
+ void (async () => {
3224
+ try {
3225
+ await this.run();
3226
+ } catch (error) {
3227
+ this.emit("error", error);
3228
+ }
3229
+ })();
3230
+ }, intervalMs);
3231
+ }
3232
+ /**
3233
+ * Stop automatic invalidation
3234
+ */
3235
+ stopAuto() {
3236
+ if (this.autoInterval) {
3237
+ clearInterval(this.autoInterval);
3238
+ this.autoInterval = null;
3239
+ }
3240
+ }
3241
+ /**
3242
+ * Get invalidation statistics
3243
+ */
3244
+ getStats() {
3245
+ return { ...this.stats };
3246
+ }
3247
+ /**
3248
+ * Reset statistics
3249
+ */
3250
+ resetStats() {
3251
+ this.stats = {
3252
+ totalInvalidations: 0,
3253
+ ttlRemovals: 0,
3254
+ lruRemovals: 0,
3255
+ eventRemovals: 0,
3256
+ smartRemovals: 0,
3257
+ manualRemovals: 0,
3258
+ totalBytesFreed: 0
3259
+ };
3260
+ }
3261
+ /**
3262
+ * Destroy the manager
3263
+ */
3264
+ destroy() {
3265
+ this.stopAuto();
3266
+ this.removeAllListeners();
3267
+ this.accessTimes.clear();
3268
+ }
3269
+ getTTL(model, namespace) {
3270
+ if (this.config.ttl?.modelTtls?.[model]) {
3271
+ return this.config.ttl.modelTtls[model];
3272
+ }
3273
+ if (namespace && this.config.ttl?.namespaceTtls?.[namespace]) {
3274
+ return this.config.ttl.namespaceTtls[namespace];
3275
+ }
3276
+ return this.config.ttl?.defaultTtl ?? 3600;
3277
+ }
3278
+ estimateEntrySize(entry) {
3279
+ return entry.response.content.length * 2 + 200;
3280
+ }
3281
+ emitEvent(reason, keys, bytesFreed) {
3282
+ if (this.config.emitEvents && keys.length > 0) {
3283
+ const event = {
3284
+ timestamp: now(),
3285
+ keys,
3286
+ reason,
3287
+ entriesRemoved: keys.length,
3288
+ bytesFreed
3289
+ };
3290
+ this.emit("invalidate", event);
3291
+ this.config.onInvalidate?.(event);
3292
+ }
3293
+ }
3294
+ };
3295
+ function createInvalidationManager(store, config) {
3296
+ return new InvalidationManager(store, config);
3297
+ }
3298
+
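+ // Minimal sketch (editorial): TTL sweeps on a schedule plus a targeted purge of
+ // one model's entries; `store` is assumed to be one of this bundle's stores.
+ async function exampleInvalidation(store) {
+   const manager = createInvalidationManager(store, {
+     strategy: "ttl",
+     ttl: { defaultTtl: 600, softTtl: false }
+   });
+   manager.on("invalidate", (event) => {
+     // event.keys, event.reason and event.bytesFreed describe what was removed
+   });
+   await manager.invalidateByPattern({ model: "gpt-3.5-turbo" });
+   manager.startAuto(30000); // sweep every 30 seconds
+   const stats = manager.getStats();
+   manager.destroy();
+   return stats;
+ }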
3299
+ export { BaseCacheStore, BaseMatchStrategy, CacheAnalytics, ChunkBuffer, ExactMatchStrategy, HybridMatchStrategy, InvalidationManager, MemoryCacheStore, PineconeCacheStore, RedisCacheStore, SQLiteCacheStore, SemanticCache, SemanticMatchStrategy, SimilarityEngine, StreamCache, StreamRecorder, StreamReplayer, TieredCacheStore, cosineSimilarity, createCacheAnalytics, createChunkBuffer, createExactMatchStrategy, createHybridMatchStrategy, createInvalidationManager, createMemoryCacheStore, createPineconeCacheStore, createRedisCacheStore, createSQLiteCacheStore, createSemanticCache, createSemanticMatchStrategy, createSimilarityEngine, createStreamCache, createStreamRecorder, createStreamReplayer, createTieredCacheStore, distanceToSimilarity, dotProduct, estimateEntrySize, euclideanDistance, extractSystemPrompt, extractUserMessage, generateCacheKey, generateConversationFingerprint, generateId, generateSemanticKey, isExpired, magnitude, manhattanDistance, normalize, normalizeWhitespace, now };
3300
+ //# sourceMappingURL=index.js.map