@lov3kaizen/agentsea-cache 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +323 -0
- package/dist/BaseMatchStrategy-1E1SHaUt.d.ts +60 -0
- package/dist/SemanticCache-vysguwUQ.d.ts +65 -0
- package/dist/SimilarityEngine-Cwv_mF9a.d.ts +41 -0
- package/dist/analytics/index.d.ts +123 -0
- package/dist/analytics/index.js +275 -0
- package/dist/analytics/index.js.map +1 -0
- package/dist/cache.types-DMuyQseO.d.ts +99 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.js +3301 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/agentsea/index.d.ts +103 -0
- package/dist/integrations/agentsea/index.js +201 -0
- package/dist/integrations/agentsea/index.js.map +1 -0
- package/dist/integrations/gateway/index.d.ts +98 -0
- package/dist/integrations/gateway/index.js +205 -0
- package/dist/integrations/gateway/index.js.map +1 -0
- package/dist/invalidation/index.d.ts +113 -0
- package/dist/invalidation/index.js +360 -0
- package/dist/invalidation/index.js.map +1 -0
- package/dist/store.types-BQy5Yyz9.d.ts +111 -0
- package/dist/stores/index.d.ts +138 -0
- package/dist/stores/index.js +1147 -0
- package/dist/stores/index.js.map +1 -0
- package/dist/strategies/index.d.ts +36 -0
- package/dist/strategies/index.js +280 -0
- package/dist/strategies/index.js.map +1 -0
- package/dist/streaming/index.d.ts +206 -0
- package/dist/streaming/index.js +794 -0
- package/dist/streaming/index.js.map +1 -0
- package/package.json +108 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 lovekaizen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# @lov3kaizen/agentsea-cache
|
|
2
|
+
|
|
3
|
+
Semantic caching layer for LLM responses. Reduces LLM costs by 30-50% through intelligent caching using exact match and semantic similarity.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Exact Match Caching** - Hash-based caching for identical requests
|
|
8
|
+
- **Semantic Matching** - Embedding-based similarity for semantically similar queries
|
|
9
|
+
- **Multiple Backends** - Memory, Redis, SQLite, Pinecone support
|
|
10
|
+
- **Cost Analytics** - Track tokens saved and cost reduction
|
|
11
|
+
- **Streaming Support** - Cache and replay streaming LLM responses
|
|
12
|
+
- **Multi-Tier Caching** - L1/L2/L3 cache hierarchy with promotion
|
|
13
|
+
- **AgentSea Integration** - CachedProvider and CacheMiddleware for agents
|
|
14
|
+
- **Gateway Integration** - Cache layer for LLM Gateway routing
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pnpm add @lov3kaizen/agentsea-cache
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
For semantic matching, also install:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pnpm add @lov3kaizen/agentsea-embeddings
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
### Basic Exact Match Caching
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import {
|
|
34
|
+
SemanticCache,
|
|
35
|
+
MemoryCacheStore,
|
|
36
|
+
ExactMatchStrategy,
|
|
37
|
+
} from '@lov3kaizen/agentsea-cache';
|
|
38
|
+
|
|
39
|
+
// Create cache with memory store and exact matching
|
|
40
|
+
const cache = new SemanticCache(
|
|
41
|
+
{
|
|
42
|
+
defaultTTL: 3600, // 1 hour
|
|
43
|
+
matchStrategy: 'exact',
|
|
44
|
+
},
|
|
45
|
+
new MemoryCacheStore({ type: 'memory', maxEntries: 10000 }),
|
|
46
|
+
new ExactMatchStrategy(),
|
|
47
|
+
);
|
|
48
|
+
|
|
49
|
+
// Wrap your LLM call
|
|
50
|
+
const response = await cache.wrap(
|
|
51
|
+
{
|
|
52
|
+
model: 'gpt-4o',
|
|
53
|
+
messages: [{ role: 'user', content: 'What is the capital of France?' }],
|
|
54
|
+
},
|
|
55
|
+
async (request) => {
|
|
56
|
+
// Your LLM call here; the resolved value must include a string `content` field (map the provider response if needed)
|
|
57
|
+
return await openai.chat.completions.create(request);
|
|
58
|
+
},
|
|
59
|
+
);
|
|
60
|
+
|
|
61
|
+
console.log('Cached:', response._cache?.hit);
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Semantic Matching (with Embeddings Package)
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
import {
|
|
68
|
+
SemanticCache,
|
|
69
|
+
MemoryCacheStore,
|
|
70
|
+
HybridMatchStrategy,
|
|
71
|
+
SimilarityEngine,
|
|
72
|
+
} from '@lov3kaizen/agentsea-cache';
|
|
73
|
+
import { OpenAIProvider } from '@lov3kaizen/agentsea-embeddings';
|
|
74
|
+
|
|
75
|
+
// Create embedding provider
|
|
76
|
+
const embeddingProvider = new OpenAIProvider({
|
|
77
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
78
|
+
model: 'text-embedding-3-small',
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
// Create similarity engine
|
|
82
|
+
const similarity = new SimilarityEngine({
|
|
83
|
+
provider: embeddingProvider,
|
|
84
|
+
cacheEmbeddings: true,
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
// Create cache with semantic matching
|
|
88
|
+
const cache = new SemanticCache(
|
|
89
|
+
{
|
|
90
|
+
defaultTTL: 3600,
|
|
91
|
+
similarityThreshold: 0.92, // 92% similarity required
|
|
92
|
+
matchStrategy: 'hybrid', // Try exact first, then semantic
|
|
93
|
+
},
|
|
94
|
+
new MemoryCacheStore({ type: 'memory' }),
|
|
95
|
+
new HybridMatchStrategy(),
|
|
96
|
+
similarity,
|
|
97
|
+
);
|
|
98
|
+
|
|
99
|
+
// Similar queries will hit cache
|
|
100
|
+
const response1 = await cache.wrap(
|
|
101
|
+
{
|
|
102
|
+
model: 'gpt-4o',
|
|
103
|
+
messages: [{ role: 'user', content: 'What is the capital of France?' }],
|
|
104
|
+
},
|
|
105
|
+
llmCall,
|
|
106
|
+
);
|
|
107
|
+
|
|
108
|
+
// This will hit cache due to semantic similarity!
|
|
109
|
+
const response2 = await cache.wrap(
|
|
110
|
+
{
|
|
111
|
+
model: 'gpt-4o',
|
|
112
|
+
messages: [{ role: 'user', content: "What's France's capital city?" }],
|
|
113
|
+
},
|
|
114
|
+
llmCall,
|
|
115
|
+
);
|
|
116
|
+
|
|
117
|
+
console.log('Second call cached:', response2._cache?.hit); // true
|
|
118
|
+
console.log('Similarity:', response2._cache?.similarity); // ~0.95
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Configuration
|
|
122
|
+
|
|
123
|
+
### SemanticCacheConfig
|
|
124
|
+
|
|
125
|
+
| Option | Type | Default | Description |
|
|
126
|
+
| --------------------- | ------- | --------- | ------------------------------------------------ |
|
|
127
|
+
| `defaultTTL` | number | 3600 | Default TTL in seconds (0 = no expiry) |
|
|
128
|
+
| `similarityThreshold` | number | 0.92 | Similarity threshold for semantic matching (0-1) |
|
|
129
|
+
| `maxEntries` | number | 10000 | Maximum cache entries |
|
|
130
|
+
| `matchStrategy` | string | 'hybrid' | 'exact', 'semantic', or 'hybrid' |
|
|
131
|
+
| `namespace` | string | 'default' | Namespace for multi-tenant isolation |
|
|
132
|
+
| `analyticsEnabled` | boolean | true | Enable analytics tracking |
|
|
133
|
+
|
|
134
|
+
### Store Options
|
|
135
|
+
|
|
136
|
+
#### MemoryStoreConfig
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
{
|
|
140
|
+
type: 'memory',
|
|
141
|
+
maxEntries: 10000,
|
|
142
|
+
maxSizeBytes: 1024 * 1024 * 1024, // 1GB
|
|
143
|
+
evictionPolicy: 'lru'
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
#### RedisStoreConfig
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
{
|
|
151
|
+
type: 'redis',
|
|
152
|
+
url: 'redis://localhost:6379',
|
|
153
|
+
keyPrefix: 'llm-cache'
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
#### PineconeStoreConfig
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
{
|
|
161
|
+
type: 'pinecone',
|
|
162
|
+
apiKey: process.env.PINECONE_API_KEY,
|
|
163
|
+
index: 'llm-cache',
|
|
164
|
+
namespace: 'production'
|
|
165
|
+
}
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
#### TieredStoreConfig
|
|
169
|
+
|
|
170
|
+
```typescript
|
|
171
|
+
{
|
|
172
|
+
type: 'tiered',
|
|
173
|
+
tiers: [
|
|
174
|
+
{ name: 'l1-memory', priority: 1, store: memoryStore, ttl: 300 },
|
|
175
|
+
{ name: 'l2-redis', priority: 2, store: redisStore, ttl: 3600 }
|
|
176
|
+
],
|
|
177
|
+
writeThrough: true,
|
|
178
|
+
promoteOnHit: true
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Analytics
|
|
183
|
+
|
|
184
|
+
Track cache performance and cost savings:
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
// Get statistics
|
|
188
|
+
const stats = cache.getStats();
|
|
189
|
+
console.log(`Hit Rate: ${(stats.hitRate * 100).toFixed(1)}%`);
|
|
190
|
+
console.log(`Tokens Saved: ${stats.tokensSaved.toLocaleString()}`);
|
|
191
|
+
console.log(`Cost Savings: $${stats.costSavingsUSD.toFixed(2)}`);
|
|
192
|
+
|
|
193
|
+
// Get detailed analytics
|
|
194
|
+
const analytics = cache.getAnalytics();
|
|
195
|
+
const report = analytics.getCostSavingsReport();
|
|
196
|
+
console.log(`Reduction: ${report.reductionPercent.toFixed(1)}%`);
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## API Reference
|
|
200
|
+
|
|
201
|
+
### SemanticCache
|
|
202
|
+
|
|
203
|
+
```typescript
|
|
204
|
+
class SemanticCache {
|
|
205
|
+
// Wrap an LLM call with caching
|
|
206
|
+
wrap<T>(request, fn, options?): Promise<T>;
|
|
207
|
+
|
|
208
|
+
// Direct cache operations
|
|
209
|
+
get(request, options?): Promise<CacheLookupResult>;
|
|
210
|
+
set(request, response, options?): Promise<void>;
|
|
211
|
+
delete(key): Promise<boolean>;
|
|
212
|
+
clear(): Promise<void>;
|
|
213
|
+
|
|
214
|
+
// Invalidation
|
|
215
|
+
invalidateByPattern(pattern): Promise<number>;
|
|
216
|
+
invalidateByTags(tags): Promise<number>;
|
|
217
|
+
|
|
218
|
+
// Analytics
|
|
219
|
+
getStats(): CacheStats;
|
|
220
|
+
getAnalytics(): CacheAnalytics;
|
|
221
|
+
|
|
222
|
+
// Health
|
|
223
|
+
checkHealth(): Promise<StoreHealth>;
|
|
224
|
+
close(): Promise<void>;
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### WrapOptions
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
interface WrapOptions {
|
|
232
|
+
ttl?: number; // Custom TTL
|
|
233
|
+
tags?: string[]; // Tags for grouping
|
|
234
|
+
namespace?: string; // Namespace override
|
|
235
|
+
skipCache?: boolean; // Bypass cache
|
|
236
|
+
forceRefresh?: boolean; // Force update
|
|
237
|
+
}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Streaming Cache
|
|
241
|
+
|
|
242
|
+
Cache and replay streaming LLM responses:
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
import { StreamCache, MemoryCacheStore } from '@lov3kaizen/agentsea-cache';
|
|
246
|
+
|
|
247
|
+
const streamCache = new StreamCache(new MemoryCacheStore({ type: 'memory' }), {
|
|
248
|
+
minLengthToCache: 50,
|
|
249
|
+
cacheIncomplete: false,
|
|
250
|
+
streamTtl: 3600,
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
// Wrap streaming calls
|
|
254
|
+
const stream = streamCache.wrapStream('gpt-4o', messages, async function* () {
|
|
255
|
+
for await (const chunk of llm.stream(request)) {
|
|
256
|
+
yield chunk;
|
|
257
|
+
}
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
// Cached streams are replayed transparently
|
|
261
|
+
for await (const chunk of stream) {
|
|
262
|
+
process.stdout.write(chunk.content);
|
|
263
|
+
}
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
## AgentSea Integration
|
|
267
|
+
|
|
268
|
+
### CachedProvider
|
|
269
|
+
|
|
270
|
+
Wrap any LLM provider with caching:
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
import { CachedProvider } from '@lov3kaizen/agentsea-cache';
|
|
274
|
+
|
|
275
|
+
const cachedProvider = new CachedProvider({
|
|
276
|
+
provider: anthropicProvider,
|
|
277
|
+
cache: semanticCache,
|
|
278
|
+
skipModels: ['gpt-4-vision'], // Don't cache vision models
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
// Uses cache transparently
|
|
282
|
+
const response = await cachedProvider.complete({
|
|
283
|
+
model: 'claude-sonnet-4-20250514',
|
|
284
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
285
|
+
});
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### CacheMiddleware
|
|
289
|
+
|
|
290
|
+
Add caching to agent pipelines:
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
import { CacheMiddleware } from '@lov3kaizen/agentsea-cache';
|
|
294
|
+
|
|
295
|
+
const middleware = new CacheMiddleware({
|
|
296
|
+
cache: semanticCache,
|
|
297
|
+
skipToolRequests: true, // Don't cache tool-using requests
|
|
298
|
+
defaultTTL: 1800,
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
const response = await middleware.handle(request, next);
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Examples
|
|
305
|
+
|
|
306
|
+
See the `examples/` directory of the source repository (it is not included in the published package) for complete examples:
|
|
307
|
+
|
|
308
|
+
- `basic-cache.ts` - Exact match caching
|
|
309
|
+
- `semantic-similarity.ts` - Semantic matching with embeddings
|
|
310
|
+
- `streaming-cache.ts` - Caching streaming responses
|
|
311
|
+
- `multi-tier.ts` - Multi-tier cache hierarchy
|
|
312
|
+
- `agentsea-integration.ts` - AgentSea CachedProvider and Middleware
|
|
313
|
+
|
|
314
|
+
## Roadmap
|
|
315
|
+
|
|
316
|
+
- [x] Phase 1: Core cache with exact matching
|
|
317
|
+
- [x] Phase 2: Semantic matching with embeddings
|
|
318
|
+
- [x] Phase 3: Streaming cache, multi-tier, invalidation
|
|
319
|
+
- [x] Phase 4: AgentSea and Gateway integrations
|
|
320
|
+
|
|
321
|
+
## License
|
|
322
|
+
|
|
323
|
+
MIT
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { C as CacheMessage, a as CacheLookupResult, b as CacheEntry } from './cache.types-DMuyQseO.js';
|
|
2
|
+
import { B as BaseCacheStore } from './store.types-BQy5Yyz9.js';
|
|
3
|
+
import { S as SimilarityEngine } from './SimilarityEngine-Cwv_mF9a.js';
|
|
4
|
+
|
|
5
|
+
/** Name of a cache matching strategy; used as the `name` of a `BaseMatchStrategy` and as `MatchResult.strategy`. */
type MatchStrategyType =
  | 'exact'
  | 'semantic'
  | 'hybrid'
  | 'fuzzy';
|
|
6
|
+
interface MatchOptions {
|
|
7
|
+
threshold?: number;
|
|
8
|
+
namespace?: string;
|
|
9
|
+
topK?: number;
|
|
10
|
+
matchModel?: boolean;
|
|
11
|
+
}
|
|
12
|
+
interface MatchRequest {
|
|
13
|
+
model: string;
|
|
14
|
+
messages: CacheMessage[];
|
|
15
|
+
temperature?: number;
|
|
16
|
+
systemPrompt?: string;
|
|
17
|
+
}
|
|
18
|
+
interface MatchResult extends CacheLookupResult {
|
|
19
|
+
strategy: MatchStrategyType;
|
|
20
|
+
candidates?: Array<{
|
|
21
|
+
entry: CacheEntry;
|
|
22
|
+
score: number;
|
|
23
|
+
reason: string;
|
|
24
|
+
}>;
|
|
25
|
+
}
|
|
26
|
+
interface ExactMatchConfig {
|
|
27
|
+
normalizeWhitespace?: boolean;
|
|
28
|
+
hashFields?: Array<'model' | 'messages' | 'temperature' | 'systemPrompt'>;
|
|
29
|
+
}
|
|
30
|
+
interface SemanticMatchConfig {
|
|
31
|
+
threshold?: number;
|
|
32
|
+
matchModel?: boolean;
|
|
33
|
+
topK?: number;
|
|
34
|
+
}
|
|
35
|
+
interface HybridMatchConfig {
|
|
36
|
+
exact?: ExactMatchConfig;
|
|
37
|
+
semantic?: SemanticMatchConfig;
|
|
38
|
+
semanticPatterns?: RegExp[];
|
|
39
|
+
exactOnlyPatterns?: RegExp[];
|
|
40
|
+
}
|
|
41
|
+
interface FuzzyMatchConfig {
|
|
42
|
+
minSimilarity?: number;
|
|
43
|
+
maxDistance?: number;
|
|
44
|
+
}
|
|
45
|
+
interface ThresholdConfig {
|
|
46
|
+
base: number;
|
|
47
|
+
contextThresholds?: Record<string, number>;
|
|
48
|
+
autoAdjust?: boolean;
|
|
49
|
+
min?: number;
|
|
50
|
+
max?: number;
|
|
51
|
+
}
|
|
52
|
+
/** Category label that a `ContextDetector` assigns to a match request. */
type ContextType =
  | 'code'
  | 'chat'
  | 'analysis'
  | 'creative'
  | 'default';
|
|
53
|
+
/** Callback that inspects a `MatchRequest` and returns the `ContextType` it belongs to. */
type ContextDetector = (
  request: MatchRequest,
) => ContextType;
|
|
54
|
+
|
|
55
|
+
declare abstract class BaseMatchStrategy {
|
|
56
|
+
abstract readonly name: MatchStrategyType;
|
|
57
|
+
abstract match(request: MatchRequest, store: BaseCacheStore, similarity?: SimilarityEngine, options?: MatchOptions): Promise<CacheLookupResult>;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export { BaseMatchStrategy as B, type ContextType as C, type ExactMatchConfig as E, type FuzzyMatchConfig as F, type HybridMatchConfig as H, type MatchStrategyType as M, type SemanticMatchConfig as S, type ThresholdConfig as T, type MatchOptions as a, type MatchRequest as b, type MatchResult as c, type ContextDetector as d };
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { EventEmitter } from 'eventemitter3';
|
|
2
|
+
import { b as CacheEntry, S as SemanticCacheConfig, C as CacheMessage, W as WrapOptions, a as CacheLookupResult, c as CacheStats } from './cache.types-DMuyQseO.js';
|
|
3
|
+
import { B as BaseCacheStore, S as StoreHealth } from './store.types-BQy5Yyz9.js';
|
|
4
|
+
import { B as BaseMatchStrategy } from './BaseMatchStrategy-1E1SHaUt.js';
|
|
5
|
+
import { S as SimilarityEngine } from './SimilarityEngine-Cwv_mF9a.js';
|
|
6
|
+
import { CacheAnalytics } from './analytics/index.js';
|
|
7
|
+
|
|
8
|
+
interface SemanticCacheEvents {
|
|
9
|
+
hit: (entry: CacheEntry, similarity: number) => void;
|
|
10
|
+
miss: (key: string, reason: string) => void;
|
|
11
|
+
set: (entry: CacheEntry) => void;
|
|
12
|
+
delete: (key: string) => void;
|
|
13
|
+
evict: (entry: CacheEntry, reason: string) => void;
|
|
14
|
+
error: (error: Error, context: string) => void;
|
|
15
|
+
}
|
|
16
|
+
interface CacheRequest {
|
|
17
|
+
model: string;
|
|
18
|
+
messages: CacheMessage[];
|
|
19
|
+
temperature?: number;
|
|
20
|
+
maxTokens?: number;
|
|
21
|
+
tools?: unknown[];
|
|
22
|
+
}
|
|
23
|
+
interface CacheResponseInput {
|
|
24
|
+
content: string;
|
|
25
|
+
model?: string;
|
|
26
|
+
usage?: {
|
|
27
|
+
promptTokens: number;
|
|
28
|
+
completionTokens: number;
|
|
29
|
+
totalTokens: number;
|
|
30
|
+
};
|
|
31
|
+
finishReason?: string;
|
|
32
|
+
toolCalls?: unknown[];
|
|
33
|
+
}
|
|
34
|
+
declare class SemanticCache extends EventEmitter<SemanticCacheEvents> {
|
|
35
|
+
private readonly config;
|
|
36
|
+
private readonly store;
|
|
37
|
+
private readonly strategy;
|
|
38
|
+
private readonly similarity?;
|
|
39
|
+
private readonly analytics;
|
|
40
|
+
private stats;
|
|
41
|
+
constructor(config: Partial<SemanticCacheConfig>, store: BaseCacheStore, strategy: BaseMatchStrategy, similarity?: SimilarityEngine);
|
|
42
|
+
private createInitialStats;
|
|
43
|
+
wrap<T extends CacheResponseInput>(request: CacheRequest, fn: (req: CacheRequest) => Promise<T>, options?: WrapOptions): Promise<T & {
|
|
44
|
+
_cache?: {
|
|
45
|
+
hit: boolean;
|
|
46
|
+
similarity?: number;
|
|
47
|
+
};
|
|
48
|
+
}>;
|
|
49
|
+
get(request: CacheRequest, options?: WrapOptions): Promise<CacheLookupResult>;
|
|
50
|
+
set(request: CacheRequest, response: CacheResponseInput, options?: WrapOptions): Promise<void>;
|
|
51
|
+
delete(key: string): Promise<boolean>;
|
|
52
|
+
clear(): Promise<void>;
|
|
53
|
+
invalidateByPattern(pattern: RegExp): Promise<number>;
|
|
54
|
+
invalidateByTags(tags: string[]): Promise<number>;
|
|
55
|
+
getStats(): CacheStats;
|
|
56
|
+
getAnalytics(): CacheAnalytics;
|
|
57
|
+
getConfig(): Readonly<SemanticCacheConfig>;
|
|
58
|
+
checkHealth(): Promise<StoreHealth>;
|
|
59
|
+
close(): Promise<void>;
|
|
60
|
+
private updateStats;
|
|
61
|
+
private updateHitRate;
|
|
62
|
+
}
|
|
63
|
+
/**
 * Factory declaration for `SemanticCache`.
 *
 * Takes the same arguments, in the same order, as the `SemanticCache`
 * constructor declared in this file. The implementation is not part of this
 * declaration file.
 *
 * @param config - Partial cache configuration.
 * @param store - Backing cache store.
 * @param strategy - Matching strategy used for lookups.
 * @param similarity - Optional similarity engine.
 * @returns A `SemanticCache` instance.
 */
declare function createSemanticCache(
  config: Partial<SemanticCacheConfig>,
  store: BaseCacheStore,
  strategy: BaseMatchStrategy,
  similarity?: SimilarityEngine,
): SemanticCache;
|
|
64
|
+
|
|
65
|
+
export { type CacheRequest as C, SemanticCache as S, type SemanticCacheEvents as a, type CacheResponseInput as b, createSemanticCache as c };
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
interface EmbeddingProvider {
|
|
2
|
+
embed(text: string): Promise<number[]>;
|
|
3
|
+
embedBatch?(texts: string[]): Promise<number[][]>;
|
|
4
|
+
readonly dimensions?: number;
|
|
5
|
+
}
|
|
6
|
+
/** Name of the vector comparison metric selectable via `SimilarityEngineConfig.metric`. */
type SimilarityMetric =
  | 'cosine'
  | 'euclidean'
  | 'dot_product';
|
|
7
|
+
interface SimilarityEngineConfig {
|
|
8
|
+
provider: EmbeddingProvider;
|
|
9
|
+
metric?: SimilarityMetric;
|
|
10
|
+
cacheEmbeddings?: boolean;
|
|
11
|
+
maxCacheSize?: number;
|
|
12
|
+
}
|
|
13
|
+
declare class SimilarityEngine {
|
|
14
|
+
private provider;
|
|
15
|
+
private metric;
|
|
16
|
+
private embeddingCache?;
|
|
17
|
+
private maxCacheSize;
|
|
18
|
+
constructor(config: SimilarityEngineConfig);
|
|
19
|
+
embed(text: string): Promise<number[]>;
|
|
20
|
+
embedBatch(texts: string[]): Promise<number[][]>;
|
|
21
|
+
computeSimilarity(a: number[], b: number[]): number;
|
|
22
|
+
findMostSimilar(query: number[], candidates: Array<{
|
|
23
|
+
id: string;
|
|
24
|
+
vector: number[];
|
|
25
|
+
}>, minSimilarity?: number): Array<{
|
|
26
|
+
id: string;
|
|
27
|
+
similarity: number;
|
|
28
|
+
}>;
|
|
29
|
+
get dimensions(): number;
|
|
30
|
+
clearCache(): void;
|
|
31
|
+
getCacheStats(): {
|
|
32
|
+
size: number;
|
|
33
|
+
maxSize: number;
|
|
34
|
+
} | null;
|
|
35
|
+
private cosineSimilarity;
|
|
36
|
+
private euclideanDistance;
|
|
37
|
+
private dotProduct;
|
|
38
|
+
}
|
|
39
|
+
/**
 * Factory declaration for `SimilarityEngine`.
 *
 * Takes the same single `config` argument as the `SimilarityEngine`
 * constructor declared in this file. The implementation is not part of this
 * declaration file.
 *
 * @param config - Engine configuration; `provider` is the only required field.
 * @returns A `SimilarityEngine` instance.
 */
declare function createSimilarityEngine(
  config: SimilarityEngineConfig,
): SimilarityEngine;
|
|
40
|
+
|
|
41
|
+
export { type EmbeddingProvider as E, SimilarityEngine as S, type SimilarityMetric as a, type SimilarityEngineConfig as b, createSimilarityEngine as c };
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { b as CacheEntry } from '../cache.types-DMuyQseO.js';
|
|
2
|
+
|
|
3
|
+
interface AnalyticsData {
|
|
4
|
+
totalHits: number;
|
|
5
|
+
totalMisses: number;
|
|
6
|
+
exactHits: number;
|
|
7
|
+
semanticHits: number;
|
|
8
|
+
hitRate: number;
|
|
9
|
+
avgLatencyMs: number;
|
|
10
|
+
p50LatencyMs: number;
|
|
11
|
+
p95LatencyMs: number;
|
|
12
|
+
p99LatencyMs: number;
|
|
13
|
+
totalTokensSaved: number;
|
|
14
|
+
estimatedCostSavingsUSD: number;
|
|
15
|
+
topModels: Array<{
|
|
16
|
+
model: string;
|
|
17
|
+
hits: number;
|
|
18
|
+
}>;
|
|
19
|
+
topNamespaces: Array<{
|
|
20
|
+
namespace: string;
|
|
21
|
+
hits: number;
|
|
22
|
+
}>;
|
|
23
|
+
hourlyStats: Array<{
|
|
24
|
+
hour: number;
|
|
25
|
+
hits: number;
|
|
26
|
+
misses: number;
|
|
27
|
+
avgLatencyMs: number;
|
|
28
|
+
}>;
|
|
29
|
+
}
|
|
30
|
+
interface CostSavingsReport {
|
|
31
|
+
period: string;
|
|
32
|
+
totalRequests: number;
|
|
33
|
+
cachedRequests: number;
|
|
34
|
+
hitRate: number;
|
|
35
|
+
inputTokensSaved: number;
|
|
36
|
+
outputTokensSaved: number;
|
|
37
|
+
totalTokensSaved: number;
|
|
38
|
+
estimatedCostWithoutCache: number;
|
|
39
|
+
actualCostWithCache: number;
|
|
40
|
+
costSaved: number;
|
|
41
|
+
reductionPercent: number;
|
|
42
|
+
}
|
|
43
|
+
interface ModelPricing {
|
|
44
|
+
inputPer1K: number;
|
|
45
|
+
outputPer1K: number;
|
|
46
|
+
}
|
|
47
|
+
interface PerformanceMetrics {
|
|
48
|
+
avgLookupMs: number;
|
|
49
|
+
avgEmbeddingMs: number;
|
|
50
|
+
avgStoreReadMs: number;
|
|
51
|
+
avgStoreWriteMs: number;
|
|
52
|
+
p50LatencyMs: number;
|
|
53
|
+
p95LatencyMs: number;
|
|
54
|
+
p99LatencyMs: number;
|
|
55
|
+
totalOperations: number;
|
|
56
|
+
failedOperations: number;
|
|
57
|
+
errorRate: number;
|
|
58
|
+
}
|
|
59
|
+
interface HitEvent {
|
|
60
|
+
timestamp: number;
|
|
61
|
+
type: 'exact' | 'semantic';
|
|
62
|
+
model: string;
|
|
63
|
+
namespace?: string;
|
|
64
|
+
similarity?: number;
|
|
65
|
+
latencyMs: number;
|
|
66
|
+
tokensSaved: number;
|
|
67
|
+
}
|
|
68
|
+
interface MissEvent {
|
|
69
|
+
timestamp: number;
|
|
70
|
+
model: string;
|
|
71
|
+
namespace?: string;
|
|
72
|
+
latencyMs: number;
|
|
73
|
+
reason: 'not_found' | 'below_threshold' | 'expired' | 'error';
|
|
74
|
+
}
|
|
75
|
+
interface AnalyticsQueryOptions {
|
|
76
|
+
startTime?: number;
|
|
77
|
+
endTime?: number;
|
|
78
|
+
model?: string;
|
|
79
|
+
namespace?: string;
|
|
80
|
+
groupBy?: 'model' | 'namespace' | 'hour' | 'day';
|
|
81
|
+
}
|
|
82
|
+
/**
 * Names of analytics export formats.
 *
 * Note: `CacheAnalytics.export` as declared in this file accepts only
 * `'json'` and `'csv'`, not `'prometheus'`.
 */
type AnalyticsExportFormat =
  | 'json'
  | 'csv'
  | 'prometheus';
|
|
83
|
+
interface AnalyticsConfig {
|
|
84
|
+
enabled?: boolean;
|
|
85
|
+
sampleRate?: number;
|
|
86
|
+
retentionSeconds?: number;
|
|
87
|
+
flushIntervalMs?: number;
|
|
88
|
+
modelPricing?: Record<string, ModelPricing>;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
declare class CacheAnalytics {
|
|
92
|
+
private config;
|
|
93
|
+
private hits;
|
|
94
|
+
private misses;
|
|
95
|
+
private exactHits;
|
|
96
|
+
private semanticHits;
|
|
97
|
+
private tokensSaved;
|
|
98
|
+
private inputTokensSaved;
|
|
99
|
+
private outputTokensSaved;
|
|
100
|
+
private latencies;
|
|
101
|
+
private modelHits;
|
|
102
|
+
private namespaceHits;
|
|
103
|
+
private hitEvents;
|
|
104
|
+
private missEvents;
|
|
105
|
+
private setCount;
|
|
106
|
+
constructor(config?: Partial<AnalyticsConfig>);
|
|
107
|
+
recordHit(entry: CacheEntry, type: 'exact' | 'semantic', latencyMs: number): void;
|
|
108
|
+
recordMiss(latencyMs: number, reason?: MissEvent['reason']): void;
|
|
109
|
+
recordSet(_entry: CacheEntry): void;
|
|
110
|
+
getSummary(): AnalyticsData;
|
|
111
|
+
getCostSavingsReport(periodLabel?: string): CostSavingsReport;
|
|
112
|
+
getPerformanceMetrics(): PerformanceMetrics;
|
|
113
|
+
reset(): void;
|
|
114
|
+
export(format?: 'json' | 'csv'): string;
|
|
115
|
+
private calculateCostSavings;
|
|
116
|
+
private getTopModels;
|
|
117
|
+
private getTopNamespaces;
|
|
118
|
+
private getHourlyStats;
|
|
119
|
+
private trimEvents;
|
|
120
|
+
}
|
|
121
|
+
/**
 * Factory declaration for `CacheAnalytics`.
 *
 * Takes the same optional `config` argument as the `CacheAnalytics`
 * constructor declared in this file. The implementation is not part of this
 * declaration file.
 *
 * @param config - Optional partial analytics configuration.
 * @returns A `CacheAnalytics` instance.
 */
declare function createCacheAnalytics(
  config?: Partial<AnalyticsConfig>,
): CacheAnalytics;
|
|
122
|
+
|
|
123
|
+
export { type AnalyticsConfig, type AnalyticsData, type AnalyticsExportFormat, type AnalyticsQueryOptions, CacheAnalytics, type CostSavingsReport, type HitEvent, type MissEvent, type ModelPricing, type PerformanceMetrics, createCacheAnalytics };
|