@neural-tools/semantic-cache 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +21 -80
- package/README.md +417 -0
- package/dist/index.d.mts +64 -0
- package/dist/index.d.ts +6 -4
- package/dist/index.js +1 -146
- package/dist/index.mjs +1 -0
- package/package.json +6 -6
package/LICENSE.md
CHANGED
|
@@ -1,80 +1,21 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Luke Amy
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
### 2. Pro/Enterprise License (Proprietary)
|
|
24
|
-
|
|
25
|
-
The following features require a valid Pro or Enterprise license:
|
|
26
|
-
|
|
27
|
-
**Pro Features:**
|
|
28
|
-
- Vector database integration
|
|
29
|
-
- Semantic caching
|
|
30
|
-
- Fine-tuning workflows
|
|
31
|
-
- Cloud deployment templates (AWS/GCP)
|
|
32
|
-
- Premium templates and examples
|
|
33
|
-
- GitHub automation features
|
|
34
|
-
|
|
35
|
-
**Enterprise Features:**
|
|
36
|
-
- White-label support
|
|
37
|
-
- Custom integrations
|
|
38
|
-
- Priority support
|
|
39
|
-
- SLA guarantees
|
|
40
|
-
- Team collaboration features
|
|
41
|
-
|
|
42
|
-
These features are proprietary and may not be used without a valid license key purchased from neural-tools.dev.
|
|
43
|
-
|
|
44
|
-
### License Terms
|
|
45
|
-
|
|
46
|
-
1. **Free Tier**: You may use the free tier features for any purpose, including commercial use, under the MIT License terms.
|
|
47
|
-
|
|
48
|
-
2. **Pro/Enterprise**: You must purchase a license to access Pro or Enterprise features. Each license is:
|
|
49
|
-
- Per-user for individual licenses
|
|
50
|
-
- Per-organization for team/enterprise licenses
|
|
51
|
-
- Non-transferable without written consent
|
|
52
|
-
- Subject to the terms at neural-tools.dev/terms
|
|
53
|
-
|
|
54
|
-
3. **Source Code**: This repository is private. You may not:
|
|
55
|
-
- Redistribute the source code
|
|
56
|
-
- Create derivative works for redistribution
|
|
57
|
-
- Reverse engineer Pro/Enterprise features
|
|
58
|
-
- Remove or circumvent license checks
|
|
59
|
-
|
|
60
|
-
4. **Support**: Support is provided based on your license tier:
|
|
61
|
-
- Free: Community support only
|
|
62
|
-
- Pro: Email support (48-hour response)
|
|
63
|
-
- Enterprise: Priority support with SLA
|
|
64
|
-
|
|
65
|
-
### Warranty Disclaimer
|
|
66
|
-
|
|
67
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
68
|
-
|
|
69
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
70
|
-
|
|
71
|
-
### Contact
|
|
72
|
-
|
|
73
|
-
For licensing inquiries:
|
|
74
|
-
- Email: licensing@neural-tools.dev
|
|
75
|
-
- Website: https://neural-tools.dev/pricing
|
|
76
|
-
- Support: support@neural-tools.dev
|
|
77
|
-
|
|
78
|
-
---
|
|
79
|
-
|
|
80
|
-
**Last Updated:** January 2025
|
|
1
|
+
# MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Luke Amy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
# @neural-tools/semantic-cache
|
|
2
|
+
|
|
3
|
+
> Semantic caching for LLM responses
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/@neural-tools/semantic-cache)
|
|
6
|
+
[License](../../LICENSE.md)
|
|
7
|
+
|
|
8
|
+
Intelligent caching for LLM responses using semantic similarity. Save costs and improve response times by reusing similar completions.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npm install @neural-tools/semantic-cache @neural-tools/vector-db
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Features
|
|
17
|
+
|
|
18
|
+
- **Semantic Matching** - Finds similar prompts, not just exact matches
|
|
19
|
+
- **Cost Savings** - Reduce API calls to expensive LLMs
|
|
20
|
+
- **Fast Responses** - Instant replies for cached queries
|
|
21
|
+
- **Configurable** - Adjust similarity threshold
|
|
22
|
+
- **Provider Agnostic** - Works with any vector database
|
|
23
|
+
- **TTL Support** - Automatic cache expiration
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
import { SemanticCache } from '@neural-tools/semantic-cache';
|
|
29
|
+
import { VectorDB } from '@neural-tools/vector-db';
|
|
30
|
+
|
|
31
|
+
// Setup vector database
|
|
32
|
+
const vectorDB = new VectorDB({
|
|
33
|
+
provider: 'pinecone',
|
|
34
|
+
config: {
|
|
35
|
+
apiKey: process.env.PINECONE_API_KEY,
|
|
36
|
+
environment: 'us-west1-gcp',
|
|
37
|
+
indexName: 'llm-cache'
|
|
38
|
+
}
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// Create semantic cache
|
|
42
|
+
const cache = new SemanticCache({
|
|
43
|
+
vectorDB,
|
|
44
|
+
similarityThreshold: 0.9, // 0-1, higher = more similar
|
|
45
|
+
ttl: 3600 // Cache lifetime in seconds
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
await cache.initialize();
|
|
49
|
+
|
|
50
|
+
// Your embedding function
|
|
51
|
+
async function embed(text: string): Promise<number[]> {
|
|
52
|
+
// Use OpenAI, Anthropic, or any embedding model
|
|
53
|
+
// Return vector of embeddings
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Check cache before calling LLM
|
|
57
|
+
const prompt = "What is the capital of France?";
|
|
58
|
+
const embedding = await embed(prompt);
|
|
59
|
+
|
|
60
|
+
const cached = await cache.get(embedding);
|
|
61
|
+
|
|
62
|
+
if (cached) {
|
|
63
|
+
console.log('Cache hit!', cached.response);
|
|
64
|
+
} else {
|
|
65
|
+
// Call your LLM
|
|
66
|
+
const response = await callLLM(prompt);
|
|
67
|
+
|
|
68
|
+
// Store in cache
|
|
69
|
+
await cache.set(embedding, {
|
|
70
|
+
prompt,
|
|
71
|
+
response,
|
|
72
|
+
model: 'claude-3-opus',
|
|
73
|
+
timestamp: Date.now()
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## API Reference
|
|
79
|
+
|
|
80
|
+
### Constructor
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
new SemanticCache(options: SemanticCacheOptions)
|
|
84
|
+
|
|
85
|
+
interface SemanticCacheOptions {
|
|
86
|
+
vectorDB: VectorDB;
|
|
87
|
+
similarityThreshold?: number; // Default: 0.95
|
|
88
|
+
ttl?: number; // Seconds, default: 3600
|
|
89
|
+
namespace?: string;
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Methods
|
|
94
|
+
|
|
95
|
+
#### `initialize()`
|
|
96
|
+
|
|
97
|
+
Initialize the cache and vector database connection.
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
await cache.initialize();
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
#### `get(embedding)`
|
|
104
|
+
|
|
105
|
+
Retrieve a cached response for similar prompts.
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
const result = await cache.get(embedding);
|
|
109
|
+
|
|
110
|
+
if (result) {
|
|
111
|
+
console.log(result.response);
|
|
112
|
+
console.log(result.similarity); // How similar (0-1)
|
|
113
|
+
console.log(result.metadata);
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
#### `set(embedding, data)`
|
|
118
|
+
|
|
119
|
+
Store a response in the cache.
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
await cache.set(embedding, {
|
|
123
|
+
prompt: string;
|
|
124
|
+
response: string;
|
|
125
|
+
model?: string;
|
|
126
|
+
tokens?: number;
|
|
127
|
+
timestamp?: number;
|
|
128
|
+
metadata?: Record<string, any>;
|
|
129
|
+
});
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
#### `delete(id)`
|
|
133
|
+
|
|
134
|
+
Remove a specific cache entry.
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
await cache.delete('cache-entry-id');
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
#### `clear()`
|
|
141
|
+
|
|
142
|
+
Clear all cached entries.
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
await cache.clear();
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### `stats()`
|
|
149
|
+
|
|
150
|
+
Get cache statistics.
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
const stats = await cache.stats();
|
|
154
|
+
console.log(stats);
|
|
155
|
+
// {
|
|
156
|
+
// totalEntries: 1234,
|
|
157
|
+
// hitRate: 0.75,
|
|
158
|
+
// avgSimilarity: 0.92
|
|
159
|
+
// }
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Usage Examples
|
|
163
|
+
|
|
164
|
+
### With OpenAI
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
import { SemanticCache } from '@neural-tools/semantic-cache';
|
|
168
|
+
import { VectorDB } from '@neural-tools/vector-db';
|
|
169
|
+
import OpenAI from 'openai';
|
|
170
|
+
|
|
171
|
+
const openai = new OpenAI();
|
|
172
|
+
const vectorDB = new VectorDB({ /* ... */ });
|
|
173
|
+
const cache = new SemanticCache({ vectorDB });
|
|
174
|
+
|
|
175
|
+
await cache.initialize();
|
|
176
|
+
|
|
177
|
+
async function completionWithCache(prompt: string) {
|
|
178
|
+
// Get embedding
|
|
179
|
+
const embeddingResponse = await openai.embeddings.create({
|
|
180
|
+
model: 'text-embedding-3-small',
|
|
181
|
+
input: prompt
|
|
182
|
+
});
|
|
183
|
+
const embedding = embeddingResponse.data[0].embedding;
|
|
184
|
+
|
|
185
|
+
// Check cache
|
|
186
|
+
const cached = await cache.get(embedding);
|
|
187
|
+
if (cached) {
|
|
188
|
+
console.log('Cache hit! Saved API call.');
|
|
189
|
+
return cached.response;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Call LLM
|
|
193
|
+
const completion = await openai.chat.completions.create({
|
|
194
|
+
model: 'gpt-4',
|
|
195
|
+
messages: [{ role: 'user', content: prompt }]
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
const response = completion.choices[0].message.content;
|
|
199
|
+
|
|
200
|
+
// Cache the response
|
|
201
|
+
await cache.set(embedding, {
|
|
202
|
+
prompt,
|
|
203
|
+
response,
|
|
204
|
+
model: 'gpt-4',
|
|
205
|
+
tokens: completion.usage?.total_tokens
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
return response;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Use it
|
|
212
|
+
const answer = await completionWithCache('Explain quantum computing');
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### With Anthropic Claude
|
|
216
|
+
|
|
217
|
+
```typescript
|
|
218
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
219
|
+
import { SemanticCache } from '@neural-tools/semantic-cache';
|
|
220
|
+
|
|
221
|
+
const anthropic = new Anthropic();
|
|
222
|
+
const cache = new SemanticCache({ /* ... */ });
|
|
223
|
+
|
|
224
|
+
async function claudeWithCache(prompt: string) {
|
|
225
|
+
const embedding = await getEmbedding(prompt);
|
|
226
|
+
|
|
227
|
+
const cached = await cache.get(embedding);
|
|
228
|
+
if (cached) return cached.response;
|
|
229
|
+
|
|
230
|
+
const message = await anthropic.messages.create({
|
|
231
|
+
model: 'claude-3-opus-20240229',
|
|
232
|
+
max_tokens: 1024,
|
|
233
|
+
messages: [{ role: 'user', content: prompt }]
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
const response = message.content[0].text;
|
|
237
|
+
|
|
238
|
+
await cache.set(embedding, {
|
|
239
|
+
prompt,
|
|
240
|
+
response,
|
|
241
|
+
model: 'claude-3-opus-20240229'
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
return response;
|
|
245
|
+
}
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Custom Similarity Threshold
|
|
249
|
+
|
|
250
|
+
```typescript
|
|
251
|
+
// Strict matching (0.95+)
|
|
252
|
+
const strictCache = new SemanticCache({
|
|
253
|
+
vectorDB,
|
|
254
|
+
similarityThreshold: 0.95
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
// Loose matching (0.80+)
|
|
258
|
+
const looseCache = new SemanticCache({
|
|
259
|
+
vectorDB,
|
|
260
|
+
similarityThreshold: 0.80
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
// Very strict (0.98+) - almost exact matches only
|
|
264
|
+
const veryStrictCache = new SemanticCache({
|
|
265
|
+
vectorDB,
|
|
266
|
+
similarityThreshold: 0.98
|
|
267
|
+
});
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### With TTL (Time-To-Live)
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
const cache = new SemanticCache({
|
|
274
|
+
vectorDB,
|
|
275
|
+
ttl: 86400 // 24 hours
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
// Cached responses expire after 24 hours
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Namespace for Multiple Models
|
|
282
|
+
|
|
283
|
+
```typescript
|
|
284
|
+
const gpt4Cache = new SemanticCache({
|
|
285
|
+
vectorDB,
|
|
286
|
+
namespace: 'gpt-4'
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
const claudeCache = new SemanticCache({
|
|
290
|
+
vectorDB,
|
|
291
|
+
namespace: 'claude-opus'
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// Separate caches for different models
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
## Configuration
|
|
298
|
+
|
|
299
|
+
### Similarity Threshold
|
|
300
|
+
|
|
301
|
+
Controls how similar prompts need to be:
|
|
302
|
+
|
|
303
|
+
- `0.99` - Nearly identical prompts
|
|
304
|
+
- `0.95` - Very similar prompts (recommended for production)
|
|
305
|
+
- `0.90` - Similar prompts (good balance)
|
|
306
|
+
- `0.85` - Somewhat similar prompts
|
|
307
|
+
- `0.80` - Loosely similar prompts
|
|
308
|
+
|
|
309
|
+
### TTL (Time-To-Live)
|
|
310
|
+
|
|
311
|
+
How long to keep cached responses:
|
|
312
|
+
|
|
313
|
+
```typescript
|
|
314
|
+
{
|
|
315
|
+
ttl: 3600 // 1 hour
|
|
316
|
+
ttl: 86400 // 24 hours
|
|
317
|
+
ttl: 604800 // 1 week
|
|
318
|
+
ttl: 0 // Never expire
|
|
319
|
+
}
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
## Cost Savings Example
|
|
323
|
+
|
|
324
|
+
```typescript
|
|
325
|
+
// Without caching
|
|
326
|
+
// 1000 requests to GPT-4 @ $0.03 per 1K tokens
|
|
327
|
+
// Average 500 tokens per response
|
|
328
|
+
// Cost: 1000 * 0.03 * 0.5 = $15
|
|
329
|
+
|
|
330
|
+
// With semantic caching (75% hit rate)
|
|
331
|
+
// 250 requests to GPT-4
|
|
332
|
+
// 750 cache hits (free)
|
|
333
|
+
// Cost: 250 * 0.03 * 0.5 = $3.75
|
|
334
|
+
// Savings: $11.25 (75%)
|
|
335
|
+
|
|
336
|
+
const cache = new SemanticCache({ vectorDB });
|
|
337
|
+
// Just add caching, save 75%!
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## Performance
|
|
341
|
+
|
|
342
|
+
Typical performance characteristics:
|
|
343
|
+
|
|
344
|
+
- **Cache Hit**: 10-50ms (vector lookup)
|
|
345
|
+
- **Cache Miss**: LLM latency + 20-100ms (store)
|
|
346
|
+
- **Memory**: Minimal (vectors stored in vector DB)
|
|
347
|
+
|
|
348
|
+
## Best Practices
|
|
349
|
+
|
|
350
|
+
### 1. Choose the Right Threshold
|
|
351
|
+
|
|
352
|
+
```typescript
|
|
353
|
+
// For FAQ / repetitive queries
|
|
354
|
+
{ similarityThreshold: 0.85 }
|
|
355
|
+
|
|
356
|
+
// For production assistants
|
|
357
|
+
{ similarityThreshold: 0.92 }
|
|
358
|
+
|
|
359
|
+
// For high-accuracy requirements
|
|
360
|
+
{ similarityThreshold: 0.97 }
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
### 2. Set Appropriate TTL
|
|
364
|
+
|
|
365
|
+
```typescript
|
|
366
|
+
// Real-time data (weather, news)
|
|
367
|
+
{ ttl: 300 } // 5 minutes
|
|
368
|
+
|
|
369
|
+
// General knowledge
|
|
370
|
+
{ ttl: 86400 } // 24 hours
|
|
371
|
+
|
|
372
|
+
// Static content
|
|
373
|
+
{ ttl: 604800 } // 1 week
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### 3. Monitor Hit Rates
|
|
377
|
+
|
|
378
|
+
```typescript
|
|
379
|
+
const stats = await cache.stats();
|
|
380
|
+
console.log(`Hit rate: ${(stats.hitRate * 100).toFixed(1)}%`);
|
|
381
|
+
|
|
382
|
+
// Adjust threshold if hit rate is too low/high
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### 4. Use Namespaces
|
|
386
|
+
|
|
387
|
+
```typescript
|
|
388
|
+
// Separate caches by use case
|
|
389
|
+
const customerSupport = new SemanticCache({
|
|
390
|
+
vectorDB,
|
|
391
|
+
namespace: 'customer-support'
|
|
392
|
+
});
|
|
393
|
+
|
|
394
|
+
const codeGen = new SemanticCache({
|
|
395
|
+
vectorDB,
|
|
396
|
+
namespace: 'code-generation'
|
|
397
|
+
});
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
## Dependencies
|
|
401
|
+
|
|
402
|
+
- [@neural-tools/core](../core) - Core utilities
|
|
403
|
+
- [@neural-tools/vector-db](../vector-db) - Vector database abstraction
|
|
404
|
+
|
|
405
|
+
## Contributing
|
|
406
|
+
|
|
407
|
+
Contributions are welcome! See the [main repository](https://github.com/MacLeanLuke/neural-tools) for guidelines.
|
|
408
|
+
|
|
409
|
+
## License
|
|
410
|
+
|
|
411
|
+
MIT - See [LICENSE.md](../../LICENSE.md) for details.
|
|
412
|
+
|
|
413
|
+
## Links
|
|
414
|
+
|
|
415
|
+
- [Documentation](https://neural-tools.com/docs/semantic-cache.html)
|
|
416
|
+
- [GitHub](https://github.com/MacLeanLuke/neural-tools)
|
|
417
|
+
- [npm](https://www.npmjs.com/package/@neural-tools/semantic-cache)
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
interface SemanticCacheConfig {
|
|
2
|
+
/**
|
|
3
|
+
* Similarity threshold (0-1). Higher = more strict matching.
|
|
4
|
+
* Default: 0.95
|
|
5
|
+
*/
|
|
6
|
+
similarityThreshold?: number;
|
|
7
|
+
/**
|
|
8
|
+
* Time to live in seconds. 0 = never expire.
|
|
9
|
+
* Default: 3600 (1 hour)
|
|
10
|
+
*/
|
|
11
|
+
ttl?: number;
|
|
12
|
+
/**
|
|
13
|
+
* Vector store provider
|
|
14
|
+
* Default: 'local'
|
|
15
|
+
*/
|
|
16
|
+
provider?: 'local' | 'pinecone' | 'qdrant' | 'chromadb';
|
|
17
|
+
/**
|
|
18
|
+
* Vector database configuration
|
|
19
|
+
*/
|
|
20
|
+
vectorDBConfig?: any;
|
|
21
|
+
}
|
|
22
|
+
interface CacheEntry {
|
|
23
|
+
prompt: string;
|
|
24
|
+
response: string;
|
|
25
|
+
metadata?: Record<string, any>;
|
|
26
|
+
timestamp: number;
|
|
27
|
+
ttl?: number;
|
|
28
|
+
}
|
|
29
|
+
declare class SemanticCache {
|
|
30
|
+
private vectorStore;
|
|
31
|
+
private config;
|
|
32
|
+
private initialized;
|
|
33
|
+
constructor(config?: SemanticCacheConfig);
|
|
34
|
+
/**
|
|
35
|
+
* Initialize the semantic cache
|
|
36
|
+
*/
|
|
37
|
+
initialize(): Promise<void>;
|
|
38
|
+
/**
|
|
39
|
+
* Get a cached response for a prompt
|
|
40
|
+
*/
|
|
41
|
+
get(prompt: string): Promise<string | null>;
|
|
42
|
+
/**
|
|
43
|
+
* Set a cache entry
|
|
44
|
+
*/
|
|
45
|
+
set(prompt: string, response: string, metadata?: Record<string, any>): Promise<void>;
|
|
46
|
+
/**
|
|
47
|
+
* Clear all cache entries
|
|
48
|
+
*/
|
|
49
|
+
clear(): Promise<void>;
|
|
50
|
+
/**
|
|
51
|
+
* Clean up expired entries
|
|
52
|
+
*/
|
|
53
|
+
cleanup(): Promise<number>;
|
|
54
|
+
/**
|
|
55
|
+
* Close the cache connection
|
|
56
|
+
*/
|
|
57
|
+
close(): Promise<void>;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Create a semantic cache instance
|
|
61
|
+
*/
|
|
62
|
+
declare function createSemanticCache(config?: SemanticCacheConfig): SemanticCache;
|
|
63
|
+
|
|
64
|
+
export { type CacheEntry, SemanticCache, type SemanticCacheConfig, createSemanticCache };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
interface SemanticCacheConfig {
|
|
2
2
|
/**
|
|
3
3
|
* Similarity threshold (0-1). Higher = more strict matching.
|
|
4
4
|
* Default: 0.95
|
|
@@ -19,14 +19,14 @@ export interface SemanticCacheConfig {
|
|
|
19
19
|
*/
|
|
20
20
|
vectorDBConfig?: any;
|
|
21
21
|
}
|
|
22
|
-
|
|
22
|
+
interface CacheEntry {
|
|
23
23
|
prompt: string;
|
|
24
24
|
response: string;
|
|
25
25
|
metadata?: Record<string, any>;
|
|
26
26
|
timestamp: number;
|
|
27
27
|
ttl?: number;
|
|
28
28
|
}
|
|
29
|
-
|
|
29
|
+
declare class SemanticCache {
|
|
30
30
|
private vectorStore;
|
|
31
31
|
private config;
|
|
32
32
|
private initialized;
|
|
@@ -59,4 +59,6 @@ export declare class SemanticCache {
|
|
|
59
59
|
/**
|
|
60
60
|
* Create a semantic cache instance
|
|
61
61
|
*/
|
|
62
|
-
|
|
62
|
+
declare function createSemanticCache(config?: SemanticCacheConfig): SemanticCache;
|
|
63
|
+
|
|
64
|
+
export { type CacheEntry, SemanticCache, type SemanticCacheConfig, createSemanticCache };
|
package/dist/index.js
CHANGED
|
@@ -1,146 +1 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.SemanticCache = void 0;
|
|
4
|
-
exports.createSemanticCache = createSemanticCache;
|
|
5
|
-
const vector_db_1 = require("@neural-tools/vector-db");
|
|
6
|
-
const core_1 = require("@neural-tools/core");
|
|
7
|
-
class SemanticCache {
|
|
8
|
-
vectorStore = null;
|
|
9
|
-
config;
|
|
10
|
-
initialized = false;
|
|
11
|
-
constructor(config = {}) {
|
|
12
|
-
this.config = {
|
|
13
|
-
similarityThreshold: config.similarityThreshold || 0.95,
|
|
14
|
-
ttl: config.ttl || 3600,
|
|
15
|
-
provider: config.provider || 'local',
|
|
16
|
-
vectorDBConfig: config.vectorDBConfig || {}
|
|
17
|
-
};
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* Initialize the semantic cache
|
|
21
|
-
*/
|
|
22
|
-
async initialize() {
|
|
23
|
-
if (this.initialized)
|
|
24
|
-
return;
|
|
25
|
-
// Check feature access for non-local providers
|
|
26
|
-
if (this.config.provider !== 'local') {
|
|
27
|
-
await (0, core_1.requireFeature)('semantic-cache', 'Semantic Cache');
|
|
28
|
-
}
|
|
29
|
-
this.vectorStore = await (0, vector_db_1.createVectorStore)({
|
|
30
|
-
provider: this.config.provider,
|
|
31
|
-
...this.config.vectorDBConfig
|
|
32
|
-
});
|
|
33
|
-
await this.vectorStore.connect();
|
|
34
|
-
this.initialized = true;
|
|
35
|
-
}
|
|
36
|
-
/**
|
|
37
|
-
* Get a cached response for a prompt
|
|
38
|
-
*/
|
|
39
|
-
async get(prompt) {
|
|
40
|
-
if (!this.initialized) {
|
|
41
|
-
await this.initialize();
|
|
42
|
-
}
|
|
43
|
-
if (!this.vectorStore) {
|
|
44
|
-
throw new Error('Vector store not initialized');
|
|
45
|
-
}
|
|
46
|
-
// Create embedding for the prompt
|
|
47
|
-
const embedding = await (0, vector_db_1.createEmbedding)(prompt);
|
|
48
|
-
// Query for similar prompts
|
|
49
|
-
const results = await this.vectorStore.query(embedding, {
|
|
50
|
-
topK: 1,
|
|
51
|
-
includeMetadata: true
|
|
52
|
-
});
|
|
53
|
-
if (results.length === 0) {
|
|
54
|
-
return null;
|
|
55
|
-
}
|
|
56
|
-
const bestMatch = results[0];
|
|
57
|
-
// Check similarity threshold
|
|
58
|
-
if (bestMatch.score < this.config.similarityThreshold) {
|
|
59
|
-
return null;
|
|
60
|
-
}
|
|
61
|
-
// Check if expired
|
|
62
|
-
const entry = bestMatch.metadata;
|
|
63
|
-
if (entry.ttl && entry.ttl > 0) {
|
|
64
|
-
const age = Date.now() - entry.timestamp;
|
|
65
|
-
if (age > entry.ttl * 1000) {
|
|
66
|
-
// Entry expired, delete it
|
|
67
|
-
await this.vectorStore.delete([bestMatch.id]);
|
|
68
|
-
return null;
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
return entry.response;
|
|
72
|
-
}
|
|
73
|
-
/**
|
|
74
|
-
* Set a cache entry
|
|
75
|
-
*/
|
|
76
|
-
async set(prompt, response, metadata) {
|
|
77
|
-
if (!this.initialized) {
|
|
78
|
-
await this.initialize();
|
|
79
|
-
}
|
|
80
|
-
if (!this.vectorStore) {
|
|
81
|
-
throw new Error('Vector store not initialized');
|
|
82
|
-
}
|
|
83
|
-
// Create embedding for the prompt
|
|
84
|
-
const embedding = await (0, vector_db_1.createEmbedding)(prompt);
|
|
85
|
-
// Create cache entry
|
|
86
|
-
const entry = {
|
|
87
|
-
prompt,
|
|
88
|
-
response,
|
|
89
|
-
metadata,
|
|
90
|
-
timestamp: Date.now(),
|
|
91
|
-
ttl: this.config.ttl
|
|
92
|
-
};
|
|
93
|
-
// Store in vector database
|
|
94
|
-
const id = `cache-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
95
|
-
await this.vectorStore.upsert([
|
|
96
|
-
{
|
|
97
|
-
id,
|
|
98
|
-
values: embedding,
|
|
99
|
-
metadata: entry
|
|
100
|
-
}
|
|
101
|
-
]);
|
|
102
|
-
}
|
|
103
|
-
/**
|
|
104
|
-
* Clear all cache entries
|
|
105
|
-
*/
|
|
106
|
-
async clear() {
|
|
107
|
-
if (!this.initialized) {
|
|
108
|
-
await this.initialize();
|
|
109
|
-
}
|
|
110
|
-
if (!this.vectorStore) {
|
|
111
|
-
throw new Error('Vector store not initialized');
|
|
112
|
-
}
|
|
113
|
-
await this.vectorStore.deleteNamespace('default');
|
|
114
|
-
}
|
|
115
|
-
/**
|
|
116
|
-
* Clean up expired entries
|
|
117
|
-
*/
|
|
118
|
-
async cleanup() {
|
|
119
|
-
if (!this.initialized) {
|
|
120
|
-
await this.initialize();
|
|
121
|
-
}
|
|
122
|
-
if (!this.vectorStore) {
|
|
123
|
-
throw new Error('Vector store not initialized');
|
|
124
|
-
}
|
|
125
|
-
// This is a simplified cleanup - in production, you'd want to
|
|
126
|
-
// query all vectors and check their TTL
|
|
127
|
-
// For now, we'll return 0 as a placeholder
|
|
128
|
-
return 0;
|
|
129
|
-
}
|
|
130
|
-
/**
|
|
131
|
-
* Close the cache connection
|
|
132
|
-
*/
|
|
133
|
-
async close() {
|
|
134
|
-
if (this.vectorStore) {
|
|
135
|
-
await this.vectorStore.disconnect();
|
|
136
|
-
}
|
|
137
|
-
this.initialized = false;
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
exports.SemanticCache = SemanticCache;
|
|
141
|
-
/**
|
|
142
|
-
* Create a semantic cache instance
|
|
143
|
-
*/
|
|
144
|
-
function createSemanticCache(config) {
|
|
145
|
-
return new SemanticCache(config);
|
|
146
|
-
}
|
|
1
|
+
"use strict";

// ---------------------------------------------------------------------------
// Bundler interop helpers: build a CommonJS exports object that exposes the
// module's bindings as live getters and is tagged with `__esModule`.
// ---------------------------------------------------------------------------
const defineProp = Object.defineProperty;
const getOwnPropDesc = Object.getOwnPropertyDescriptor;
const getOwnPropNames = Object.getOwnPropertyNames;
const hasOwnProp = Object.prototype.hasOwnProperty;

/** Define every entry of `getters` on `target` as an enumerable getter. */
const exportAll = (target, getters) => {
  for (const name in getters) {
    defineProp(target, name, { get: getters[name], enumerable: true });
  }
};

/**
 * Copy the own properties of `from` onto `to` as getters that read through to
 * `from`. Keys already present on `to`, and the key named by `except`, are
 * skipped. Enumerability follows the source descriptor.
 */
const copyProps = (to, from, except) => {
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of getOwnPropNames(from)) {
      if (!hasOwnProp.call(to, key) && key !== except) {
        const desc = getOwnPropDesc(from, key);
        defineProp(to, key, {
          get: () => from[key],
          enumerable: !desc || desc.enumerable,
        });
      }
    }
  }
  return to;
};

/** Wrap `mod` in a fresh object carrying a non-enumerable `__esModule: true`. */
const toCommonJS = (mod) =>
  copyProps(defineProp({}, "__esModule", { value: true }), mod);

// The getters are lazy, so they may reference bindings declared further down.
const moduleExports = {};
exportAll(moduleExports, {
  SemanticCache: () => SemanticCache,
  createSemanticCache: () => createSemanticCache,
});
module.exports = toCommonJS(moduleExports);

const vectorDb = require("@neural-tools/vector-db");
const core = require("@neural-tools/core");

/**
 * Cache of LLM responses keyed by prompt embedding.
 *
 * A lookup embeds the prompt, asks the vector store for the single nearest
 * entry, and returns its stored response when the match score reaches the
 * configured similarity threshold and the entry has not outlived its TTL.
 */
class SemanticCache {
  vectorStore = null;
  config;
  initialized = false;

  /**
   * @param {object} [config]
   * @param {number} [config.similarityThreshold] Minimum match score. Default 0.95.
   * @param {number} [config.ttl] Entry lifetime in seconds; 0 means never expire. Default 3600.
   * @param {string} [config.provider] Vector store provider. Default "local".
   * @param {object} [config.vectorDBConfig] Extra options spread into the vector store config.
   */
  constructor(config = {}) {
    this.config = {
      // `??` rather than `||`: an explicit 0 is a meaningful value for both
      // numeric options (ttl 0 = never expire) and must not be replaced by
      // the default.
      similarityThreshold: config.similarityThreshold ?? 0.95,
      ttl: config.ttl ?? 3600,
      provider: config.provider || "local",
      vectorDBConfig: config.vectorDBConfig || {},
    };
  }

  /**
   * Create and connect the vector store. Does nothing if already initialized.
   * For any provider other than "local", `requireFeature` is awaited first.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (this.config.provider !== "local") {
      await (0, core.requireFeature)("semantic-cache", "Semantic Cache");
    }
    this.vectorStore = await (0, vectorDb.createVectorStore)({
      provider: this.config.provider,
      ...this.config.vectorDBConfig,
    });
    await this.vectorStore.connect();
    this.initialized = true;
  }

  /**
   * Look up a cached response for `prompt`.
   * @param {string} prompt
   * @returns {Promise<string|null>} The stored response, or null on a miss
   *   (no result, score below threshold, or entry expired).
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async get(prompt) {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }

    const embedding = await (0, vectorDb.createEmbedding)(prompt);
    const results = await this.vectorStore.query(embedding, {
      topK: 1,
      includeMetadata: true,
    });
    if (results.length === 0) {
      return null;
    }

    const bestMatch = results[0];
    if (bestMatch.score < this.config.similarityThreshold) {
      return null;
    }

    const entry = bestMatch.metadata;
    // `timestamp` is in milliseconds (Date.now()), `ttl` in seconds.
    const isExpired =
      entry.ttl && entry.ttl > 0 && Date.now() - entry.timestamp > entry.ttl * 1000;
    if (isExpired) {
      // Expired entries are deleted as they are encountered.
      await this.vectorStore.delete([bestMatch.id]);
      return null;
    }
    return entry.response;
  }

  /**
   * Store `response` under the embedding of `prompt`. Each call writes a new
   * record with a freshly generated id, stamped with the current time and the
   * configured TTL.
   * @param {string} prompt
   * @param {string} response
   * @param {object} [metadata] Caller-supplied data stored alongside the entry.
   * @returns {Promise<void>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async set(prompt, response, metadata) {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }

    const embedding = await (0, vectorDb.createEmbedding)(prompt);
    const entry = {
      prompt,
      response,
      metadata,
      timestamp: Date.now(),
      ttl: this.config.ttl,
    };
    // slice(2, 11) yields the same nine characters the deprecated substr(2, 9) did.
    const id = `cache-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    await this.vectorStore.upsert([{ id, values: embedding, metadata: entry }]);
  }

  /**
   * Remove all entries by deleting the "default" namespace in the vector store.
   * @returns {Promise<void>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async clear() {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }
    await this.vectorStore.deleteNamespace("default");
  }

  /**
   * Placeholder for expired-entry cleanup: ensures the store is initialized
   * and always resolves to 0 (no entries are scanned or removed).
   * @returns {Promise<number>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async cleanup() {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }
    return 0;
  }

  /**
   * Disconnect the vector store (if one exists) and mark the cache as
   * uninitialized so the next operation initializes it again.
   * @returns {Promise<void>}
   */
  async close() {
    if (this.vectorStore) {
      await this.vectorStore.disconnect();
    }
    this.initialized = false;
  }
}

/**
 * Create a semantic cache instance.
 * @param {object} [config] Same options as the SemanticCache constructor.
 * @returns {SemanticCache}
 */
function createSemanticCache(config) {
  return new SemanticCache(config);
}

// Never executes (guarded by `0 &&`). Presumably kept so static analysis of
// this file (e.g. named-export detection when imported from ESM) can see the
// export names; do not remove without checking consumers.
0 && (module.exports = { SemanticCache, createSemanticCache });
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import { createVectorStore, createEmbedding } from "@neural-tools/vector-db";
import { requireFeature } from "@neural-tools/core";

/**
 * Cache of LLM responses keyed by prompt embedding.
 *
 * A lookup embeds the prompt, asks the vector store for the single nearest
 * entry, and returns its stored response when the match score reaches the
 * configured similarity threshold and the entry has not outlived its TTL.
 */
class SemanticCache {
  vectorStore = null;
  config;
  initialized = false;

  /**
   * @param {object} [config]
   * @param {number} [config.similarityThreshold] Minimum match score. Default 0.95.
   * @param {number} [config.ttl] Entry lifetime in seconds; 0 means never expire. Default 3600.
   * @param {string} [config.provider] Vector store provider. Default "local".
   * @param {object} [config.vectorDBConfig] Extra options spread into the vector store config.
   */
  constructor(config = {}) {
    this.config = {
      // `??` rather than `||`: an explicit 0 is a meaningful value for both
      // numeric options (ttl 0 = never expire) and must not be replaced by
      // the default.
      similarityThreshold: config.similarityThreshold ?? 0.95,
      ttl: config.ttl ?? 3600,
      provider: config.provider || "local",
      vectorDBConfig: config.vectorDBConfig || {},
    };
  }

  /**
   * Create and connect the vector store. Does nothing if already initialized.
   * For any provider other than "local", `requireFeature` is awaited first.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (this.config.provider !== "local") {
      await requireFeature("semantic-cache", "Semantic Cache");
    }
    this.vectorStore = await createVectorStore({
      provider: this.config.provider,
      ...this.config.vectorDBConfig,
    });
    await this.vectorStore.connect();
    this.initialized = true;
  }

  /**
   * Look up a cached response for `prompt`.
   * @param {string} prompt
   * @returns {Promise<string|null>} The stored response, or null on a miss
   *   (no result, score below threshold, or entry expired).
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async get(prompt) {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }

    const embedding = await createEmbedding(prompt);
    const results = await this.vectorStore.query(embedding, {
      topK: 1,
      includeMetadata: true,
    });
    if (results.length === 0) {
      return null;
    }

    const bestMatch = results[0];
    if (bestMatch.score < this.config.similarityThreshold) {
      return null;
    }

    const entry = bestMatch.metadata;
    // `timestamp` is in milliseconds (Date.now()), `ttl` in seconds.
    const isExpired =
      entry.ttl && entry.ttl > 0 && Date.now() - entry.timestamp > entry.ttl * 1000;
    if (isExpired) {
      // Expired entries are deleted as they are encountered.
      await this.vectorStore.delete([bestMatch.id]);
      return null;
    }
    return entry.response;
  }

  /**
   * Store `response` under the embedding of `prompt`. Each call writes a new
   * record with a freshly generated id, stamped with the current time and the
   * configured TTL.
   * @param {string} prompt
   * @param {string} response
   * @param {object} [metadata] Caller-supplied data stored alongside the entry.
   * @returns {Promise<void>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async set(prompt, response, metadata) {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }

    const embedding = await createEmbedding(prompt);
    const entry = {
      prompt,
      response,
      metadata,
      timestamp: Date.now(),
      ttl: this.config.ttl,
    };
    // slice(2, 11) yields the same nine characters the deprecated substr(2, 9) did.
    const id = `cache-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    await this.vectorStore.upsert([{ id, values: embedding, metadata: entry }]);
  }

  /**
   * Remove all entries by deleting the "default" namespace in the vector store.
   * @returns {Promise<void>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async clear() {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }
    await this.vectorStore.deleteNamespace("default");
  }

  /**
   * Placeholder for expired-entry cleanup: ensures the store is initialized
   * and always resolves to 0 (no entries are scanned or removed).
   * @returns {Promise<number>}
   * @throws {Error} If the vector store is unavailable after initialization.
   */
  async cleanup() {
    if (!this.initialized) {
      await this.initialize();
    }
    if (!this.vectorStore) {
      throw new Error("Vector store not initialized");
    }
    return 0;
  }

  /**
   * Disconnect the vector store (if one exists) and mark the cache as
   * uninitialized so the next operation initializes it again.
   * @returns {Promise<void>}
   */
  async close() {
    if (this.vectorStore) {
      await this.vectorStore.disconnect();
    }
    this.initialized = false;
  }
}

/**
 * Create a semantic cache instance.
 * @param {object} [config] Same options as the SemanticCache constructor.
 * @returns {SemanticCache}
 */
function createSemanticCache(config) {
  return new SemanticCache(config);
}

export { SemanticCache, createSemanticCache };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@neural-tools/semantic-cache",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Semantic caching for LLM responses",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"url": "https://github.com/MacLeanLuke/neural-tools.git",
|
|
14
14
|
"directory": "packages/semantic-cache"
|
|
15
15
|
},
|
|
16
|
-
"homepage": "https://neural-tools.com",
|
|
16
|
+
"homepage": "https://neural-tools.com/docs/semantic-cache.html",
|
|
17
17
|
"bugs": {
|
|
18
18
|
"url": "https://github.com/MacLeanLuke/neural-tools/issues"
|
|
19
19
|
},
|
|
@@ -27,8 +27,8 @@
|
|
|
27
27
|
"vector-search"
|
|
28
28
|
],
|
|
29
29
|
"dependencies": {
|
|
30
|
-
"@neural-tools/core": "0.1.
|
|
31
|
-
"@neural-tools/vector-db": "0.1.
|
|
30
|
+
"@neural-tools/core": "0.1.6",
|
|
31
|
+
"@neural-tools/vector-db": "0.1.6"
|
|
32
32
|
},
|
|
33
33
|
"devDependencies": {
|
|
34
34
|
"@types/node": "^20.11.5",
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
"dist"
|
|
39
39
|
],
|
|
40
40
|
"scripts": {
|
|
41
|
-
"build": "
|
|
42
|
-
"dev": "
|
|
41
|
+
"build": "tsup",
|
|
42
|
+
"dev": "tsup --watch",
|
|
43
43
|
"clean": "rm -rf dist",
|
|
44
44
|
"test": "echo 'Tests coming soon'"
|
|
45
45
|
}
|