@nano-llm-cache/core 1.0.0
- package/LICENSE +21 -0
- package/README.md +445 -0
- package/dist/index.d.mts +152 -0
- package/dist/index.d.ts +152 -0
- package/dist/index.js +452 -0
- package/dist/index.mjs +423 -0
- package/package.json +68 -0
package/dist/index.mjs
ADDED
@@ -0,0 +1,423 @@
// src/storage.ts
import { get, set, del, keys } from "idb-keyval";
var CacheStorage = class {
  constructor(prefix = "nano-llm-cache") {
    this.prefix = prefix;
  }
  /**
   * Generate storage key
   */
  getKey(id) {
    return `${this.prefix}:${id}`;
  }
  /**
   * Save a cache entry
   */
  async save(id, entry) {
    await set(this.getKey(id), entry);
  }
  /**
   * Get a cache entry by ID
   */
  async get(id) {
    return await get(this.getKey(id));
  }
  /**
   * Get all cache entries
   */
  async getAll() {
    const allKeys = await keys();
    const cacheKeys = allKeys.filter(
      (key) => typeof key === "string" && key.startsWith(this.prefix)
    );
    const entries = [];
    for (const key of cacheKeys) {
      const entry = await get(key);
      if (entry) {
        entries.push(entry);
      }
    }
    return entries;
  }
  /**
   * Delete a cache entry
   */
  async delete(id) {
    await del(this.getKey(id));
  }
  /**
   * Clear all cache entries
   */
  async clear() {
    const allKeys = await keys();
    const cacheKeys = allKeys.filter(
      (key) => typeof key === "string" && key.startsWith(this.prefix)
    );
    for (const key of cacheKeys) {
      await del(key);
    }
  }
  /**
   * Remove expired entries based on maxAge
   */
  async removeExpired(maxAge) {
    const now = Date.now();
    const entries = await this.getAll();
    let removedCount = 0;
    for (const entry of entries) {
      if (now - entry.timestamp > maxAge) {
        const id = this.hashPrompt(entry.prompt);
        await this.delete(id);
        removedCount++;
      }
    }
    return removedCount;
  }
  /**
   * Simple hash function for prompt
   */
  hashPrompt(prompt) {
    let hash = 0;
    for (let i = 0; i < prompt.length; i++) {
      const char = prompt.charCodeAt(i);
      hash = (hash << 5) - hash + char;
      hash = hash & hash;
    }
    return Math.abs(hash).toString(36);
  }
  /**
   * Get cache statistics
   */
  async getStats() {
    const entries = await this.getAll();
    if (entries.length === 0) {
      return {
        totalEntries: 0,
        oldestEntry: null,
        newestEntry: null
      };
    }
    const timestamps = entries.map((e) => e.timestamp);
    return {
      totalEntries: entries.length,
      oldestEntry: Math.min(...timestamps),
      newestEntry: Math.max(...timestamps)
    };
  }
};

// src/embeddings.ts
import { pipeline, env } from "@xenova/transformers";

// src/similarity.ts
function calculateSimilarity(vecA, vecB) {
  if (vecA.length !== vecB.length) {
    throw new Error("Vectors must have the same length");
  }
  if (vecA.length === 0) {
    return 0;
  }
  let dotProduct = 0;
  let magnitudeA = 0;
  let magnitudeB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    magnitudeA += vecA[i] * vecA[i];
    magnitudeB += vecB[i] * vecB[i];
  }
  magnitudeA = Math.sqrt(magnitudeA);
  magnitudeB = Math.sqrt(magnitudeB);
  if (magnitudeA === 0 || magnitudeB === 0) {
    return 0;
  }
  const similarity = dotProduct / (magnitudeA * magnitudeB);
  return Math.max(0, Math.min(1, similarity));
}
function normalizeVector(vec) {
  const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
  if (magnitude === 0) {
    return vec;
  }
  return vec.map((val) => val / magnitude);
}
function toArray(arrayLike) {
  return Array.from(arrayLike);
}

// src/embeddings.ts
env.allowLocalModels = false;
env.useBrowserCache = true;
var EmbeddingGenerator = class {
  constructor(modelName = "Xenova/all-MiniLM-L6-v2", debug = false) {
    this.model = null;
    this.loading = null;
    this.modelName = modelName;
    this.debug = debug;
  }
  /**
   * Initialize the embedding model (lazy loading)
   */
  async initialize() {
    if (this.model) {
      return;
    }
    if (this.loading) {
      await this.loading;
      return;
    }
    this.loading = (async () => {
      try {
        if (this.debug) {
          console.log(`[NanoCache] Loading embedding model: ${this.modelName}`);
        }
        this.model = await pipeline("feature-extraction", this.modelName);
        if (this.debug) {
          console.log("[NanoCache] Embedding model loaded successfully");
        }
      } catch (error) {
        this.loading = null;
        throw new Error(`Failed to load embedding model: ${error}`);
      }
    })();
    await this.loading;
  }
  /**
   * Generate embedding for a text prompt
   */
  async generate(text) {
    await this.initialize();
    if (!this.model) {
      throw new Error("Embedding model not initialized");
    }
    try {
      const output = await this.model(text, {
        pooling: "mean",
        normalize: true
      });
      let embedding;
      if (output.data) {
        embedding = toArray(output.data);
      } else if (Array.isArray(output)) {
        embedding = output;
      } else {
        throw new Error("Unexpected embedding output format");
      }
      if (this.debug) {
        console.log(`[NanoCache] Generated embedding of length ${embedding.length}`);
      }
      return embedding;
    } catch (error) {
      throw new Error(`Failed to generate embedding: ${error}`);
    }
  }
  /**
   * Generate embeddings for multiple texts in batch
   */
  async generateBatch(texts) {
    await this.initialize();
    if (!this.model) {
      throw new Error("Embedding model not initialized");
    }
    const embeddings = [];
    for (const text of texts) {
      const embedding = await this.generate(text);
      embeddings.push(embedding);
    }
    return embeddings;
  }
  /**
   * Check if model is loaded
   */
  isLoaded() {
    return this.model !== null;
  }
  /**
   * Unload the model to free memory
   */
  async unload() {
    this.model = null;
    this.loading = null;
    if (this.debug) {
      console.log("[NanoCache] Embedding model unloaded");
    }
  }
};

// src/cache.ts
var NanoCache = class {
  constructor(config = {}) {
    this.config = {
      similarityThreshold: config.similarityThreshold ?? 0.95,
      maxAge: config.maxAge ?? 0,
      modelName: config.modelName ?? "Xenova/all-MiniLM-L6-v2",
      debug: config.debug ?? false,
      storagePrefix: config.storagePrefix ?? "nano-llm-cache"
    };
    this.storage = new CacheStorage(this.config.storagePrefix);
    this.embeddings = new EmbeddingGenerator(this.config.modelName, this.config.debug);
  }
  /**
   * Query the cache for a similar prompt
   */
  async query(prompt) {
    try {
      if (this.config.maxAge > 0) {
        await this.storage.removeExpired(this.config.maxAge);
      }
      const queryEmbedding = await this.embeddings.generate(prompt);
      const entries = await this.storage.getAll();
      if (entries.length === 0) {
        if (this.config.debug) {
          console.log("[NanoCache] Cache is empty");
        }
        return { hit: false };
      }
      let bestMatch = null;
      let bestSimilarity = 0;
      for (const entry of entries) {
        const similarity = calculateSimilarity(queryEmbedding, entry.embedding);
        if (similarity > bestSimilarity) {
          bestSimilarity = similarity;
          bestMatch = entry;
        }
      }
      if (bestMatch && bestSimilarity >= this.config.similarityThreshold) {
        if (this.config.debug) {
          console.log(`[NanoCache] Cache HIT! Similarity: ${bestSimilarity.toFixed(4)}`);
          console.log(`[NanoCache] Original: "${bestMatch.prompt}"`);
          console.log(`[NanoCache] Query: "${prompt}"`);
        }
        return {
          hit: true,
          response: bestMatch.response,
          similarity: bestSimilarity,
          entry: bestMatch
        };
      }
      if (this.config.debug) {
        console.log(`[NanoCache] Cache MISS. Best similarity: ${bestSimilarity.toFixed(4)}`);
      }
      return { hit: false, similarity: bestSimilarity };
    } catch (error) {
      console.error("[NanoCache] Query error:", error);
      return { hit: false };
    }
  }
  /**
   * Save a prompt-response pair to the cache
   */
  async save(prompt, response, metadata) {
    try {
      const embedding = await this.embeddings.generate(prompt);
      const entry = {
        prompt,
        embedding,
        response,
        timestamp: Date.now(),
        metadata
      };
      const id = this.hashPrompt(prompt);
      await this.storage.save(id, entry);
      if (this.config.debug) {
        console.log(`[NanoCache] Saved entry for prompt: "${prompt}"`);
      }
    } catch (error) {
      console.error("[NanoCache] Save error:", error);
      throw error;
    }
  }
  /**
   * Clear all cached entries
   */
  async clear() {
    await this.storage.clear();
    if (this.config.debug) {
      console.log("[NanoCache] Cache cleared");
    }
  }
  /**
   * Get cache statistics
   */
  async getStats() {
    return await this.storage.getStats();
  }
  /**
   * Check if embedding model is loaded
   */
  isModelLoaded() {
    return this.embeddings.isLoaded();
  }
  /**
   * Preload the embedding model
   */
  async preloadModel() {
    await this.embeddings.generate("warmup");
    if (this.config.debug) {
      console.log("[NanoCache] Model preloaded");
    }
  }
  /**
   * Unload the embedding model to free memory
   */
  async unloadModel() {
    await this.embeddings.unload();
  }
  /**
   * Simple hash function for prompt
   */
  hashPrompt(prompt) {
    let hash = 0;
    for (let i = 0; i < prompt.length; i++) {
      const char = prompt.charCodeAt(i);
      hash = (hash << 5) - hash + char;
      hash = hash & hash;
    }
    return Math.abs(hash).toString(36);
  }
  /**
   * Create a wrapper for OpenAI-compatible chat completion
   * This allows drop-in replacement of openai.chat.completions.create
   */
  createChatWrapper(originalFn) {
    const self = this;
    return (async function wrappedCreate(request) {
      const userMessage = request.messages.filter((m) => m.role === "user").map((m) => m.content).join("\n");
      if (!userMessage) {
        return await originalFn(request);
      }
      const cacheResult = await self.query(userMessage);
      if (cacheResult.hit && cacheResult.response) {
        return {
          id: `nano-cache-${Date.now()}`,
          object: "chat.completion",
          created: Math.floor(Date.now() / 1e3),
          model: request.model,
          choices: [
            {
              index: 0,
              message: {
                role: "assistant",
                content: cacheResult.response
              },
              finish_reason: "stop"
            }
          ]
        };
      }
      const response = await originalFn(request);
      const assistantMessage = response.choices[0]?.message?.content;
      if (assistantMessage) {
        await self.save(userMessage, assistantMessage, {
          model: request.model,
          timestamp: response.created
        });
      }
      return response;
    });
  }
};
export {
  NanoCache,
  calculateSimilarity,
  normalizeVector
};
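For orientation, a minimal usage sketch against the API in dist/index.mjs above. It is illustrative only: the prompts, the "gpt-4o-mini" model name, and the OpenAI client setup are assumptions, not part of the package (openai is an optional peer dependency, per package.json below).

import OpenAI from "openai"; // optional peer dependency, assumed installed
import { NanoCache } from "@nano-llm-cache/core";

const cache = new NanoCache({ similarityThreshold: 0.9, debug: true });
await cache.preloadModel(); // optional; otherwise the model loads lazily on first query/save

// Manual query/save round trip
const first = await cache.query("What is the capital of France?");
if (!first.hit) {
  // The answer would normally come from an LLM call; hard-coded for this sketch
  await cache.save("What is the capital of France?", "Paris.", { model: "gpt-4o-mini" });
}
const second = await cache.query("Capital city of France?"); // semantically similar prompt
console.log(second.hit, second.similarity, second.response);

// Drop-in wrapper around an OpenAI-compatible client (non-streaming requests only)
const openai = new OpenAI();
const cachedCreate = cache.createChatWrapper(
  openai.chat.completions.create.bind(openai.chat.completions)
);
const completion = await cachedCreate({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Capital city of France?" }]
});

Note that query() scans every stored entry and computes cosine similarity per entry, so lookup cost grows linearly with cache size.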
package/package.json
ADDED
@@ -0,0 +1,68 @@
{
  "name": "@nano-llm-cache/core",
  "publishConfig": {
    "access": "public"
  },
  "version": "1.0.0",
  "description": "A semantic cache for LLM API calls using local embeddings and vector similarity",
  "main": "./dist/index.js",
  "module": "./dist/index.mjs",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "require": "./dist/index.js",
      "import": "./dist/index.mjs",
      "types": "./dist/index.d.ts"
    }
  },
  "files": [
    "dist"
  ],
  "scripts": {
    "build": "tsup src/index.ts --format cjs,esm --dts --clean",
    "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
    "test": "vitest",
    "test:ui": "vitest --ui",
    "test:coverage": "vitest --coverage",
    "lint": "eslint src --ext .ts",
    "prepublishOnly": "npm run build"
  },
  "keywords": [
    "llm",
    "cache",
    "semantic",
    "vector",
    "embeddings",
    "openai",
    "ai",
    "machine-learning",
    "transformers"
  ],
  "author": "",
  "license": "MIT",
  "devDependencies": {
    "@types/node": "^20.11.5",
    "@typescript-eslint/eslint-plugin": "^6.19.0",
    "@typescript-eslint/parser": "^6.19.0",
    "@vitest/ui": "^1.2.1",
    "eslint": "^8.56.0",
    "tsup": "^8.0.1",
    "typescript": "^5.3.3",
    "vitest": "^1.2.1"
  },
  "dependencies": {
    "@xenova/transformers": "^2.17.1",
    "idb-keyval": "^6.2.1"
  },
  "peerDependencies": {
    "openai": "^4.0.0"
  },
  "peerDependenciesMeta": {
    "openai": {
      "optional": true
    }
  },
  "engines": {
    "node": ">=18.0.0"
  }
}
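Given the exports map above, ESM and CommonJS consumers resolve to matching bundles. A quick consumption sketch; the two forms are alternatives, not meant for the same file:

// ESM: the "import" condition resolves to dist/index.mjs
import { NanoCache, calculateSimilarity } from "@nano-llm-cache/core";

// CommonJS: the "require" condition resolves to dist/index.js
const { NanoCache } = require("@nano-llm-cache/core");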