@nano-llm-cache/core 1.0.0

package/dist/index.mjs ADDED
@@ -0,0 +1,423 @@
+ // src/storage.ts
+ import { get, set, del, keys } from "idb-keyval";
+ var CacheStorage = class {
+   constructor(prefix = "nano-llm-cache") {
+     this.prefix = prefix;
+   }
+   /**
+    * Generate storage key
+    */
+   getKey(id) {
+     return `${this.prefix}:${id}`;
+   }
+   /**
+    * Save a cache entry
+    */
+   async save(id, entry) {
+     await set(this.getKey(id), entry);
+   }
+   /**
+    * Get a cache entry by ID
+    */
+   async get(id) {
+     return await get(this.getKey(id));
+   }
+   /**
+    * Get all cache entries
+    */
+   async getAll() {
+     const allKeys = await keys();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     const entries = [];
+     for (const key of cacheKeys) {
+       const entry = await get(key);
+       if (entry) {
+         entries.push(entry);
+       }
+     }
+     return entries;
+   }
+   /**
+    * Delete a cache entry
+    */
+   async delete(id) {
+     await del(this.getKey(id));
+   }
+   /**
+    * Clear all cache entries
+    */
+   async clear() {
+     const allKeys = await keys();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     for (const key of cacheKeys) {
+       await del(key);
+     }
+   }
+   /**
+    * Remove expired entries based on maxAge
+    */
+   async removeExpired(maxAge) {
+     const now = Date.now();
+     const entries = await this.getAll();
+     let removedCount = 0;
+     for (const entry of entries) {
+       if (now - entry.timestamp > maxAge) {
+         const id = this.hashPrompt(entry.prompt);
+         await this.delete(id);
+         removedCount++;
+       }
+     }
+     return removedCount;
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     const entries = await this.getAll();
+     if (entries.length === 0) {
+       return {
+         totalEntries: 0,
+         oldestEntry: null,
+         newestEntry: null
+       };
+     }
+     const timestamps = entries.map((e) => e.timestamp);
+     return {
+       totalEntries: entries.length,
+       oldestEntry: Math.min(...timestamps),
+       newestEntry: Math.max(...timestamps)
+     };
+   }
+ };
+
+ // src/embeddings.ts
+ import { pipeline, env } from "@xenova/transformers";
+
+ // src/similarity.ts
+ function calculateSimilarity(vecA, vecB) {
+   if (vecA.length !== vecB.length) {
+     throw new Error("Vectors must have the same length");
+   }
+   if (vecA.length === 0) {
+     return 0;
+   }
+   let dotProduct = 0;
+   let magnitudeA = 0;
+   let magnitudeB = 0;
+   for (let i = 0; i < vecA.length; i++) {
+     dotProduct += vecA[i] * vecB[i];
+     magnitudeA += vecA[i] * vecA[i];
+     magnitudeB += vecB[i] * vecB[i];
+   }
+   magnitudeA = Math.sqrt(magnitudeA);
+   magnitudeB = Math.sqrt(magnitudeB);
+   if (magnitudeA === 0 || magnitudeB === 0) {
+     return 0;
+   }
+   const similarity = dotProduct / (magnitudeA * magnitudeB);
+   return Math.max(0, Math.min(1, similarity));
+ }
+ function normalizeVector(vec) {
+   const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
+   if (magnitude === 0) {
+     return vec;
+   }
+   return vec.map((val) => val / magnitude);
+ }
+ function toArray(arrayLike) {
+   return Array.from(arrayLike);
+ }
+
+ // src/embeddings.ts
+ env.allowLocalModels = false;
+ env.useBrowserCache = true;
+ var EmbeddingGenerator = class {
+   constructor(modelName = "Xenova/all-MiniLM-L6-v2", debug = false) {
+     this.model = null;
+     this.loading = null;
+     this.modelName = modelName;
+     this.debug = debug;
+   }
+   /**
+    * Initialize the embedding model (lazy loading)
+    */
+   async initialize() {
+     if (this.model) {
+       return;
+     }
+     if (this.loading) {
+       await this.loading;
+       return;
+     }
+     this.loading = (async () => {
+       try {
+         if (this.debug) {
+           console.log(`[NanoCache] Loading embedding model: ${this.modelName}`);
+         }
+         this.model = await pipeline("feature-extraction", this.modelName);
+         if (this.debug) {
+           console.log("[NanoCache] Embedding model loaded successfully");
+         }
+       } catch (error) {
+         this.loading = null;
+         throw new Error(`Failed to load embedding model: ${error}`);
+       }
+     })();
+     await this.loading;
+   }
+   /**
+    * Generate embedding for a text prompt
+    */
+   async generate(text) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     try {
+       const output = await this.model(text, {
+         pooling: "mean",
+         normalize: true
+       });
+       let embedding;
+       if (output.data) {
+         embedding = toArray(output.data);
+       } else if (Array.isArray(output)) {
+         embedding = output;
+       } else {
+         throw new Error("Unexpected embedding output format");
+       }
+       if (this.debug) {
+         console.log(`[NanoCache] Generated embedding of length ${embedding.length}`);
+       }
+       return embedding;
+     } catch (error) {
+       throw new Error(`Failed to generate embedding: ${error}`);
+     }
+   }
+   /**
+    * Generate embeddings for multiple texts in batch
+    */
+   async generateBatch(texts) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     const embeddings = [];
+     for (const text of texts) {
+       const embedding = await this.generate(text);
+       embeddings.push(embedding);
+     }
+     return embeddings;
+   }
+   /**
+    * Check if model is loaded
+    */
+   isLoaded() {
+     return this.model !== null;
+   }
+   /**
+    * Unload the model to free memory
+    */
+   async unload() {
+     this.model = null;
+     this.loading = null;
+     if (this.debug) {
+       console.log("[NanoCache] Embedding model unloaded");
+     }
+   }
+ };
+
+ // src/cache.ts
+ var NanoCache = class {
+   constructor(config = {}) {
+     this.config = {
+       similarityThreshold: config.similarityThreshold ?? 0.95,
+       maxAge: config.maxAge ?? 0,
+       modelName: config.modelName ?? "Xenova/all-MiniLM-L6-v2",
+       debug: config.debug ?? false,
+       storagePrefix: config.storagePrefix ?? "nano-llm-cache"
+     };
+     this.storage = new CacheStorage(this.config.storagePrefix);
+     this.embeddings = new EmbeddingGenerator(this.config.modelName, this.config.debug);
+   }
+   /**
+    * Query the cache for a similar prompt
+    */
+   async query(prompt) {
+     try {
+       if (this.config.maxAge > 0) {
+         await this.storage.removeExpired(this.config.maxAge);
+       }
+       const queryEmbedding = await this.embeddings.generate(prompt);
+       const entries = await this.storage.getAll();
+       if (entries.length === 0) {
+         if (this.config.debug) {
+           console.log("[NanoCache] Cache is empty");
+         }
+         return { hit: false };
+       }
+       let bestMatch = null;
+       let bestSimilarity = 0;
+       for (const entry of entries) {
+         const similarity = calculateSimilarity(queryEmbedding, entry.embedding);
+         if (similarity > bestSimilarity) {
+           bestSimilarity = similarity;
+           bestMatch = entry;
+         }
+       }
+       if (bestMatch && bestSimilarity >= this.config.similarityThreshold) {
+         if (this.config.debug) {
+           console.log(`[NanoCache] Cache HIT! Similarity: ${bestSimilarity.toFixed(4)}`);
+           console.log(`[NanoCache] Original: "${bestMatch.prompt}"`);
+           console.log(`[NanoCache] Query: "${prompt}"`);
+         }
+         return {
+           hit: true,
+           response: bestMatch.response,
+           similarity: bestSimilarity,
+           entry: bestMatch
+         };
+       }
+       if (this.config.debug) {
+         console.log(`[NanoCache] Cache MISS. Best similarity: ${bestSimilarity.toFixed(4)}`);
+       }
+       return { hit: false, similarity: bestSimilarity };
+     } catch (error) {
+       console.error("[NanoCache] Query error:", error);
+       return { hit: false };
+     }
+   }
+   /**
+    * Save a prompt-response pair to the cache
+    */
+   async save(prompt, response, metadata) {
+     try {
+       const embedding = await this.embeddings.generate(prompt);
+       const entry = {
+         prompt,
+         embedding,
+         response,
+         timestamp: Date.now(),
+         metadata
+       };
+       const id = this.hashPrompt(prompt);
+       await this.storage.save(id, entry);
+       if (this.config.debug) {
+         console.log(`[NanoCache] Saved entry for prompt: "${prompt}"`);
+       }
+     } catch (error) {
+       console.error("[NanoCache] Save error:", error);
+       throw error;
+     }
+   }
+   /**
+    * Clear all cached entries
+    */
+   async clear() {
+     await this.storage.clear();
+     if (this.config.debug) {
+       console.log("[NanoCache] Cache cleared");
+     }
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     return await this.storage.getStats();
+   }
+   /**
+    * Check if embedding model is loaded
+    */
+   isModelLoaded() {
+     return this.embeddings.isLoaded();
+   }
+   /**
+    * Preload the embedding model
+    */
+   async preloadModel() {
+     await this.embeddings.generate("warmup");
+     if (this.config.debug) {
+       console.log("[NanoCache] Model preloaded");
+     }
+   }
+   /**
+    * Unload the embedding model to free memory
+    */
+   async unloadModel() {
+     await this.embeddings.unload();
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Create a wrapper for OpenAI-compatible chat completion
+    * This allows drop-in replacement of openai.chat.completions.create
+    */
+   createChatWrapper(originalFn) {
+     const self = this;
+     return (async function wrappedCreate(request) {
+       const userMessage = request.messages.filter((m) => m.role === "user").map((m) => m.content).join("\n");
+       if (!userMessage) {
+         return await originalFn(request);
+       }
+       const cacheResult = await self.query(userMessage);
+       if (cacheResult.hit && cacheResult.response) {
+         return {
+           id: `nano-cache-${Date.now()}`,
+           object: "chat.completion",
+           created: Math.floor(Date.now() / 1e3),
+           model: request.model,
+           choices: [
+             {
+               index: 0,
+               message: {
+                 role: "assistant",
+                 content: cacheResult.response
+               },
+               finish_reason: "stop"
+             }
+           ]
+         };
+       }
+       const response = await originalFn(request);
+       const assistantMessage = response.choices[0]?.message?.content;
+       if (assistantMessage) {
+         await self.save(userMessage, assistantMessage, {
+           model: request.model,
+           timestamp: response.created
+         });
+       }
+       return response;
+     });
+   }
+ };
+ export {
+   NanoCache,
+   calculateSimilarity,
+   normalizeVector
+ };
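
For orientation, a minimal usage sketch of the module above (not part of the published files); callLLM is a hypothetical stand-in for whatever LLM call is being cached:

import { NanoCache } from "@nano-llm-cache/core";

const cache = new NanoCache({ similarityThreshold: 0.9, debug: true });

async function ask(prompt) {
  // The first call lazily downloads the MiniLM embedding model
  // (env.allowLocalModels is false, so it is fetched remotely).
  const result = await cache.query(prompt);
  if (result.hit) {
    return result.response; // a semantically similar prompt was cached
  }
  const response = await callLLM(prompt); // hypothetical LLM call
  await cache.save(prompt, response);
  return response;
}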
package/package.json ADDED
@@ -0,0 +1,68 @@
+ {
+   "name": "@nano-llm-cache/core",
+   "publishConfig": {
+     "access": "public"
+   },
+   "version": "1.0.0",
+   "description": "A semantic cache for LLM API calls using local embeddings and vector similarity",
+   "main": "./dist/index.js",
+   "module": "./dist/index.mjs",
+   "types": "./dist/index.d.ts",
+   "exports": {
+     ".": {
+       "types": "./dist/index.d.ts",
+       "import": "./dist/index.mjs",
+       "require": "./dist/index.js"
+     }
+   },
+   "files": [
+     "dist"
+   ],
+   "scripts": {
+     "build": "tsup src/index.ts --format cjs,esm --dts --clean",
+     "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
+     "test": "vitest",
+     "test:ui": "vitest --ui",
+     "test:coverage": "vitest --coverage",
+     "lint": "eslint src --ext .ts",
+     "prepublishOnly": "npm run build"
+   },
+   "keywords": [
+     "llm",
+     "cache",
+     "semantic",
+     "vector",
+     "embeddings",
+     "openai",
+     "ai",
+     "machine-learning",
+     "transformers"
+   ],
+   "author": "",
+   "license": "MIT",
+   "devDependencies": {
+     "@types/node": "^20.11.5",
+     "@typescript-eslint/eslint-plugin": "^6.19.0",
+     "@typescript-eslint/parser": "^6.19.0",
+     "@vitest/ui": "^1.2.1",
+     "eslint": "^8.56.0",
+     "tsup": "^8.0.1",
+     "typescript": "^5.3.3",
+     "vitest": "^1.2.1"
+   },
+   "dependencies": {
+     "@xenova/transformers": "^2.17.1",
+     "idb-keyval": "^6.2.1"
+   },
+   "peerDependencies": {
+     "openai": "^4.0.0"
+   },
+   "peerDependenciesMeta": {
+     "openai": {
+       "optional": true
+     }
+   },
+   "engines": {
+     "node": ">=18.0.0"
+   }
+ }
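
The optional openai peer dependency supports the drop-in wrapper pattern from createChatWrapper. A hedged sketch, assuming the openai v4 client; the model name and prompt are illustrative:

import OpenAI from "openai";
import { NanoCache } from "@nano-llm-cache/core";

const openai = new OpenAI();
const cache = new NanoCache();

// Bind so the wrapped method keeps its original `this`.
const createWithCache = cache.createChatWrapper(
  openai.chat.completions.create.bind(openai.chat.completions)
);

// Cache hits return a synthetic chat.completion object with
// finish_reason "stop"; misses call through and save the answer.
const res = await createWithCache({
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "What is semantic caching?" }]
});
console.log(res.choices[0].message.content);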