@nano-llm-cache/core 1.0.0

@@ -0,0 +1,152 @@
+ /**
+  * Configuration options for NanoCache
+  */
+ interface NanoCacheConfig {
+     /**
+      * Similarity threshold for cache hits (0-1)
+      * @default 0.95
+      */
+     similarityThreshold?: number;
+     /**
+      * Maximum age of cached entries in milliseconds
+      * @default undefined (no expiration)
+      */
+     maxAge?: number;
+     /**
+      * Model name for embeddings
+      * @default 'Xenova/all-MiniLM-L6-v2'
+      */
+     modelName?: string;
+     /**
+      * Enable debug logging
+      * @default false
+      */
+     debug?: boolean;
+     /**
+      * Custom storage key prefix
+      * @default 'nano-llm-cache'
+      */
+     storagePrefix?: string;
+ }
+ /**
+  * Cached entry structure
+  */
+ interface CacheEntry {
+     prompt: string;
+     embedding: number[];
+     response: string;
+     timestamp: number;
+     metadata?: Record<string, any>;
+ }
+ /**
+  * Cache query result
+  */
+ interface CacheQueryResult {
+     hit: boolean;
+     response?: string;
+     similarity?: number;
+     entry?: CacheEntry;
+ }
+ /**
+  * OpenAI-compatible message structure
+  */
+ interface ChatMessage {
+     role: 'system' | 'user' | 'assistant';
+     content: string;
+ }
+ /**
+  * OpenAI-compatible chat completion request
+  */
+ interface ChatCompletionRequest {
+     model: string;
+     messages: ChatMessage[];
+     temperature?: number;
+     max_tokens?: number;
+     [key: string]: any;
+ }
+ /**
+  * OpenAI-compatible chat completion response
+  */
+ interface ChatCompletionResponse {
+     id: string;
+     object: string;
+     created: number;
+     model: string;
+     choices: Array<{
+         index: number;
+         message: ChatMessage;
+         finish_reason: string;
+     }>;
+     usage?: {
+         prompt_tokens: number;
+         completion_tokens: number;
+         total_tokens: number;
+     };
+ }
+
+ /**
+  * NanoCache - Semantic cache for LLM API calls
+  */
+ declare class NanoCache {
+     private storage;
+     private embeddings;
+     private config;
+     constructor(config?: NanoCacheConfig);
+     /**
+      * Query the cache for a similar prompt
+      */
+     query(prompt: string): Promise<CacheQueryResult>;
+     /**
+      * Save a prompt-response pair to the cache
+      */
+     save(prompt: string, response: string, metadata?: Record<string, any>): Promise<void>;
+     /**
+      * Clear all cached entries
+      */
+     clear(): Promise<void>;
+     /**
+      * Get cache statistics
+      */
+     getStats(): Promise<{
+         totalEntries: number;
+         oldestEntry: number | null;
+         newestEntry: number | null;
+     }>;
+     /**
+      * Check if embedding model is loaded
+      */
+     isModelLoaded(): boolean;
+     /**
+      * Preload the embedding model
+      */
+     preloadModel(): Promise<void>;
+     /**
+      * Unload the embedding model to free memory
+      */
+     unloadModel(): Promise<void>;
+     /**
+      * Simple hash function for prompt
+      */
+     private hashPrompt;
+     /**
+      * Create a wrapper for OpenAI-compatible chat completion
+      * This allows drop-in replacement of openai.chat.completions.create
+      */
+     createChatWrapper<T extends (req: ChatCompletionRequest) => Promise<ChatCompletionResponse>>(originalFn: T): T;
+ }
+
+ /**
+  * Calculate cosine similarity between two vectors
+  * @param vecA - First vector
+  * @param vecB - Second vector
+  * @returns Similarity score between 0 and 1
+  */
+ declare function calculateSimilarity(vecA: number[], vecB: number[]): number;
+ /**
+  * Normalize a vector to unit length
+  * @param vec - Input vector
+  * @returns Normalized vector
+  */
+ declare function normalizeVector(vec: number[]): number[];
+
+ export { type CacheEntry, type CacheQueryResult, type ChatCompletionRequest, type ChatCompletionResponse, type ChatMessage, NanoCache, type NanoCacheConfig, calculateSimilarity, normalizeVector };
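The declarations above are the whole public surface: construct a NanoCache, query() it before calling the model, and save() the real answer on a miss so a semantically similar prompt can be served from IndexedDB next time. A minimal usage sketch against this API; the callModel function and the prompts are illustrative placeholders, not anything shipped by the package:

import { NanoCache } from '@nano-llm-cache/core';

const cache = new NanoCache({
  similarityThreshold: 0.9,
  maxAge: 24 * 60 * 60 * 1000, // expire entries after one day
  debug: true
});

// Placeholder for whatever LLM call the application normally makes.
async function callModel(prompt: string): Promise<string> {
  return `stub answer for: ${prompt}`;
}

async function ask(prompt: string): Promise<string> {
  const result = await cache.query(prompt); // embeds the prompt, compares against stored entries
  if (result.hit && result.response) {
    return result.response; // served without calling the model
  }
  const answer = await callModel(prompt);
  await cache.save(prompt, answer); // embed once and persist
  return answer;
}

Entries are keyed by a hash of the exact prompt text, so saving the same prompt twice overwrites the earlier entry rather than accumulating duplicates.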
package/dist/index.js ADDED
@@ -0,0 +1,452 @@
+ "use strict";
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var index_exports = {};
+ __export(index_exports, {
+   NanoCache: () => NanoCache,
+   calculateSimilarity: () => calculateSimilarity,
+   normalizeVector: () => normalizeVector
+ });
+ module.exports = __toCommonJS(index_exports);
+
+ // src/storage.ts
+ var import_idb_keyval = require("idb-keyval");
+ var CacheStorage = class {
+   constructor(prefix = "nano-llm-cache") {
+     this.prefix = prefix;
+   }
+   /**
+    * Generate storage key
+    */
+   getKey(id) {
+     return `${this.prefix}:${id}`;
+   }
+   /**
+    * Save a cache entry
+    */
+   async save(id, entry) {
+     await (0, import_idb_keyval.set)(this.getKey(id), entry);
+   }
+   /**
+    * Get a cache entry by ID
+    */
+   async get(id) {
+     return await (0, import_idb_keyval.get)(this.getKey(id));
+   }
+   /**
+    * Get all cache entries
+    */
+   async getAll() {
+     const allKeys = await (0, import_idb_keyval.keys)();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     const entries = [];
+     for (const key of cacheKeys) {
+       const entry = await (0, import_idb_keyval.get)(key);
+       if (entry) {
+         entries.push(entry);
+       }
+     }
+     return entries;
+   }
+   /**
+    * Delete a cache entry
+    */
+   async delete(id) {
+     await (0, import_idb_keyval.del)(this.getKey(id));
+   }
+   /**
+    * Clear all cache entries
+    */
+   async clear() {
+     const allKeys = await (0, import_idb_keyval.keys)();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     for (const key of cacheKeys) {
+       await (0, import_idb_keyval.del)(key);
+     }
+   }
+   /**
+    * Remove expired entries based on maxAge
+    */
+   async removeExpired(maxAge) {
+     const now = Date.now();
+     const entries = await this.getAll();
+     let removedCount = 0;
+     for (const entry of entries) {
+       if (now - entry.timestamp > maxAge) {
+         const id = this.hashPrompt(entry.prompt);
+         await this.delete(id);
+         removedCount++;
+       }
+     }
+     return removedCount;
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     const entries = await this.getAll();
+     if (entries.length === 0) {
+       return {
+         totalEntries: 0,
+         oldestEntry: null,
+         newestEntry: null
+       };
+     }
+     const timestamps = entries.map((e) => e.timestamp);
+     return {
+       totalEntries: entries.length,
+       oldestEntry: Math.min(...timestamps),
+       newestEntry: Math.max(...timestamps)
+     };
+   }
+ };
+
+ // src/embeddings.ts
+ var import_transformers = require("@xenova/transformers");
+
+ // src/similarity.ts
+ function calculateSimilarity(vecA, vecB) {
+   if (vecA.length !== vecB.length) {
+     throw new Error("Vectors must have the same length");
+   }
+   if (vecA.length === 0) {
+     return 0;
+   }
+   let dotProduct = 0;
+   let magnitudeA = 0;
+   let magnitudeB = 0;
+   for (let i = 0; i < vecA.length; i++) {
+     dotProduct += vecA[i] * vecB[i];
+     magnitudeA += vecA[i] * vecA[i];
+     magnitudeB += vecB[i] * vecB[i];
+   }
+   magnitudeA = Math.sqrt(magnitudeA);
+   magnitudeB = Math.sqrt(magnitudeB);
+   if (magnitudeA === 0 || magnitudeB === 0) {
+     return 0;
+   }
+   const similarity = dotProduct / (magnitudeA * magnitudeB);
+   return Math.max(0, Math.min(1, similarity));
+ }
+ function normalizeVector(vec) {
+   const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
+   if (magnitude === 0) {
+     return vec;
+   }
+   return vec.map((val) => val / magnitude);
+ }
+ function toArray(arrayLike) {
+   return Array.from(arrayLike);
+ }
+
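calculateSimilarity above is plain cosine similarity clamped into [0, 1]; because the embedding generator further down requests normalized output, the score is in practice just the dot product. A quick check with made-up three-dimensional vectors (real all-MiniLM-L6-v2 embeddings have 384 dimensions):

import { calculateSimilarity, normalizeVector } from '@nano-llm-cache/core';

const a = normalizeVector([1, 2, 3]);
const b = normalizeVector([1, 2, 3.1]);  // nearly the same direction
const c = normalizeVector([-3, 0, 0.5]); // pointing away from a

console.log(calculateSimilarity(a, a)); // ~1: identical vectors
console.log(calculateSimilarity(a, b)); // ~0.9999: would clear the default 0.95 threshold
console.log(calculateSimilarity(a, c)); // 0: the negative cosine is clamped to zero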
+ // src/embeddings.ts
+ import_transformers.env.allowLocalModels = false;
+ import_transformers.env.useBrowserCache = true;
+ var EmbeddingGenerator = class {
+   constructor(modelName = "Xenova/all-MiniLM-L6-v2", debug = false) {
+     this.model = null;
+     this.loading = null;
+     this.modelName = modelName;
+     this.debug = debug;
+   }
+   /**
+    * Initialize the embedding model (lazy loading)
+    */
+   async initialize() {
+     if (this.model) {
+       return;
+     }
+     if (this.loading) {
+       await this.loading;
+       return;
+     }
+     this.loading = (async () => {
+       try {
+         if (this.debug) {
+           console.log(`[NanoCache] Loading embedding model: ${this.modelName}`);
+         }
+         this.model = await (0, import_transformers.pipeline)("feature-extraction", this.modelName);
+         if (this.debug) {
+           console.log("[NanoCache] Embedding model loaded successfully");
+         }
+       } catch (error) {
+         this.loading = null;
+         throw new Error(`Failed to load embedding model: ${error}`);
+       }
+     })();
+     await this.loading;
+   }
+   /**
+    * Generate embedding for a text prompt
+    */
+   async generate(text) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     try {
+       const output = await this.model(text, {
+         pooling: "mean",
+         normalize: true
+       });
+       let embedding;
+       if (output.data) {
+         embedding = toArray(output.data);
+       } else if (Array.isArray(output)) {
+         embedding = output;
+       } else {
+         throw new Error("Unexpected embedding output format");
+       }
+       if (this.debug) {
+         console.log(`[NanoCache] Generated embedding of length ${embedding.length}`);
+       }
+       return embedding;
+     } catch (error) {
+       throw new Error(`Failed to generate embedding: ${error}`);
+     }
+   }
+   /**
+    * Generate embeddings for multiple texts in batch
+    */
+   async generateBatch(texts) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     const embeddings = [];
+     for (const text of texts) {
+       const embedding = await this.generate(text);
+       embeddings.push(embedding);
+     }
+     return embeddings;
+   }
+   /**
+    * Check if model is loaded
+    */
+   isLoaded() {
+     return this.model !== null;
+   }
+   /**
+    * Unload the model to free memory
+    */
+   async unload() {
+     this.model = null;
+     this.loading = null;
+     if (this.debug) {
+       console.log("[NanoCache] Embedding model unloaded");
+     }
+   }
+ };
+
+ // src/cache.ts
+ var NanoCache = class {
+   constructor(config = {}) {
+     this.config = {
+       similarityThreshold: config.similarityThreshold ?? 0.95,
+       maxAge: config.maxAge ?? 0,
+       modelName: config.modelName ?? "Xenova/all-MiniLM-L6-v2",
+       debug: config.debug ?? false,
+       storagePrefix: config.storagePrefix ?? "nano-llm-cache"
+     };
+     this.storage = new CacheStorage(this.config.storagePrefix);
+     this.embeddings = new EmbeddingGenerator(this.config.modelName, this.config.debug);
+   }
+   /**
+    * Query the cache for a similar prompt
+    */
+   async query(prompt) {
+     try {
+       if (this.config.maxAge > 0) {
+         await this.storage.removeExpired(this.config.maxAge);
+       }
+       const queryEmbedding = await this.embeddings.generate(prompt);
+       const entries = await this.storage.getAll();
+       if (entries.length === 0) {
+         if (this.config.debug) {
+           console.log("[NanoCache] Cache is empty");
+         }
+         return { hit: false };
+       }
+       let bestMatch = null;
+       let bestSimilarity = 0;
+       for (const entry of entries) {
+         const similarity = calculateSimilarity(queryEmbedding, entry.embedding);
+         if (similarity > bestSimilarity) {
+           bestSimilarity = similarity;
+           bestMatch = entry;
+         }
+       }
+       if (bestMatch && bestSimilarity >= this.config.similarityThreshold) {
+         if (this.config.debug) {
+           console.log(`[NanoCache] Cache HIT! Similarity: ${bestSimilarity.toFixed(4)}`);
+           console.log(`[NanoCache] Original: "${bestMatch.prompt}"`);
+           console.log(`[NanoCache] Query: "${prompt}"`);
+         }
+         return {
+           hit: true,
+           response: bestMatch.response,
+           similarity: bestSimilarity,
+           entry: bestMatch
+         };
+       }
+       if (this.config.debug) {
+         console.log(`[NanoCache] Cache MISS. Best similarity: ${bestSimilarity.toFixed(4)}`);
+       }
+       return { hit: false, similarity: bestSimilarity };
+     } catch (error) {
+       console.error("[NanoCache] Query error:", error);
+       return { hit: false };
+     }
+   }
+   /**
+    * Save a prompt-response pair to the cache
+    */
+   async save(prompt, response, metadata) {
+     try {
+       const embedding = await this.embeddings.generate(prompt);
+       const entry = {
+         prompt,
+         embedding,
+         response,
+         timestamp: Date.now(),
+         metadata
+       };
+       const id = this.hashPrompt(prompt);
+       await this.storage.save(id, entry);
+       if (this.config.debug) {
+         console.log(`[NanoCache] Saved entry for prompt: "${prompt}"`);
+       }
+     } catch (error) {
+       console.error("[NanoCache] Save error:", error);
+       throw error;
+     }
+   }
+   /**
+    * Clear all cached entries
+    */
+   async clear() {
+     await this.storage.clear();
+     if (this.config.debug) {
+       console.log("[NanoCache] Cache cleared");
+     }
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     return await this.storage.getStats();
+   }
+   /**
+    * Check if embedding model is loaded
+    */
+   isModelLoaded() {
+     return this.embeddings.isLoaded();
+   }
+   /**
+    * Preload the embedding model
+    */
+   async preloadModel() {
+     await this.embeddings.generate("warmup");
+     if (this.config.debug) {
+       console.log("[NanoCache] Model preloaded");
+     }
+   }
+   /**
+    * Unload the embedding model to free memory
+    */
+   async unloadModel() {
+     await this.embeddings.unload();
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Create a wrapper for OpenAI-compatible chat completion
+    * This allows drop-in replacement of openai.chat.completions.create
+    */
+   createChatWrapper(originalFn) {
+     const self = this;
+     return (async function wrappedCreate(request) {
+       const userMessage = request.messages.filter((m) => m.role === "user").map((m) => m.content).join("\n");
+       if (!userMessage) {
+         return await originalFn(request);
+       }
+       const cacheResult = await self.query(userMessage);
+       if (cacheResult.hit && cacheResult.response) {
+         return {
+           id: `nano-cache-${Date.now()}`,
+           object: "chat.completion",
+           created: Math.floor(Date.now() / 1e3),
+           model: request.model,
+           choices: [
+             {
+               index: 0,
+               message: {
+                 role: "assistant",
+                 content: cacheResult.response
+               },
+               finish_reason: "stop"
+             }
+           ]
+         };
+       }
+       const response = await originalFn(request);
+       const assistantMessage = response.choices[0]?.message?.content;
+       if (assistantMessage) {
+         await self.save(userMessage, assistantMessage, {
+           model: request.model,
+           timestamp: response.created
+         });
+       }
+       return response;
+     });
+   }
+ };
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   NanoCache,
+   calculateSimilarity,
+   normalizeVector
+ });
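createChatWrapper is the drop-in path: it joins the user messages of an OpenAI-style request into a single cache key, answers from the cache when a hit clears the threshold, and otherwise forwards the request and stores the assistant reply. A minimal sketch of wrapping a completion function; the stub below merely stands in for a real client call such as openai.chat.completions.create and is not part of the package:

import {
  NanoCache,
  type ChatCompletionRequest,
  type ChatCompletionResponse,
} from '@nano-llm-cache/core';

const cache = new NanoCache({ similarityThreshold: 0.92 });

// Stand-in for a real client call; any function with this shape can be wrapped.
async function createCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
  return {
    id: 'demo-1',
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model: req.model,
    choices: [
      { index: 0, message: { role: 'assistant', content: 'A semantic cache matches prompts by meaning.' }, finish_reason: 'stop' },
    ],
  };
}

const createWithCache = cache.createChatWrapper(createCompletion);

async function main() {
  // First call misses, goes to createCompletion, and is saved.
  await createWithCache({
    model: 'demo-model',
    messages: [{ role: 'user', content: 'What is a semantic cache?' }],
  });
  // If the embedding similarity of this paraphrase clears the threshold,
  // the reply is served from the cache without a second call.
  const reply = await createWithCache({
    model: 'demo-model',
    messages: [{ role: 'user', content: 'Explain what a semantic cache is.' }],
  });
  console.log(reply.choices[0].message.content);
}

main();

Note that only the user messages feed the cache key, so two requests that differ only in their system prompt can share a cached answer.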