@nano-llm-cache/core 1.0.0

@@ -0,0 +1,152 @@
+ /**
+  * Configuration options for NanoCache
+  */
+ interface NanoCacheConfig {
+     /**
+      * Similarity threshold for cache hits (0-1)
+      * @default 0.95
+      */
+     similarityThreshold?: number;
+     /**
+      * Maximum age of cached entries in milliseconds
+      * @default undefined (no expiration)
+      */
+     maxAge?: number;
+     /**
+      * Model name for embeddings
+      * @default 'Xenova/all-MiniLM-L6-v2'
+      */
+     modelName?: string;
+     /**
+      * Enable debug logging
+      * @default false
+      */
+     debug?: boolean;
+     /**
+      * Custom storage key prefix
+      * @default 'nano-llm-cache'
+      */
+     storagePrefix?: string;
+ }
+ /**
+  * Cached entry structure
+  */
+ interface CacheEntry {
+     prompt: string;
+     embedding: number[];
+     response: string;
+     timestamp: number;
+     metadata?: Record<string, any>;
+ }
+ /**
+  * Cache query result
+  */
+ interface CacheQueryResult {
+     hit: boolean;
+     response?: string;
+     similarity?: number;
+     entry?: CacheEntry;
+ }
+ /**
+  * OpenAI-compatible message structure
+  */
+ interface ChatMessage {
+     role: 'system' | 'user' | 'assistant';
+     content: string;
+ }
+ /**
+  * OpenAI-compatible chat completion request
+  */
+ interface ChatCompletionRequest {
+     model: string;
+     messages: ChatMessage[];
+     temperature?: number;
+     max_tokens?: number;
+     [key: string]: any;
+ }
+ /**
+  * OpenAI-compatible chat completion response
+  */
+ interface ChatCompletionResponse {
+     id: string;
+     object: string;
+     created: number;
+     model: string;
+     choices: Array<{
+         index: number;
+         message: ChatMessage;
+         finish_reason: string;
+     }>;
+     usage?: {
+         prompt_tokens: number;
+         completion_tokens: number;
+         total_tokens: number;
+     };
+ }
+
+ /**
+  * NanoCache - Semantic cache for LLM API calls
+  */
+ declare class NanoCache {
+     private storage;
+     private embeddings;
+     private config;
+     constructor(config?: NanoCacheConfig);
+     /**
+      * Query the cache for a similar prompt
+      */
+     query(prompt: string): Promise<CacheQueryResult>;
+     /**
+      * Save a prompt-response pair to the cache
+      */
+     save(prompt: string, response: string, metadata?: Record<string, any>): Promise<void>;
+     /**
+      * Clear all cached entries
+      */
+     clear(): Promise<void>;
+     /**
+      * Get cache statistics
+      */
+     getStats(): Promise<{
+         totalEntries: number;
+         oldestEntry: number | null;
+         newestEntry: number | null;
+     }>;
+     /**
+      * Check if embedding model is loaded
+      */
+     isModelLoaded(): boolean;
+     /**
+      * Preload the embedding model
+      */
+     preloadModel(): Promise<void>;
+     /**
+      * Unload the embedding model to free memory
+      */
+     unloadModel(): Promise<void>;
+     /**
+      * Simple hash function for prompt
+      */
+     private hashPrompt;
+     /**
+      * Create a wrapper for OpenAI-compatible chat completion
+      * This allows drop-in replacement of openai.chat.completions.create
+      */
+     createChatWrapper<T extends (req: ChatCompletionRequest) => Promise<ChatCompletionResponse>>(originalFn: T): T;
+ }
+
+ /**
+  * Calculate cosine similarity between two vectors
+  * @param vecA - First vector
+  * @param vecB - Second vector
+  * @returns Similarity score between 0 and 1
+  */
+ declare function calculateSimilarity(vecA: number[], vecB: number[]): number;
+ /**
+  * Normalize a vector to unit length
+  * @param vec - Input vector
+  * @returns Normalized vector
+  */
+ declare function normalizeVector(vec: number[]): number[];
+
+ export { type CacheEntry, type CacheQueryResult, type ChatCompletionRequest, type ChatCompletionResponse, type ChatMessage, NanoCache, type NanoCacheConfig, calculateSimilarity, normalizeVector };
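The declarations above are the whole public surface: construct a NanoCache, query() it before calling the model, and save() the real answer on a miss so a semantically similar prompt can be served from IndexedDB next time. A minimal usage sketch against this API; the callModel function and the prompts are illustrative placeholders, not anything shipped by the package:

import { NanoCache } from '@nano-llm-cache/core';

const cache = new NanoCache({
  similarityThreshold: 0.9,
  maxAge: 24 * 60 * 60 * 1000, // expire entries after one day
  debug: true
});

// Placeholder for whatever LLM call the application normally makes.
async function callModel(prompt: string): Promise<string> {
  return `stub answer for: ${prompt}`;
}

async function ask(prompt: string): Promise<string> {
  const result = await cache.query(prompt); // embeds the prompt, compares against stored entries
  if (result.hit && result.response) {
    return result.response; // served without calling the model
  }
  const answer = await callModel(prompt);
  await cache.save(prompt, answer); // embed once and persist
  return answer;
}

Entries are keyed by a hash of the exact prompt text, so saving the same prompt twice overwrites the earlier entry rather than accumulating duplicates.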
package/dist/index.js ADDED
@@ -0,0 +1,452 @@
+ "use strict";
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var index_exports = {};
+ __export(index_exports, {
+   NanoCache: () => NanoCache,
+   calculateSimilarity: () => calculateSimilarity,
+   normalizeVector: () => normalizeVector
+ });
+ module.exports = __toCommonJS(index_exports);
+
+ // src/storage.ts
+ var import_idb_keyval = require("idb-keyval");
+ var CacheStorage = class {
+   constructor(prefix = "nano-llm-cache") {
+     this.prefix = prefix;
+   }
+   /**
+    * Generate storage key
+    */
+   getKey(id) {
+     return `${this.prefix}:${id}`;
+   }
+   /**
+    * Save a cache entry
+    */
+   async save(id, entry) {
+     await (0, import_idb_keyval.set)(this.getKey(id), entry);
+   }
+   /**
+    * Get a cache entry by ID
+    */
+   async get(id) {
+     return await (0, import_idb_keyval.get)(this.getKey(id));
+   }
+   /**
+    * Get all cache entries
+    */
+   async getAll() {
+     const allKeys = await (0, import_idb_keyval.keys)();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     const entries = [];
+     for (const key of cacheKeys) {
+       const entry = await (0, import_idb_keyval.get)(key);
+       if (entry) {
+         entries.push(entry);
+       }
+     }
+     return entries;
+   }
+   /**
+    * Delete a cache entry
+    */
+   async delete(id) {
+     await (0, import_idb_keyval.del)(this.getKey(id));
+   }
+   /**
+    * Clear all cache entries
+    */
+   async clear() {
+     const allKeys = await (0, import_idb_keyval.keys)();
+     const cacheKeys = allKeys.filter(
+       (key) => typeof key === "string" && key.startsWith(this.prefix)
+     );
+     for (const key of cacheKeys) {
+       await (0, import_idb_keyval.del)(key);
+     }
+   }
+   /**
+    * Remove expired entries based on maxAge
+    */
+   async removeExpired(maxAge) {
+     const now = Date.now();
+     const entries = await this.getAll();
+     let removedCount = 0;
+     for (const entry of entries) {
+       if (now - entry.timestamp > maxAge) {
+         const id = this.hashPrompt(entry.prompt);
+         await this.delete(id);
+         removedCount++;
+       }
+     }
+     return removedCount;
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     const entries = await this.getAll();
+     if (entries.length === 0) {
+       return {
+         totalEntries: 0,
+         oldestEntry: null,
+         newestEntry: null
+       };
+     }
+     const timestamps = entries.map((e) => e.timestamp);
+     return {
+       totalEntries: entries.length,
+       oldestEntry: Math.min(...timestamps),
+       newestEntry: Math.max(...timestamps)
+     };
+   }
+ };
+
+ // src/embeddings.ts
+ var import_transformers = require("@xenova/transformers");
+
+ // src/similarity.ts
+ function calculateSimilarity(vecA, vecB) {
+   if (vecA.length !== vecB.length) {
+     throw new Error("Vectors must have the same length");
+   }
+   if (vecA.length === 0) {
+     return 0;
+   }
+   let dotProduct = 0;
+   let magnitudeA = 0;
+   let magnitudeB = 0;
+   for (let i = 0; i < vecA.length; i++) {
+     dotProduct += vecA[i] * vecB[i];
+     magnitudeA += vecA[i] * vecA[i];
+     magnitudeB += vecB[i] * vecB[i];
+   }
+   magnitudeA = Math.sqrt(magnitudeA);
+   magnitudeB = Math.sqrt(magnitudeB);
+   if (magnitudeA === 0 || magnitudeB === 0) {
+     return 0;
+   }
+   const similarity = dotProduct / (magnitudeA * magnitudeB);
+   return Math.max(0, Math.min(1, similarity));
+ }
+ function normalizeVector(vec) {
+   const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
+   if (magnitude === 0) {
+     return vec;
+   }
+   return vec.map((val) => val / magnitude);
+ }
+ function toArray(arrayLike) {
+   return Array.from(arrayLike);
+ }
+
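calculateSimilarity above is plain cosine similarity clamped into [0, 1]; because the embedding generator further down requests normalized output, the score is in practice just the dot product. A quick check with made-up three-dimensional vectors (real all-MiniLM-L6-v2 embeddings have 384 dimensions):

import { calculateSimilarity, normalizeVector } from '@nano-llm-cache/core';

const a = normalizeVector([1, 2, 3]);
const b = normalizeVector([1, 2, 3.1]);  // nearly the same direction
const c = normalizeVector([-3, 0, 0.5]); // pointing away from a

console.log(calculateSimilarity(a, a)); // ~1: identical vectors
console.log(calculateSimilarity(a, b)); // ~0.9999: would clear the default 0.95 threshold
console.log(calculateSimilarity(a, c)); // 0: the negative cosine is clamped to zero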
+ // src/embeddings.ts
+ import_transformers.env.allowLocalModels = false;
+ import_transformers.env.useBrowserCache = true;
+ var EmbeddingGenerator = class {
+   constructor(modelName = "Xenova/all-MiniLM-L6-v2", debug = false) {
+     this.model = null;
+     this.loading = null;
+     this.modelName = modelName;
+     this.debug = debug;
+   }
+   /**
+    * Initialize the embedding model (lazy loading)
+    */
+   async initialize() {
+     if (this.model) {
+       return;
+     }
+     if (this.loading) {
+       await this.loading;
+       return;
+     }
+     this.loading = (async () => {
+       try {
+         if (this.debug) {
+           console.log(`[NanoCache] Loading embedding model: ${this.modelName}`);
+         }
+         this.model = await (0, import_transformers.pipeline)("feature-extraction", this.modelName);
+         if (this.debug) {
+           console.log("[NanoCache] Embedding model loaded successfully");
+         }
+       } catch (error) {
+         this.loading = null;
+         throw new Error(`Failed to load embedding model: ${error}`);
+       }
+     })();
+     await this.loading;
+   }
+   /**
+    * Generate embedding for a text prompt
+    */
+   async generate(text) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     try {
+       const output = await this.model(text, {
+         pooling: "mean",
+         normalize: true
+       });
+       let embedding;
+       if (output.data) {
+         embedding = toArray(output.data);
+       } else if (Array.isArray(output)) {
+         embedding = output;
+       } else {
+         throw new Error("Unexpected embedding output format");
+       }
+       if (this.debug) {
+         console.log(`[NanoCache] Generated embedding of length ${embedding.length}`);
+       }
+       return embedding;
+     } catch (error) {
+       throw new Error(`Failed to generate embedding: ${error}`);
+     }
+   }
+   /**
+    * Generate embeddings for multiple texts in batch
+    */
+   async generateBatch(texts) {
+     await this.initialize();
+     if (!this.model) {
+       throw new Error("Embedding model not initialized");
+     }
+     const embeddings = [];
+     for (const text of texts) {
+       const embedding = await this.generate(text);
+       embeddings.push(embedding);
+     }
+     return embeddings;
+   }
+   /**
+    * Check if model is loaded
+    */
+   isLoaded() {
+     return this.model !== null;
+   }
+   /**
+    * Unload the model to free memory
+    */
+   async unload() {
+     this.model = null;
+     this.loading = null;
+     if (this.debug) {
+       console.log("[NanoCache] Embedding model unloaded");
+     }
+   }
+ };
+
+ // src/cache.ts
+ var NanoCache = class {
+   constructor(config = {}) {
+     this.config = {
+       similarityThreshold: config.similarityThreshold ?? 0.95,
+       maxAge: config.maxAge ?? 0,
+       modelName: config.modelName ?? "Xenova/all-MiniLM-L6-v2",
+       debug: config.debug ?? false,
+       storagePrefix: config.storagePrefix ?? "nano-llm-cache"
+     };
+     this.storage = new CacheStorage(this.config.storagePrefix);
+     this.embeddings = new EmbeddingGenerator(this.config.modelName, this.config.debug);
+   }
+   /**
+    * Query the cache for a similar prompt
+    */
+   async query(prompt) {
+     try {
+       if (this.config.maxAge > 0) {
+         await this.storage.removeExpired(this.config.maxAge);
+       }
+       const queryEmbedding = await this.embeddings.generate(prompt);
+       const entries = await this.storage.getAll();
+       if (entries.length === 0) {
+         if (this.config.debug) {
+           console.log("[NanoCache] Cache is empty");
+         }
+         return { hit: false };
+       }
+       let bestMatch = null;
+       let bestSimilarity = 0;
+       for (const entry of entries) {
+         const similarity = calculateSimilarity(queryEmbedding, entry.embedding);
+         if (similarity > bestSimilarity) {
+           bestSimilarity = similarity;
+           bestMatch = entry;
+         }
+       }
+       if (bestMatch && bestSimilarity >= this.config.similarityThreshold) {
+         if (this.config.debug) {
+           console.log(`[NanoCache] Cache HIT! Similarity: ${bestSimilarity.toFixed(4)}`);
+           console.log(`[NanoCache] Original: "${bestMatch.prompt}"`);
+           console.log(`[NanoCache] Query: "${prompt}"`);
+         }
+         return {
+           hit: true,
+           response: bestMatch.response,
+           similarity: bestSimilarity,
+           entry: bestMatch
+         };
+       }
+       if (this.config.debug) {
+         console.log(`[NanoCache] Cache MISS. Best similarity: ${bestSimilarity.toFixed(4)}`);
+       }
+       return { hit: false, similarity: bestSimilarity };
+     } catch (error) {
+       console.error("[NanoCache] Query error:", error);
+       return { hit: false };
+     }
+   }
+   /**
+    * Save a prompt-response pair to the cache
+    */
+   async save(prompt, response, metadata) {
+     try {
+       const embedding = await this.embeddings.generate(prompt);
+       const entry = {
+         prompt,
+         embedding,
+         response,
+         timestamp: Date.now(),
+         metadata
+       };
+       const id = this.hashPrompt(prompt);
+       await this.storage.save(id, entry);
+       if (this.config.debug) {
+         console.log(`[NanoCache] Saved entry for prompt: "${prompt}"`);
+       }
+     } catch (error) {
+       console.error("[NanoCache] Save error:", error);
+       throw error;
+     }
+   }
+   /**
+    * Clear all cached entries
+    */
+   async clear() {
+     await this.storage.clear();
+     if (this.config.debug) {
+       console.log("[NanoCache] Cache cleared");
+     }
+   }
+   /**
+    * Get cache statistics
+    */
+   async getStats() {
+     return await this.storage.getStats();
+   }
+   /**
+    * Check if embedding model is loaded
+    */
+   isModelLoaded() {
+     return this.embeddings.isLoaded();
+   }
+   /**
+    * Preload the embedding model
+    */
+   async preloadModel() {
+     await this.embeddings.generate("warmup");
+     if (this.config.debug) {
+       console.log("[NanoCache] Model preloaded");
+     }
+   }
+   /**
+    * Unload the embedding model to free memory
+    */
+   async unloadModel() {
+     await this.embeddings.unload();
+   }
+   /**
+    * Simple hash function for prompt
+    */
+   hashPrompt(prompt) {
+     let hash = 0;
+     for (let i = 0; i < prompt.length; i++) {
+       const char = prompt.charCodeAt(i);
+       hash = (hash << 5) - hash + char;
+       hash = hash & hash;
+     }
+     return Math.abs(hash).toString(36);
+   }
+   /**
+    * Create a wrapper for OpenAI-compatible chat completion
+    * This allows drop-in replacement of openai.chat.completions.create
+    */
+   createChatWrapper(originalFn) {
+     const self = this;
+     return (async function wrappedCreate(request) {
+       const userMessage = request.messages.filter((m) => m.role === "user").map((m) => m.content).join("\n");
+       if (!userMessage) {
+         return await originalFn(request);
+       }
+       const cacheResult = await self.query(userMessage);
+       if (cacheResult.hit && cacheResult.response) {
+         return {
+           id: `nano-cache-${Date.now()}`,
+           object: "chat.completion",
+           created: Math.floor(Date.now() / 1e3),
+           model: request.model,
+           choices: [
+             {
+               index: 0,
+               message: {
+                 role: "assistant",
+                 content: cacheResult.response
+               },
+               finish_reason: "stop"
+             }
+           ]
+         };
+       }
+       const response = await originalFn(request);
+       const assistantMessage = response.choices[0]?.message?.content;
+       if (assistantMessage) {
+         await self.save(userMessage, assistantMessage, {
+           model: request.model,
+           timestamp: response.created
+         });
+       }
+       return response;
+     });
+   }
+ };
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   NanoCache,
+   calculateSimilarity,
+   normalizeVector
+ });
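createChatWrapper is the drop-in path: it joins the user messages of an OpenAI-style request into a single cache key, answers from the cache when a hit clears the threshold, and otherwise forwards the request and stores the assistant reply. A minimal sketch of wrapping a completion function; the stub below merely stands in for a real client call such as openai.chat.completions.create and is not part of the package:

import {
  NanoCache,
  type ChatCompletionRequest,
  type ChatCompletionResponse,
} from '@nano-llm-cache/core';

const cache = new NanoCache({ similarityThreshold: 0.92 });

// Stand-in for a real client call; any function with this shape can be wrapped.
async function createCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
  return {
    id: 'demo-1',
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model: req.model,
    choices: [
      { index: 0, message: { role: 'assistant', content: 'A semantic cache matches prompts by meaning.' }, finish_reason: 'stop' },
    ],
  };
}

const createWithCache = cache.createChatWrapper(createCompletion);

async function main() {
  // First call misses, goes to createCompletion, and is saved.
  await createWithCache({
    model: 'demo-model',
    messages: [{ role: 'user', content: 'What is a semantic cache?' }],
  });
  // If the embedding similarity of this paraphrase clears the threshold,
  // the reply is served from the cache without a second call.
  const reply = await createWithCache({
    model: 'demo-model',
    messages: [{ role: 'user', content: 'Explain what a semantic cache is.' }],
  });
  console.log(reply.choices[0].message.content);
}

main();

Note that only the user messages feed the cache key, so two requests that differ only in their system prompt can share a cached answer.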