@mostafa.hanafy/prompt-cache 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,203 @@
# @mostafa.hanafy/prompt-cache

![npm](https://img.shields.io/npm/v/@mostafa.hanafy/prompt-cache)
![npm downloads](https://img.shields.io/npm/dm/@mostafa.hanafy/prompt-cache)
![license](https://img.shields.io/npm/l/@mostafa.hanafy/prompt-cache)

Reliable caching layer for LLM API calls to reduce cost, avoid duplicate requests, and improve latency.

## Why

LLM applications often repeat the same requests:

- retries from clients
- repeated user actions
- identical prompts within short time windows
- shared context across workflows

Without caching, you pay for the same request multiple times.

`@mostafa.hanafy/prompt-cache` helps you:

- avoid duplicate API calls
- reduce token usage cost
- improve response latency
- keep caching logic simple and deterministic

## Install

```bash
npm install @mostafa.hanafy/prompt-cache
# or
pnpm add @mostafa.hanafy/prompt-cache
# or
bun add @mostafa.hanafy/prompt-cache
```

## Key capabilities

- deterministic cache keys (`key` / `keyParts`)
- TTL-based expiration
- in-memory cache adapter
- **in-flight request deduplication (prevents duplicate concurrent calls)**
- pluggable cache adapters
- lifecycle hooks

## Quick start

```ts
import { withPromptCache } from "@mostafa.hanafy/prompt-cache";

const response = await withPromptCache({
  keyParts: ["openai", "gpt-4o-mini", prompt, context],
  ttlSeconds: 60,
  call: () =>
    openai.responses.create({
      model: "gpt-4o-mini",
      input: prompt,
    }),
});
```

## Example use case

Avoid duplicate AI calls in high-traffic APIs:

```ts
await withPromptCache({
  keyParts: ["chat", userId, prompt],
  ttlSeconds: 30,
  call: () => generateResponse(prompt),
});
```

If 10 users trigger the same request at the same time:

- without cache → 10 API calls ❌
- with @mostafa.hanafy/prompt-cache → 1 API call ✅
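
The single-call behavior comes from in-flight deduplication: concurrent callers with the same key share one promise instead of each firing a request. A minimal standalone sketch of that mechanism (illustrative only, not the package's exported API; `dedupe` is a hypothetical name):

```typescript
// Sketch of in-flight deduplication: concurrent callers with the same
// key share one promise instead of each triggering a new call.
const inFlight = new Map<string, Promise<unknown>>();

function dedupe<T>(key: string, call: () => Promise<T>): Promise<T> {
  const existing = inFlight.get(key);
  if (existing) return existing as Promise<T>;
  const request = call().finally(() => {
    // Once settled, drop the entry so later calls start fresh.
    inFlight.delete(key);
  });
  inFlight.set(key, request);
  return request;
}
```

`withPromptCache` applies the same idea keyed by the derived cache key, and additionally writes the settled result to the cache.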

## How it works

```text
request
   ↓
generate key
   ↓
cache lookup
   ├─ hit  → return cached result
   └─ miss → execute call → store result
```

## API

### `withPromptCache(options)`

Wraps an async LLM call with cache lookup, in-flight deduplication, and optional lifecycle hooks.

```ts
await withPromptCache({
  key?: string,
  keyParts?: unknown[],
  ttlSeconds?: number,
  cache?: CacheAdapter,
  shouldCache?: (value) => boolean,
  onHit?: (meta) => void,
  onMiss?: (meta) => void,
  onSet?: (meta) => void,
  onError?: (error, meta) => void,
  call: () => Promise<T>,
});
```

Options:

- `key`: explicit cache key (takes precedence over `keyParts`)
- `keyParts`: parts that are deterministically hashed into a key
- `ttlSeconds`: cache TTL in seconds (default `60`)
- `cache`: custom adapter (`memoryCache` is the default)
- `shouldCache`: return `false` to skip writing a result
- `onHit` / `onMiss` / `onSet` / `onError`: lifecycle hooks
- `call`: async operation to execute on cache miss

### `createCacheKey(parts)`

Creates a deterministic key by stable-stringifying the input and hashing it.

Use this when you want to precompute or inspect keys directly.
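
For reference, the derivation (visible in `dist/index.js` below) stable-stringifies the parts with sorted object keys and hashes the result with 32-bit FNV-1a, so logically equal inputs map to the same key regardless of property order. A self-contained sketch of that derivation (`cacheKeyOf` is a stand-in name):

```typescript
// Stable stringify: objects serialize with sorted keys, so logically
// equal inputs always produce the same string.
const stableStringify = (value: unknown): string => {
  if (value === null || typeof value !== "object") return JSON.stringify(value);
  if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`;
  const entries = Object.entries(value as Record<string, unknown>)
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([k, v]) => `${JSON.stringify(k)}:${stableStringify(v)}`);
  return `{${entries.join(",")}}`;
};

// 32-bit FNV-1a hash, hex-encoded.
const hashString = (input: string): string => {
  let h = 2166136261;
  for (let i = 0; i < input.length; i++) {
    h ^= input.charCodeAt(i);
    h = Math.imul(h, 16777619);
  }
  return (h >>> 0).toString(16);
};

const cacheKeyOf = (parts: unknown[]): string => hashString(stableStringify(parts));
```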

### `createMemoryCache()` and `memoryCache`

- `createMemoryCache()`: creates a new isolated in-memory cache adapter
- `memoryCache`: shared default singleton adapter used by `withPromptCache`

## Lifecycle hooks

```ts
await withPromptCache({
  keyParts: [prompt],
  onHit: (meta) => console.log("cache hit", meta),
  onMiss: (meta) => console.log("cache miss", meta),
  onSet: (meta) => console.log("stored in cache", meta),
  onError: (error, meta) => console.error("cache error", error, meta),
  call: async () => aiCall(),
});
```

## Key and TTL guidance

- Include all request inputs in `keyParts` (model, prompt, context, options).
- The in-memory cache is suitable for single-instance apps.
- Use a custom adapter (for example Redis) in distributed deployments.
- Set `ttlSeconds` to match your freshness requirements.
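
A custom adapter only has to implement the `CacheAdapter` shape exported by the package: async `get` and `set`, plus an optional `delete`. As a hedged template (a hypothetical `createTtlMapCache`, not part of the package), these are the same three methods a Redis-backed adapter would implement:

```typescript
// Hypothetical custom adapter matching the package's CacheAdapter type:
// a Map with per-entry expiry, standing in for e.g. a Redis client.
type CacheAdapter = {
  get: (key: string) => Promise<unknown | undefined>;
  set: (key: string, value: unknown, ttlSeconds?: number) => Promise<void>;
  delete?: (key: string) => Promise<void>;
};

const createTtlMapCache = (): CacheAdapter => {
  const store = new Map<string, { value: unknown; expiresAt?: number }>();
  return {
    async get(key) {
      const item = store.get(key);
      if (!item) return undefined;
      if (item.expiresAt !== undefined && Date.now() > item.expiresAt) {
        // Lazily evict expired entries on read.
        store.delete(key);
        return undefined;
      }
      return item.value;
    },
    async set(key, value, ttlSeconds) {
      const expiresAt = typeof ttlSeconds === "number" ? Date.now() + ttlSeconds * 1000 : undefined;
      store.set(key, { value, expiresAt });
    },
    async delete(key) {
      store.delete(key);
    },
  };
};
```

Pass an instance via the `cache` option: `withPromptCache({ cache: createTtlMapCache(), ... })`.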

## Testing and verification

```bash
bun test
bun run typecheck
bun run lint
bun run build
```

## Performance verification (PRD §10)

Run the benchmark harness:

```bash
# preferred
bun bench/overhead.ts

# fallback
tsx bench/overhead.ts
```

It reports `avg/p50/p95/max` latency for:

- the cache-hit path
- the miss path
- the concurrent dedup path

Pass criterion: cache-hit path `p95 < 1ms`.
The benchmark exits with a non-zero code if this criterion fails.

## Limitations

- The in-memory cache does not work across multiple instances
- TTL-based invalidation only (no advanced invalidation yet)
- The cache key must include all relevant inputs

## Related packages

Part of a small AI developer toolkit:

- token-budget-guard — enforce token budgets for LLM calls
- llm-retry-guard — safe retry wrapper for LLM APIs
- ai-request-logger — structured logging for AI requests
- @mostafa.hanafy/prompt-cache — avoid duplicate LLM calls

## License

MIT
package/dist/index.cjs ADDED
@@ -0,0 +1,134 @@
"use strict";
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  createCacheKey: () => createCacheKey,
  createMemoryCache: () => createMemoryCache,
  memoryCache: () => memoryCache,
  withPromptCache: () => withPromptCache
});
module.exports = __toCommonJS(index_exports);

// src/key.ts
var stableStringify = (value) => {
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value);
  }
  if (Array.isArray(value)) {
    return `[${value.map(stableStringify).join(",")}]`;
  }
  const entries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${JSON.stringify(k)}:${stableStringify(v)}`);
  return `{${entries.join(",")}}`;
};
var hashString = (input) => {
  let h = 2166136261;
  for (let i = 0; i < input.length; i++) {
    h ^= input.charCodeAt(i);
    h = Math.imul(h, 16777619);
  }
  return (h >>> 0).toString(16);
};
var createCacheKey = (parts) => {
  return hashString(stableStringify(parts));
};

// src/memory.ts
var createMemoryCache = () => {
  const store = /* @__PURE__ */ new Map();
  return {
    async get(key) {
      const item = store.get(key);
      if (!item) return void 0;
      if (item.expiresAt && Date.now() > item.expiresAt) {
        store.delete(key);
        return void 0;
      }
      return item.value;
    },
    async set(key, value, ttlSeconds) {
      const expiresAt = typeof ttlSeconds === "number" ? Date.now() + ttlSeconds * 1e3 : void 0;
      store.set(key, { value, expiresAt });
    },
    async delete(key) {
      store.delete(key);
    }
  };
};
var memoryCache = createMemoryCache();

// src/cache.ts
var inFlight = /* @__PURE__ */ new Map();
var withPromptCache = async (options) => {
  const {
    key,
    keyParts,
    ttlSeconds = 60,
    cache = memoryCache,
    shouldCache,
    onHit,
    onMiss,
    onSet,
    onError,
    call
  } = options;
  const cacheKey = key ?? createCacheKey(keyParts ?? []);
  if (!cacheKey) {
    throw new Error("Provide `key` or `keyParts`");
  }
  const meta = { key: cacheKey, ttlSeconds };
  try {
    const cached = await cache.get(cacheKey);
    if (cached !== void 0) {
      onHit?.(meta);
      return cached;
    }
    onMiss?.(meta);
    const existing = inFlight.get(cacheKey);
    if (existing) {
      return existing;
    }
    const request = (async () => {
      const result = await call();
      const canCache = shouldCache ? shouldCache(result) : true;
      if (canCache) {
        await cache.set(cacheKey, result, ttlSeconds);
        onSet?.(meta);
      }
      return result;
    })();
    inFlight.set(cacheKey, request);
    try {
      return await request;
    } finally {
      inFlight.delete(cacheKey);
    }
  } catch (error) {
    onError?.(error, meta);
    throw error;
  }
};
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  createCacheKey,
  createMemoryCache,
  memoryCache,
  withPromptCache
});
@@ -0,0 +1,30 @@
type CacheAdapter = {
  get: (key: string) => Promise<unknown | undefined>;
  set: (key: string, value: unknown, ttlSeconds?: number) => Promise<void>;
  delete?: (key: string) => Promise<void>;
};
type CacheEventMeta = {
  key: string;
  ttlSeconds?: number;
};
type PromptCacheOptions<T> = {
  key?: string;
  keyParts?: unknown[];
  ttlSeconds?: number;
  cache?: CacheAdapter;
  shouldCache?: (value: T) => boolean;
  onHit?: (meta: CacheEventMeta) => void;
  onMiss?: (meta: CacheEventMeta) => void;
  onSet?: (meta: CacheEventMeta) => void;
  onError?: (error: unknown, meta: CacheEventMeta) => void;
  call: () => Promise<T>;
};

declare const withPromptCache: <T>(options: PromptCacheOptions<T>) => Promise<T>;

declare const createCacheKey: (parts: unknown[]) => string;

declare const createMemoryCache: () => CacheAdapter;
declare const memoryCache: CacheAdapter;

export { type CacheAdapter, type PromptCacheOptions, createCacheKey, createMemoryCache, memoryCache, withPromptCache };
@@ -0,0 +1,30 @@
type CacheAdapter = {
  get: (key: string) => Promise<unknown | undefined>;
  set: (key: string, value: unknown, ttlSeconds?: number) => Promise<void>;
  delete?: (key: string) => Promise<void>;
};
type CacheEventMeta = {
  key: string;
  ttlSeconds?: number;
};
type PromptCacheOptions<T> = {
  key?: string;
  keyParts?: unknown[];
  ttlSeconds?: number;
  cache?: CacheAdapter;
  shouldCache?: (value: T) => boolean;
  onHit?: (meta: CacheEventMeta) => void;
  onMiss?: (meta: CacheEventMeta) => void;
  onSet?: (meta: CacheEventMeta) => void;
  onError?: (error: unknown, meta: CacheEventMeta) => void;
  call: () => Promise<T>;
};

declare const withPromptCache: <T>(options: PromptCacheOptions<T>) => Promise<T>;

declare const createCacheKey: (parts: unknown[]) => string;

declare const createMemoryCache: () => CacheAdapter;
declare const memoryCache: CacheAdapter;

export { type CacheAdapter, type PromptCacheOptions, createCacheKey, createMemoryCache, memoryCache, withPromptCache };
package/dist/index.js ADDED
@@ -0,0 +1,104 @@
// src/key.ts
var stableStringify = (value) => {
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value);
  }
  if (Array.isArray(value)) {
    return `[${value.map(stableStringify).join(",")}]`;
  }
  const entries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${JSON.stringify(k)}:${stableStringify(v)}`);
  return `{${entries.join(",")}}`;
};
var hashString = (input) => {
  let h = 2166136261;
  for (let i = 0; i < input.length; i++) {
    h ^= input.charCodeAt(i);
    h = Math.imul(h, 16777619);
  }
  return (h >>> 0).toString(16);
};
var createCacheKey = (parts) => {
  return hashString(stableStringify(parts));
};

// src/memory.ts
var createMemoryCache = () => {
  const store = /* @__PURE__ */ new Map();
  return {
    async get(key) {
      const item = store.get(key);
      if (!item) return void 0;
      if (item.expiresAt && Date.now() > item.expiresAt) {
        store.delete(key);
        return void 0;
      }
      return item.value;
    },
    async set(key, value, ttlSeconds) {
      const expiresAt = typeof ttlSeconds === "number" ? Date.now() + ttlSeconds * 1e3 : void 0;
      store.set(key, { value, expiresAt });
    },
    async delete(key) {
      store.delete(key);
    }
  };
};
var memoryCache = createMemoryCache();

// src/cache.ts
var inFlight = /* @__PURE__ */ new Map();
var withPromptCache = async (options) => {
  const {
    key,
    keyParts,
    ttlSeconds = 60,
    cache = memoryCache,
    shouldCache,
    onHit,
    onMiss,
    onSet,
    onError,
    call
  } = options;
  const cacheKey = key ?? createCacheKey(keyParts ?? []);
  if (!cacheKey) {
    throw new Error("Provide `key` or `keyParts`");
  }
  const meta = { key: cacheKey, ttlSeconds };
  try {
    const cached = await cache.get(cacheKey);
    if (cached !== void 0) {
      onHit?.(meta);
      return cached;
    }
    onMiss?.(meta);
    const existing = inFlight.get(cacheKey);
    if (existing) {
      return existing;
    }
    const request = (async () => {
      const result = await call();
      const canCache = shouldCache ? shouldCache(result) : true;
      if (canCache) {
        await cache.set(cacheKey, result, ttlSeconds);
        onSet?.(meta);
      }
      return result;
    })();
    inFlight.set(cacheKey, request);
    try {
      return await request;
    } finally {
      inFlight.delete(cacheKey);
    }
  } catch (error) {
    onError?.(error, meta);
    throw error;
  }
};
export {
  createCacheKey,
  createMemoryCache,
  memoryCache,
  withPromptCache
};
package/package.json ADDED
@@ -0,0 +1,62 @@
{
  "name": "@mostafa.hanafy/prompt-cache",
  "version": "0.1.0",
  "description": "Lightweight caching layer for LLM API calls with deterministic keys and in-flight deduplication.",
  "license": "MIT",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js",
      "require": "./dist/index.cjs"
    }
  },
  "files": [
    "dist",
    "README.md"
  ],
  "sideEffects": false,
  "engines": {
    "node": ">=18"
  },
  "scripts": {
    "build": "tsup src/index.ts --format esm,cjs --dts --clean",
    "dev": "tsup src/index.ts --format esm,cjs --dts --watch",
    "test": "bun test",
    "test:watch": "bun test --watch",
    "typecheck": "tsc --noEmit -p tsconfig.json",
    "lint": "biome lint src tests bench example.ts",
    "biome": "biome check src tests bench example.ts README.md package.json tsconfig.json",
    "format": "biome format --write .",
    "bench": "bun bench/overhead.ts",
    "prepublishOnly": "bun run test && bun run typecheck && bun run lint && bun run build"
  },
  "funding": {
    "type": "individual",
    "url": "https://buymeacoffee.com/mostafahanafy"
  },
  "publishConfig": {
    "access": "public"
  },
  "keywords": [
    "ai",
    "llm",
    "cache",
    "prompt-cache",
    "openai",
    "anthropic",
    "ai-performance",
    "ai-cost",
    "typescript",
    "nodejs"
  ],
  "devDependencies": {
    "@biomejs/biome": "^1.9.4",
    "@types/bun": "^1.3.11",
    "tsup": "^8.3.6",
    "typescript": "^5.7.3"
  }
}