npm - pi-cache-optimizer - Versions diffs - 2.4.0 → 2.4.2 - Mend

pi-cache-optimizer 2.4.0 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -82,6 +82,7 @@ After installation, `PI_CACHE_RETENTION=long` is applied automatically, the syst
 | Env var | Effect |
 |---------|--------|
+| `PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1` | Skip all `before_agent_start` prompt mutations (`<session-overview>` churn strip, skill compression, stable-prefix reorder); footer stats and the `prompt_cache_key` fallback remain active |
 | `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | Keep pi's verbose `<available_skills>` XML (opt out of one-line index) |
 | `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | Disable the OpenAI-family `prompt_cache_key` fallback (default is enabled) |
 | `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | Disable the OpenAI-family `prompt_cache_key` fallback |

package/README.zh-CN.md CHANGED Viewed

@@ -85,6 +85,7 @@ pi install npm:pi-cache-optimizer
 | 环境变量 | 作用 |
 |---------|------|
+| `PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1` | 跳过所有 `before_agent_start` prompt 修改（session-overview 字段剥离、skills 压缩、稳定前缀重排）；底部统计和 `prompt_cache_key` 兜底仍然生效 |
 | `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | 保留 pi 的 verbose `<available_skills>` XML（退出一行索引模式） |
 | `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | 禁用 OpenAI-family `prompt_cache_key` 兜底（默认启用） |
 | `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | 禁用 OpenAI-family `prompt_cache_key` 兜底 |

package/index.ts CHANGED Viewed

@@ -37,6 +37,7 @@ const OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY";
 const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
 const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
 const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
+const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
 // WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
 // logic has accidentally truncated a structural marker (any XML tag or
@@ -102,6 +103,18 @@ type PersistedCacheStatsV2 = {
   statsByProvider: Partial<Record<CacheProviderId, CacheStats>>;
 };
+/** Cache stats state: per-model-key entries plus the legacy per-provider-family fallback. Used in memory and as the v3 persisted shape. */
+type CacheStatsState = {
+  statsByModel: Record<string, CacheStats>;
+  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
+};
+type PersistedCacheStatsV3 = {
+  version: 3;
+  statsByModel: Record<string, CacheStats>;
+  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
+};
 type UsageSnapshot = {
   cacheRead: number;
   cacheWrite: number;
@@ -831,7 +844,7 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
   const missing: string[] = [];
   if (!isOpenAIFamilyModel(model)) return missing;
-  if (model.api !== "openai-completions") return missing;
+  if (lower(model.api) !== "openai-completions") return missing;
   if (isOfficialOpenAIBaseUrl(model)) return missing;
   if (compat.supportsLongCacheRetention !== true) {
@@ -844,6 +857,43 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
   return missing;
 }
+/**
+ * Build the warning text displayed to users when an OpenAI-family third-party
+ * proxy is missing one or more cache/session-affinity compat flags.
+ *
+ * The returned string contains a parseable JSON object (via JSON.stringify)
+ * listing only the missing flags with recommended value `true`. Inline
+ * explanations for each flag follow the JSON snippet as separate prose lines,
+ * so the JSON remains valid and copyable.
+ *
+ * Expected use: the openai adapter's warningText calls this function; tests
+ * exercise it via __internals_for_tests.
+ */
+function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
+  const suggestion: Record<string, boolean> = {};
+  for (const flag of missing) {
+    suggestion[flag] = true;
+  }
+  const lines: string[] = [
+    `💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
+    `Add under the model's compat in ~/.pi/agent/models.json (only if the endpoint supports them):`,
+    ``,
+    JSON.stringify(suggestion, null, 2),
+    ``,
+  ];
+  for (const flag of missing) {
+    if (flag === "supportsLongCacheRetention") {
+      lines.push("- supportsLongCacheRetention: confirm your endpoint or proxy supports long prompt cache retention.");
+    } else if (flag === "sendSessionAffinityHeaders") {
+      lines.push("- sendSessionAffinityHeaders: keeps requests on the same backend for proxy cache locality (session affinity).");
+    }
+  }
+  return lines.join("\n");
+}
 function describeMissingDeepSeekCompat(model: PiModel): string[] {
   const compat = getCompat(model);
   const missing: string[] = [];
@@ -923,11 +973,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
     warningText(model) {
       const missing = describeMissingOpenAIFamilyProxyCompat(model);
       if (missing.length === 0) return undefined;
-      return (
-        `💡 pi-cache-optimizer: ${modelKey(model)} looks like a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}. ` +
-        `For better cache locality, add compat: { "supportsLongCacheRetention": true, "sendSessionAffinityHeaders": true } in ~/.pi/agent/models.json when the endpoint supports these fields.`
-      );
+      return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
     },
   },
   {
@@ -1061,30 +1107,56 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
   };
 }
-function parsePersistedCacheStats(value: unknown): Partial<Record<CacheProviderId, CacheStats>> | undefined {
+function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
   const record = asRecord(value);
   if (!record) return undefined;
-  if (record.version === 1) {
-    const migrated = parseCacheStats(record.stats);
-    return migrated ? { deepseek: migrated } : undefined;
-  }
+  // version 3: model-scoped stats + legacy family fallback
+  if (record.version === 3) {
+    const statsByModel: Record<string, CacheStats> = {};
+    const rawModelMap = asRecord(record.statsByModel);
+    if (rawModelMap) {
+      for (const [key, val] of Object.entries(rawModelMap)) {
+        const parsed = parseCacheStats(val);
+        if (parsed) statsByModel[key] = parsed;
+      }
+    }
-  if (record.version !== 2) return undefined;
+    const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
+    const rawFamily = asRecord(record.legacyFamily);
+    if (rawFamily) {
+      for (const id of CACHE_PROVIDER_IDS) {
+        const stats = parseCacheStats(rawFamily[id]);
+        if (stats) legacyFamily[id] = stats;
+      }
+    }
-  const statsByProvider = asRecord(record.statsByProvider);
-  if (!statsByProvider) return undefined;
+    return { statsByModel, legacyFamily };
+  }
-  const parsed: Partial<Record<CacheProviderId, CacheStats>> = {};
-  for (const id of CACHE_PROVIDER_IDS) {
-    const stats = parseCacheStats(statsByProvider[id]);
-    if (stats) parsed[id] = stats;
+  // version 2: migrate statsByProvider into legacyFamily
+  if (record.version === 2) {
+    const statsByProvider = asRecord(record.statsByProvider);
+    const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
+    if (statsByProvider) {
+      for (const id of CACHE_PROVIDER_IDS) {
+        const stats = parseCacheStats(statsByProvider[id]);
+        if (stats) legacyFamily[id] = stats;
+      }
+    }
+    return { statsByModel: {}, legacyFamily };
   }
-  return parsed;
+  // version 1: single DeepSeek stats -> migrate to legacyFamily.deepseek
+  if (record.version === 1) {
+    const migrated = parseCacheStats(record.stats);
+    return migrated ? { statsByModel: {}, legacyFamily: { deepseek: migrated } } : undefined;
+  }
+  return undefined;
 }
-async function readPersistedCacheStats(): Promise<Partial<Record<CacheProviderId, CacheStats>> | undefined> {
+async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
   try {
     const raw = await readFile(STATE_FILE_PATH, "utf8");
     return parsePersistedCacheStats(JSON.parse(raw));
@@ -1124,9 +1196,13 @@ async function readPersistedCacheStats(): Promise<Partial<Record<CacheProviderId
   return undefined;
 }
-async function writePersistedCacheStats(statsByProvider: Partial<Record<CacheProviderId, CacheStats>>): Promise<void> {
+async function writePersistedCacheStats(state: CacheStatsState): Promise<void> {
   await mkdir(STATE_DIR, { recursive: true });
-  const payload: PersistedCacheStatsV2 = { version: 2, statsByProvider };
+  const payload: PersistedCacheStatsV3 = {
+    version: 3,
+    statsByModel: state.statsByModel,
+    legacyFamily: state.legacyFamily,
+  };
   const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
   await writeFile(tempPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
@@ -1148,6 +1224,8 @@ export const __internals_for_tests = {
   compressSkillsInSystemPrompt,
   MIN_STABLE_CANDIDATE_LENGTH,
   SKILL_COMPRESSION_MIN_COUNT,
+  NO_PROMPT_REWRITE_ENV,
+  isEnabledEnv,
   // OpenAI-family cache-key helpers
   addOpenAIPromptCacheKey,
   clampPromptCacheKey,
@@ -1160,30 +1238,64 @@ export const __internals_for_tests = {
   isOpenAIFamilyToken,
   describeMissingOpenAIFamilyProxyCompat,
   isOfficialOpenAIBaseUrl,
+  buildOpenAIProxyCompatWarningText,
   getModelIdNameTokenValues,
   getAssistantMessageModelTokenValues,
   getCompat,
   modelKey,
+  // Cache stats helpers (module-level, usable from verify script)
+  addUsageToCacheStats,
+  formatCacheStats,
+  emptyCacheStats,
+  emptyAllCacheStats,
+  parseCacheStats,
+  parsePersistedCacheStats,
 };
 export default function (pi: ExtensionAPI) {
   const warnedModels = new Set<string>();
-  let cacheStatsByProvider: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
+  let cacheStatsByModel: Record<string, CacheStats> = {};
+  let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
   let lastStatusText: string | undefined;
   let persistenceWarningShown = false;
+  let persistTimer: ReturnType<typeof setTimeout> | null = null;
+  const PERSIST_DEBOUNCE_MS = 2000;
+  function getCacheStatsState(): CacheStatsState {
+    return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
+  }
+  /** Look up active stats for a model, falling back to legacy family. */
+  function getStatsForModel(model: PiModel | undefined, adapter: CacheProviderAdapter): CacheStats {
+    if (model) {
+      const key = modelKey(model);
+      const existing = cacheStatsByModel[key];
+      if (existing) return existing;
+    }
-  function getStatsForAdapter(adapter: CacheProviderAdapter): CacheStats {
-    const existing = cacheStatsByProvider[adapter.id];
+    // Fallback: legacy family bucket — used when no model is supplied
+    // or the model has no per-model entry in cacheStatsByModel yet.
+    const family = cacheStatsLegacyFamily[adapter.id];
+    if (family) return family;
+    const created = emptyCacheStats();
+    cacheStatsLegacyFamily[adapter.id] = created;
+    return created;
+  }
+  /** Get or create a stats entry for the given model key. */
+  function getOrCreateStatsByModelKey(key: string): CacheStats {
+    const existing = cacheStatsByModel[key];
     if (existing) return existing;
     const created = emptyCacheStats();
-    cacheStatsByProvider[adapter.id] = created;
+    cacheStatsByModel[key] = created;
     return created;
   }
   async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
     try {
-      await writePersistedCacheStats(cacheStatsByProvider);
+      await writePersistedCacheStats(getCacheStatsState());
     } catch (error) {
       console.warn(`${LOG_PREFIX}: failed to persist cache stats`, error);
       if (!persistenceWarningShown) {
@@ -1196,14 +1308,48 @@ export default function (pi: ExtensionAPI) {
     }
   }
+  /** Schedule a debounced persist. Coalesces rapid message_end writes
+   *  into a single disk write after PERSIST_DEBOUNCE_MS of silence.
+   *  In-memory stats remain instantly up-to-date for the footer; only
+   *  the on-disk persistence is delayed. */
+  function schedulePersistCacheStats(ctx?: ExtensionContext): void {
+    if (persistTimer !== null) clearTimeout(persistTimer);
+    persistTimer = setTimeout(() => {
+      persistTimer = null;
+      persistCacheStats(ctx).catch((err) => {
+        console.warn(`${LOG_PREFIX}: debounced persist failed`, err);
+      });
+    }, PERSIST_DEBOUNCE_MS);
+  }
+  /** Cancel any pending debounced persist and write to disk immediately (a write happens even if no timer was pending).
+   *  Used on reload and day-rollover where immediate durability matters. */
+  async function flushPersistCacheStats(ctx?: ExtensionContext): Promise<void> {
+    if (persistTimer !== null) {
+      clearTimeout(persistTimer);
+      persistTimer = null;
+    }
+    await persistCacheStats(ctx);
+  }
   async function rollOverStatsIfNeeded(ctx?: ExtensionContext): Promise<void> {
     const day = currentLocalDay();
     let changed = false;
+    // Roll over per-model entries.
+    for (const key of Object.keys(cacheStatsByModel)) {
+      const stats = cacheStatsByModel[key];
+      if (stats && stats.day !== day) {
+        cacheStatsByModel[key] = emptyCacheStats(day);
+        changed = true;
+      }
+    }
+    // Roll over legacy family entries.
     for (const id of CACHE_PROVIDER_IDS) {
-      const stats = cacheStatsByProvider[id];
+      const stats = cacheStatsLegacyFamily[id];
       if (stats && stats.day !== day) {
-        cacheStatsByProvider[id] = emptyCacheStats(day);
+        cacheStatsLegacyFamily[id] = emptyCacheStats(day);
         changed = true;
       }
     }
@@ -1216,13 +1362,21 @@ export default function (pi: ExtensionAPI) {
   async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
     if (reason === "reload") {
-      cacheStatsByProvider = emptyAllCacheStats();
+      cacheStatsByModel = {};
+      cacheStatsLegacyFamily = emptyAllCacheStats();
       lastStatusText = undefined;
-      await persistCacheStats(ctx);
+      await flushPersistCacheStats(ctx);
       return;
     }
-    cacheStatsByProvider = (await readPersistedCacheStats()) ?? emptyAllCacheStats();
+    const persisted = await readPersistedCacheStats();
+    if (persisted) {
+      cacheStatsByModel = persisted.statsByModel;
+      cacheStatsLegacyFamily = persisted.legacyFamily;
+    } else {
+      cacheStatsByModel = {};
+      cacheStatsLegacyFamily = emptyAllCacheStats();
+    }
     lastStatusText = undefined;
     await rollOverStatsIfNeeded(ctx);
   }
@@ -1231,7 +1385,17 @@ export default function (pi: ExtensionAPI) {
     await rollOverStatsIfNeeded(ctx);
     const adapter = selectAdapterForModel(model);
-    let statusText: string | undefined = adapter ? formatCacheStats(adapter, getStatsForAdapter(adapter)) : undefined;
+    let statusText: string | undefined;
+    if (adapter) {
+      // Display only per-model scoped stats. A model with no entry in
+      // cacheStatsByModel shows 0/0 rather than falling back to legacy
+      // family aggregated stats (which could span different providers with
+      // the same model-family name). The message_end hook populates
+      // cacheStatsByModel[key] on first use with that model.
+      const key = model ? modelKey(model) : undefined;
+      const stats = key ? cacheStatsByModel[key] : undefined;
+      statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
+    }
     // If optimizeSystemPrompt detected structural truncation on this or
     // a recent turn, flag it once in the footer so the user knows to
@@ -1294,6 +1458,14 @@ export default function (pi: ExtensionAPI) {
       }
     }
+    // Global opt-out: PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 bypasses all
+    // prompt mutations below (session-overview churn strip, skill compression,
+    // and stable-prefix reordering). Footer stats and the OpenAI
+    // prompt_cache_key fallback remain active.
+    if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV])) {
+      return {};
+    }
     // Step 1: strip per-turn churn from <session-overview>.
     // Removing RECENT COMMITS, Working directory status, and
     // Journal line count makes more of the session-overview stable
@@ -1351,8 +1523,17 @@ export default function (pi: ExtensionAPI) {
     if (!usage) return;
     await rollOverStatsIfNeeded(ctx);
-    addUsageToCacheStats(getStatsForAdapter(adapter), usage);
-    await persistCacheStats(ctx);
+    // Update stats scoped to the active model (provider/id key).
+    // Falls back to legacy family when ctx.model is undefined.
+    if (ctx.model) {
+      const key = modelKey(ctx.model);
+      addUsageToCacheStats(getOrCreateStatsByModelKey(key), usage);
+    } else {
+      addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
+    }
+    schedulePersistCacheStats(ctx);
     await publishStatus(ctx);
   });
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-cache-optimizer",
-  "version": "2.4.0",
+  "version": "2.4.2",
   "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
   "keywords": [
     "pi-package",