npm - pi-cache-optimizer - Versions diffs - 2.5.1 → 2.5.3 - Mend

pi-cache-optimizer 2.5.1 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/index.ts CHANGED Viewed

@@ -4,6 +4,39 @@ import { homedir } from "node:os";
 import { dirname, join } from "node:path";
 import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
+type MutableEnv = Record<string, string | undefined>;
+type CacheRetentionEnvSnapshot = {
+  wasSet: boolean;
+  value?: string;
+};
+const PI_CACHE_RETENTION_ENV = "PI_CACHE_RETENTION";
+const LONG_CACHE_RETENTION_VALUE = "long";
+/**
+ * Snapshot the PI_CACHE_RETENTION state of `env` so it can be restored later.
+ *
+ * @param env Environment map to inspect; defaults to `process.env`.
+ * @returns Whether the variable exists as an own property of `env`, together
+ *          with the value read from it at capture time.
+ */
+function captureCacheRetentionEnv(env: MutableEnv = process.env): CacheRetentionEnvSnapshot {
+  const wasSet = Object.prototype.hasOwnProperty.call(env, PI_CACHE_RETENTION_ENV);
+  const value = env[PI_CACHE_RETENTION_ENV];
+  return { wasSet, value };
+}
+/**
+ * Ensure PI_CACHE_RETENTION in `env` holds the long-retention value.
+ *
+ * @param env Environment map to modify; defaults to `process.env`.
+ */
+function requestLongCacheRetention(env: MutableEnv = process.env): void {
+  // Unset and empty values also differ from the target value, so a single
+  // equality check decides whether a write is needed.
+  if (env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE) return;
+  env[PI_CACHE_RETENTION_ENV] = LONG_CACHE_RETENTION_VALUE;
+}
+/**
+ * Put PI_CACHE_RETENTION in `env` back into the state recorded in `snapshot`.
+ *
+ * @param snapshot Previously recorded state of the variable.
+ * @param env Environment map to modify; defaults to `process.env`.
+ */
+function restoreCacheRetentionEnv(snapshot: CacheRetentionEnvSnapshot, env: MutableEnv = process.env): void {
+  // `value` is optional in the snapshot type. Assigning `undefined` to a
+  // process.env property stores the literal string "undefined", so a snapshot
+  // without a concrete value is restored by removing the variable instead.
+  if (snapshot.wasSet && snapshot.value !== undefined) {
+    env[PI_CACHE_RETENTION_ENV] = snapshot.value;
+    return;
+  }
+  delete env[PI_CACHE_RETENTION_ENV];
+}
+const STARTUP_CACHE_RETENTION_ENV = captureCacheRetentionEnv();
 /**
  * Pi Cache Optimizer (formerly pi-deepseek-cache-optimizer)
  *
@@ -19,10 +52,9 @@ import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@
 // ============================================================
 // Automatically request long prompt-cache retention when Pi supports it.
+// /cache-optimizer disable restores the startup value for this Pi process.
 // ============================================================
-if (!process.env.PI_CACHE_RETENTION || process.env.PI_CACHE_RETENTION !== "long") {
-  process.env.PI_CACHE_RETENTION = "long";
-}
+requestLongCacheRetention();
 type PiModel = NonNullable<ExtensionContext["model"]>;
 type UnknownRecord = Record<string, unknown>;
@@ -40,6 +72,8 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
 const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
 const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
+let runtimeOptimizerEnabled = true;
 // WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
 // logic has accidentally truncated a structural marker (any XML tag or
 // HTML comment boundary marker present in the original prompt), we flip
@@ -657,11 +691,46 @@ function isDisabledEnv(value: string | undefined): boolean {
 }
 function shouldInjectOpenAIPromptCacheKey(): boolean {
+  if (!runtimeOptimizerEnabled) return false;
   if (isEnabledEnv(process.env[NO_OPENAI_CACHE_KEY_ENV])) return false;
   if (isDisabledEnv(process.env[OPENAI_CACHE_KEY_ENV])) return false;
   return true;
 }
+/**
+ * Flip the process-wide optimizer switch and keep PI_CACHE_RETENTION in step.
+ *
+ * Enabling requests long cache retention; disabling restores the state held
+ * in STARTUP_CACHE_RETENTION_ENV.
+ *
+ * @param enabled New state of the runtime switch.
+ * @param env Environment map to modify; defaults to `process.env`.
+ */
+function setRuntimeOptimizerEnabled(enabled: boolean, env: MutableEnv = process.env): void {
+  runtimeOptimizerEnabled = enabled;
+  if (!enabled) {
+    restoreCacheRetentionEnv(STARTUP_CACHE_RETENTION_ENV, env);
+    return;
+  }
+  requestLongCacheRetention(env);
+}
+/** Report whether the runtime optimizer switch is currently on. */
+function isRuntimeOptimizerEnabled(): boolean {
+  return runtimeOptimizerEnabled;
+}
+/**
+ * Build the human-readable report of which optimizer features are active.
+ *
+ * @returns One status line per feature, followed by at most one hint line:
+ *          a reminder that the switch is per-process when disabled, or a note
+ *          that environment variables still turn some features off.
+ */
+function getOptimizerRuntimeModeLines(): string[] {
+  const enabled = runtimeOptimizerEnabled;
+  const onOff = (flag: boolean): string => (flag ? "on" : "off");
+  const rewriteActive = enabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]);
+  const cacheKeyActive = shouldInjectOpenAIPromptCacheKey();
+  const report: string[] = [
+    `Runtime state: ${enabled ? "enabled" : "disabled"}`,
+    `• Prompt rewrite: ${onOff(rewriteActive)}`,
+    `• OpenAI prompt_cache_key fallback: ${onOff(cacheKeyActive)}`,
+    `• Footer cache stats: on${enabled ? "" : " (comparison mode)"}`,
+    `• Compat warnings: ${onOff(enabled)}`,
+    `• ${PI_CACHE_RETENTION_ENV}: ${process.env[PI_CACHE_RETENTION_ENV] ?? "(unset)"}`,
+  ];
+  if (!enabled) {
+    report.push("This is a current-process switch. Run /reload or restart Pi to return to startup behavior.");
+  } else if (!rewriteActive || !cacheKeyActive) {
+    // With the switch on, either flag can only be off because of an env opt-out.
+    report.push("Some features are still disabled by environment variables.");
+  }
+  return report;
+}
+/** Render the runtime mode report as a single newline-separated string. */
+function formatOptimizerRuntimeMode(): string {
+  const report = getOptimizerRuntimeModeLines();
+  return report.join("\n");
+}
 function isAssistantMessage(message: unknown): boolean {
   return asRecord(message)?.role === "assistant";
 }
@@ -1443,6 +1512,40 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
   return missing;
 }
+/**
+ * Pick the compat flags that are suggested as a safe default.
+ *
+ * Only `sendSessionAffinityHeaders` is ever included; any other missing flag
+ * is left out of the suggestion.
+ *
+ * @param missing Names of the compat flags reported as missing.
+ * @returns `{ sendSessionAffinityHeaders: true }` when that flag is missing,
+ *          otherwise an empty object.
+ */
+function buildSafeOpenAIProxyCompatSuggestion(missing: string[]): Record<string, boolean> {
+  const affinityMissing = missing.includes("sendSessionAffinityHeaders");
+  return affinityMissing ? { sendSessionAffinityHeaders: true } : {};
+}
+/**
+ * Return the fixed hint explaining what to do when a channel rejects the
+ * `prompt_cache_retention` parameter with a 400 response.
+ */
+function getPromptCacheRetentionUnsupportedHint(): string {
+  const hint =
+    "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
+  return hint;
+}
+/**
+ * Append compat advice for the given missing flags to `lines` (mutated in place).
+ *
+ * Emits the safe JSON suggestion when there is one (optionally preceded by an
+ * intro line), otherwise a note that no automatic change is recommended for
+ * `supportsLongCacheRetention`; then one explanatory bullet group per missing flag.
+ *
+ * @param lines Output buffer that receives the advice lines.
+ * @param missing Names of the compat flags reported as missing.
+ * @param options `includeJsonIntro: false` suppresses the intro line before the JSON.
+ */
+function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
+  const needsAffinity = missing.includes("sendSessionAffinityHeaders");
+  const needsLongRetention = missing.includes("supportsLongCacheRetention");
+  const safeCompat = buildSafeOpenAIProxyCompatSuggestion(missing);
+
+  if (Object.keys(safeCompat).length > 0) {
+    const wantsIntro = options.includeJsonIntro !== false;
+    if (wantsIntro) lines.push("Safe default suggestion:");
+    lines.push(JSON.stringify(safeCompat, null, 2));
+  } else if (needsLongRetention) {
+    lines.push("No safe automatic JSON change is recommended for `supportsLongCacheRetention`.");
+  }
+
+  if (needsAffinity) {
+    lines.push("- sendSessionAffinityHeaders: recommended for third-party proxies when supported; it helps keep one Pi session on the same upstream/backend.");
+  }
+  if (needsLongRetention) {
+    lines.push(
+      "- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.",
+      `- ${getPromptCacheRetentionUnsupportedHint()}`,
+    );
+  }
+}
 /**
  * Build the warning text displayed to users when an OpenAI-family third-party
  * proxy is missing one or more cache/session-affinity compat flags.
@@ -1456,11 +1559,6 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
  * exercise it via __internals_for_tests.
  */
 function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
-  const suggestion: Record<string, boolean> = {};
-  for (const flag of missing) {
-    suggestion[flag] = true;
-  }
   // Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
   // If no slash is found, fall back to the key itself.
   const slashIdx = key.indexOf("/");
@@ -1469,19 +1567,11 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
   const modelsJsonPath = getModelsJsonDisplayPath();
   const lines: string[] = [
     `💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
-    `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models):`,
-    ``,
-    JSON.stringify(suggestion, null, 2),
+    `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
     ``,
   ];
-  for (const flag of missing) {
-    if (flag === "supportsLongCacheRetention") {
-      lines.push("- supportsLongCacheRetention: confirm your endpoint or proxy supports long prompt cache retention.");
-    } else if (flag === "sendSessionAffinityHeaders") {
-      lines.push("- sendSessionAffinityHeaders: keeps requests on the same backend for proxy cache locality (session affinity).");
-    }
-  }
+  appendOpenAIProxyCompatAdviceLines(lines, missing);
   return lines.join("\n");
 }
@@ -3070,7 +3160,10 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
       "   • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
     );
     notes.push(
-      `   Example compat: { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true }`,
+      `   Safe compat default: { "sendSessionAffinityHeaders": true }`,
+    );
+    notes.push(
+      `   Add supportsLongCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
     );
     return notes;
@@ -3103,7 +3196,7 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
   return notes;
 }
-function buildDoctorDiagnosis(model: PiModel): string {
+function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
   const lines: string[] = [];
   lines.push(`Provider: ${model.provider}`);
   lines.push(`Model:    ${model.id}`);
@@ -3120,16 +3213,25 @@ function buildDoctorDiagnosis(model: PiModel): string {
     const key = modelKey(model);
     const slashIdx = key.indexOf("/");
     const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
-    const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
     const modelsJsonPath = getModelsJsonDisplayPath();
-    lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models):`);
-    lines.push(JSON.stringify(suggestion, null, 2));
+    lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
+    appendOpenAIProxyCompatAdviceLines(lines, missing);
   } else if (isCompatCheckApplicable(model)) {
     lines.push("✅ Compat fully configured.");
   } else {
     lines.push("ℹ️ Compat check not applicable for this model.");
   }
+  if (isCompatCheckApplicable(model) && compat.supportsLongCacheRetention === true) {
+    lines.push("");
+    if (options.promptCacheRetention400) {
+      lines.push("⚠️  A 400 response was observed while supportsLongCacheRetention is enabled.");
+      lines.push(`   ${getPromptCacheRetentionUnsupportedHint()}`);
+    } else {
+      lines.push(`ℹ️ Long retention is enabled. ${getPromptCacheRetentionUnsupportedHint()}`);
+    }
+  }
   // ── Router/channel diagnostics ──
   const routerNotes = describeRouterChannelDiagnostics(model);
   if (routerNotes.length > 0) {
@@ -3280,22 +3382,22 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
   if (missing.length > 0) {
     const slashIdx = key.indexOf("/");
     const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
-    const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
     const modelsJsonPath = getModelsJsonDisplayPath();
     lines.push(`Active model: ${key}`);
     lines.push(`Missing: ${missing.join(", ")}`);
     lines.push("");
     lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
-    lines.push(`(at the same level as baseUrl/api/apiKey/models) and add:`);
-    lines.push(JSON.stringify(suggestion, null, 2));
-    lines.push("");
-    lines.push(`Only enable if your endpoint supports them.`);
+    lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
+    appendOpenAIProxyCompatAdviceLines(lines, missing);
   }
   // When compat is fully configured but router notes exist, prefix the status.
   if (routerNotes.length > 0 && missing.length === 0) {
     if (isCompatCheckApplicable(model)) {
       lines.push("✅ Compat fully configured.");
+      if (getCompat(model).supportsLongCacheRetention === true) {
+        lines.push(getPromptCacheRetentionUnsupportedHint());
+      }
     } else {
       lines.push("ℹ️ Compat check not applicable for this model.");
     }
@@ -3339,6 +3441,8 @@ export const __internals_for_tests = {
   isOpenAIFamilyToken,
   describeMissingOpenAIFamilyProxyCompat,
   describeMissingOpenAICompatibleProxyCompat,
+  buildSafeOpenAIProxyCompatSuggestion,
+  getPromptCacheRetentionUnsupportedHint,
   isOfficialOpenAIBaseUrl,
   // Non-GPT OpenAI-compatible model detection
   isKimiLikeModel,
@@ -3454,6 +3558,15 @@ export const __internals_for_tests = {
   modelKey,
   // Platform-friendly path helper
   getModelsJsonDisplayPath,
+  captureCacheRetentionEnv,
+  requestLongCacheRetention,
+  restoreCacheRetentionEnv,
+  setRuntimeOptimizerEnabled,
+  isRuntimeOptimizerEnabled,
+  getOptimizerRuntimeModeLines,
+  formatOptimizerRuntimeMode,
+  PI_CACHE_RETENTION_ENV,
+  LONG_CACHE_RETENTION_VALUE,
   // Integrity diagnostics
   getLastPromptIntegrityWarningAt,
   // Diagnostic command helpers
@@ -3492,6 +3605,8 @@ export const __internals_for_tests = {
 export default function (pi: ExtensionAPI) {
   const warnedModels = new Set<string>();
+  const promptCacheRetention400Models = new Set<string>();
+  const warnedPromptCacheRetention400Models = new Set<string>();
   let cacheStatsByModel: Record<string, CacheStats> = {};
   let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
   let lastStatusText: string | undefined;
@@ -3582,6 +3697,17 @@ export default function (pi: ExtensionAPI) {
     return created;
   }
+  /**
+   * Drop every stats bucket and recent-sample entry whose key starts with the
+   * current session prefix, then clear the cached status text.
+   */
+  function resetCurrentSessionStats(): void {
+    const sessionPrefix = `${currentSessionHash || "_nosession"}:`;
+    const ownedBySession = (bucketKey: string): boolean => bucketKey.startsWith(sessionPrefix);
+    // Both key lists are materialized before deleting, so nothing is removed
+    // from a collection while it is being iterated.
+    Object.keys(cacheStatsByModel)
+      .filter(ownedBySession)
+      .forEach((bucketKey) => {
+        delete cacheStatsByModel[bucketKey];
+      });
+    Array.from(recentSamplesByModelKey.keys())
+      .filter(ownedBySession)
+      .forEach((bucketKey) => {
+        recentSamplesByModelKey.delete(bucketKey);
+      });
+    // NOTE(review): presumably cleared so the next status publish is not
+    // skipped as a duplicate — confirm against publishStatus.
+    lastStatusText = undefined;
+  }
   async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
     try {
       await writePersistedCacheStats(getCacheStatsState(), currentSessionHashSet ? currentSessionHash : undefined);
@@ -3745,7 +3871,8 @@ export default function (pi: ExtensionAPI) {
       // cacheStatsByModel[sessionModelKey(model)] on first use.
       const sk = model ? sessionModelKey(model) : undefined;
       const stats = sk ? cacheStatsByModel[sk] : undefined;
-      statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
+      const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
+      statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
     }
     // If optimizeSystemPrompt detected structural truncation on this or
@@ -3779,7 +3906,7 @@ export default function (pi: ExtensionAPI) {
     // update so the marker persists through stats changes and day
     // rollovers. Redundant setStatus calls are blocked by the
     // `lastStatusText` early return above.
-    if (statusText !== undefined && model) {
+    if (runtimeOptimizerEnabled && statusText !== undefined && model) {
       const compatMissing = describeMissingOpenAICompatibleProxyCompat(model);
       if (compatMissing.length > 0) {
         statusText = statusText + " ⚠️ compat";
@@ -3794,12 +3921,12 @@ export default function (pi: ExtensionAPI) {
   pi.on("session_start", async (event, ctx) => {
     await restoreCacheStats(event.reason, ctx);
-    notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
+    if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
     await publishStatus(ctx);
   });
   pi.on("model_select", async (event, ctx) => {
-    notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
+    if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
     await publishStatus(ctx, event.model);
   });
@@ -3838,6 +3965,8 @@ export default function (pi: ExtensionAPI) {
       }
     }
+    if (!runtimeOptimizerEnabled) return {};
     // Global opt-out: PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 bypasses all
     // prompt mutations below (session-overview churn strip, skill compression,
     // and stable-prefix reordering). Footer stats and the OpenAI
@@ -3894,6 +4023,25 @@ export default function (pi: ExtensionAPI) {
     return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
   });
+  // Record models that answer HTTP 400 while supportsLongCacheRetention is
+  // enabled in compat, and show the hint at most once per model key.
+  pi.on("after_provider_response", (event, ctx) => {
+    const activeModel = ctx.model;
+    if (!runtimeOptimizerEnabled || !activeModel) return;
+    const isCandidate =
+      event.status === 400 &&
+      isCompatCheckApplicable(activeModel) &&
+      getCompat(activeModel).supportsLongCacheRetention === true;
+    if (!isCandidate) return;
+
+    const key = modelKey(activeModel);
+    // Always remembered, even when the notification below has already been shown.
+    promptCacheRetention400Models.add(key);
+    const alreadyWarned = warnedPromptCacheRetention400Models.has(key);
+    if (alreadyWarned) return;
+    warnedPromptCacheRetention400Models.add(key);
+
+    const message =
+      `⚠️ ${LOG_PREFIX}: ${key} returned HTTP 400 while supportsLongCacheRetention is enabled. ` +
+      getPromptCacheRetentionUnsupportedHint() +
+      ` Run /cache-optimizer doctor for the exact edit location.`;
+    ctx.ui.notify(message, "warning");
+  });
   pi.on("message_end", async (event, ctx) => {
     const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
     if (!adapter) return;
@@ -3929,6 +4077,8 @@ export default function (pi: ExtensionAPI) {
   // ────────────────────────────────────────────────────────────────
   // Register /cache-optimizer command
   // Subcommands:
+  //   enable  — enable runtime prompt/cache optimizations for this process
+  //   disable — disable runtime prompt/cache optimizations for this process
   //   doctor  — show current model/provider/api/baseUrl/compat status
   //             with low-hit diagnosis
   //   stats   — show active model stats bucket, recent trend, usage
@@ -3942,12 +4092,24 @@ export default function (pi: ExtensionAPI) {
       const model = cmdCtx.model;
       const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
-      if (subcommand === "doctor") {
+      if (subcommand === "enable") {
+        setRuntimeOptimizerEnabled(true);
+        resetCurrentSessionStats();
+        await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
+        await publishStatus(cmdCtx as unknown as ExtensionContext, model);
+        cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
+      } else if (subcommand === "disable") {
+        setRuntimeOptimizerEnabled(false);
+        resetCurrentSessionStats();
+        await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
+        await publishStatus(cmdCtx as unknown as ExtensionContext, model);
+        cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
+      } else if (subcommand === "doctor") {
         if (!model) {
           cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
           return;
         }
-        const diagnosis = buildDoctorDiagnosis(model);
+        const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
         const adapter = selectAdapterForModel(model);
         const sk = model ? sessionModelKey(model) : undefined;
         const statsState = sk ? cacheStatsByModel[sk] : undefined;
@@ -4020,18 +4182,32 @@ export default function (pi: ExtensionAPI) {
         // Try interactive selection menu when UI supports it
         if (cmdCtx.hasUI) {
           const menuOptions = [
-            "🩺 Doctor — Show current model cache configuration",
-            "📊 Stats — Show active model stats bucket and trend",
-            "⚙️  Compat — Show compat suggestion with edit instructions",
-            "🔄 Reset — Reset local session stats for current model",
-            "❌ Cancel",
+            "Enable — Turn on runtime optimizations",
+            "Disable — Turn off runtime optimizations",
+            "Doctor — Show cache configuration",
+            "Stats — Show cache stats and trend",
+            "Compat — Show compat suggestion",
+            "Reset — Reset local session stats",
+            "Cancel",
           ];
           const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
           if (choice === menuOptions[0]) {
+            setRuntimeOptimizerEnabled(true);
+            resetCurrentSessionStats();
+            await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
+            await publishStatus(cmdCtx as unknown as ExtensionContext, model);
+            cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
+          } else if (choice === menuOptions[1]) {
+            setRuntimeOptimizerEnabled(false);
+            resetCurrentSessionStats();
+            await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
+            await publishStatus(cmdCtx as unknown as ExtensionContext, model);
+            cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
+          } else if (choice === menuOptions[2]) {
             if (!model) {
               cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
             } else {
-              const diagnosis = buildDoctorDiagnosis(model);
+              const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
               const adapter = selectAdapterForModel(model);
               const sk = model ? sessionModelKey(model) : undefined;
               const statsState = sk ? cacheStatsByModel[sk] : undefined;
@@ -4042,7 +4218,7 @@ export default function (pi: ExtensionAPI) {
                 : diagnosis;
               cmdCtx.ui.notify(fullDiagnosis, "info");
             }
-          } else if (choice === menuOptions[1]) {
+          } else if (choice === menuOptions[3]) {
             if (!model) {
               cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
             } else {
@@ -4053,7 +4229,7 @@ export default function (pi: ExtensionAPI) {
               const output = buildStatsOutput(model, adapter, statsState, samples);
               cmdCtx.ui.notify(output, "info");
             }
-          } else if (choice === menuOptions[2]) {
+          } else if (choice === menuOptions[4]) {
             if (!model) {
               cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
             } else {
@@ -4069,7 +4245,7 @@ export default function (pi: ExtensionAPI) {
                 );
               }
             }
-          } else if (choice === menuOptions[3]) {
+          } else if (choice === menuOptions[5]) {
             if (!model) {
               cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
             } else {
@@ -4098,11 +4274,15 @@ export default function (pi: ExtensionAPI) {
         // Fallback: text help when no interactive UI
         const diagnosis: string[] = [];
         diagnosis.push("📋 /cache-optimizer commands:");
+        diagnosis.push("  enable  — Enable prompt/cache optimizations for this Pi process");
+        diagnosis.push("  disable — Disable prompt/cache optimizations for this Pi process");
         diagnosis.push("  doctor  — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
         diagnosis.push("  stats   — Show active model stats bucket and recent trend");
         diagnosis.push("  compat  — Show compat suggestion with edit location");
         diagnosis.push("  reset   — Reset local session stats for current model (does not affect upstream)");
         diagnosis.push("");
+        diagnosis.push(formatOptimizerRuntimeMode());
+        diagnosis.push("");
         if (model) {
           const displayKey = modelKey(model);
           const missing = describeMissingOpenAICompatibleProxyCompat(model);

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "pi-cache-optimizer",
-  "version": "2.5.1",
-  "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
+  "version": "2.5.3",
+  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
   "keywords": [
     "pi-package",
     "deepseek",