npm - pi-cache-optimizer - Versions diffs - 2.5.4 → 2.5.6 - Mend

pi-cache-optimizer 2.5.4 → 2.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED

@@ -101,6 +101,42 @@ Notes:
 - For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
 - This extension only advises; it does not edit `models.json`.
+### Channels without a `models.json` provider entry
+Some Pi channels may be available even when there is no provider block in `~/.pi/agent/models.json` yet. Keep existing authentication as-is and do not copy credentials, tokens, or API keys. Add only cache/routing compatibility overrides in `models.json`.
+Provider-level minimal override:
+```json
+{
+  "providers": {
+    "your-provider-id": {
+      "compat": {
+        "sendSessionAffinityHeaders": true
+      }
+    }
+  }
+}
+```
+If only one model should change, use `modelOverrides`:
+```json
+{
+  "providers": {
+    "your-provider-id": {
+      "modelOverrides": {
+        "gpt-5.5": {
+          "compat": {
+            "sendSessionAffinityHeaders": true
+          }
+        }
+      }
+    }
+  }
+}
+```
 ## Footer stats
 Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.

package/README.zh-CN.md CHANGED

@@ -101,6 +101,42 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
 - 对 DeepSeek 模型，Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`，再配合缓存 / session-affinity 相关 compat。
 - 本扩展只给建议，不会修改 `models.json`。
+### 没有 `models.json` provider entry 的渠道
+有些 Pi 渠道可用时，`~/.pi/agent/models.json` 里可能还没有对应 provider block。保留现有认证方式，不要复制 credential、token 或 API key。只在 `models.json` 里添加缓存 / 路由兼容覆盖。
+Provider 级最小 override：
+```json
+{
+  "providers": {
+    "your-provider-id": {
+      "compat": {
+        "sendSessionAffinityHeaders": true
+      }
+    }
+  }
+}
+```
+如果只想影响单个模型，用 `modelOverrides`：
+```json
+{
+  "providers": {
+    "your-provider-id": {
+      "modelOverrides": {
+        "gpt-5.5": {
+          "compat": {
+            "sendSessionAffinityHeaders": true
+          }
+        }
+      }
+    }
+  }
+}
+```
 ## Footer 统计
 统计是只读本地计数，保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`，按 Pi session + provider/model 隔离。文件只包含日期和数字计数，不包含 API key、prompt、payload、headers、响应或模型输出。

package/index.ts CHANGED

@@ -1476,7 +1476,9 @@ function isNonEmptyString(value: unknown): boolean {
 function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
   const value = lower(model.baseUrl).trim();
-  if (!value) return false;
+  if (!value) {
+    return lower(model.provider) === "openai";
+  }
   try {
     return new URL(value).hostname === "api.openai.com";
@@ -1538,7 +1540,76 @@ function getPromptCacheRetentionUnsupportedHint(): string {
   return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
 }
-function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
+function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
+  if (!headers) return false;
+  const normalized = Object.entries(headers)
+    .map(([key, value]) => `${lower(key)}: ${lower(value)}`)
+    .join("\n");
+  if (!normalized.includes("prompt_cache_retention")) return false;
+  return [
+    "unsupported parameter",
+    "unsupported_parameter",
+    "unknown parameter",
+    "not supported",
+    "unsupported field",
+  ].some((needle) => normalized.includes(needle));
+}
+type CompatAdvicePlacement = {
+  providerLabel?: string;
+  modelId?: string;
+};
+function buildProviderCompatOverride(providerLabel: string, compat: Record<string, unknown>): Record<string, unknown> {
+  return {
+    providers: {
+      [providerLabel]: {
+        compat,
+      },
+    },
+  };
+}
+function buildModelCompatOverride(providerLabel: string, modelId: string, compat: Record<string, unknown>): Record<string, unknown> {
+  return {
+    providers: {
+      [providerLabel]: {
+        modelOverrides: {
+          [modelId]: {
+            compat,
+          },
+        },
+      },
+    },
+  };
+}
+function appendCredentialSafeProviderGuidance(lines: string[], placement: CompatAdvicePlacement, compatSuggestion: Record<string, unknown>): void {
+  const providerLabel = placement.providerLabel;
+  if (!providerLabel) return;
+  lines.push("");
+  lines.push("If this channel has no models.json provider entry yet:");
+  lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
+  lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
+  if (Object.keys(compatSuggestion).length === 0) {
+    lines.push("- No safe copyable override is available for the missing flags shown above.");
+    return;
+  }
+  lines.push("Provider-level minimal override:");
+  lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
+  if (placement.modelId) {
+    lines.push("Single-model override (use this if only this model should change):");
+    lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
+  }
+}
+function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } & CompatAdvicePlacement = {}): void {
   const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);
   const hasSafeSuggestion = Object.keys(suggestion).length > 0;
@@ -1558,6 +1629,8 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
     lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
     lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
   }
+  appendCredentialSafeProviderGuidance(lines, options, suggestion);
 }
 /**
@@ -1577,6 +1650,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
   // If no slash is found, fall back to the key itself.
   const slashIdx = key.indexOf("/");
   const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
+  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
   const modelsJsonPath = getModelsJsonDisplayPath();
   const lines: string[] = [
@@ -1585,7 +1659,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
     ``,
   ];
-  appendOpenAIProxyCompatAdviceLines(lines, missing);
+  appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
   return lines.join("\n");
 }
@@ -1647,7 +1721,7 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
   return suggestion;
 }
-function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): void {
+function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
   const suggestion = buildDeepSeekCompatSuggestion(missing);
   if (Object.keys(suggestion).length > 0) {
     lines.push("Recommended DeepSeek compat snippet:");
@@ -1669,11 +1743,14 @@ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): vo
   if (missing.includes("supportsLongCacheRetention")) {
     lines.push("- supportsLongCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
   }
+  appendCredentialSafeProviderGuidance(lines, placement, suggestion);
 }
 function buildDeepSeekCompatWarningText(key: string, missing: string[]): string {
   const slashIdx = key.indexOf("/");
   const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
+  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
   const modelsJsonPath = getModelsJsonDisplayPath();
   const lines: string[] = [
     `💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
@@ -1681,7 +1758,7 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
     "",
   ];
-  appendDeepSeekCompatAdviceLines(lines, missing);
+  appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId });
   return lines.join("\n");
 }
@@ -3001,6 +3078,29 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
   return undefined;
 }
+function filterRestorableStatsForSession(
+  persisted: CacheStatsState | undefined,
+  currentSessionHash?: string,
+): Record<string, CacheStats> {
+  if (!persisted || !currentSessionHash) return {};
+  const prefix = `${currentSessionHash}:`;
+  const filteredModelStats: Record<string, CacheStats> = {};
+  for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
+    if (fullKey.startsWith(prefix)) {
+      filteredModelStats[fullKey] = stats;
+    } else if (!fullKey.includes(":")) {
+      // Legacy v3-style key without session hash — migrate to current session.
+      filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
+    } else if (fullKey.startsWith("_nosession:")) {
+      // Transitional _nosession bucket — migrate to current session.
+      filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
+    }
+  }
+  return filteredModelStats;
+}
 /**
  * The closure-internal writer. Since the closure has access to currentSessionHash,
  * it passes the hash and statsByModel here. This function wraps them in the v4
@@ -3326,9 +3426,9 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
     const modelsJsonPath = getModelsJsonDisplayPath();
     lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
     if (deepSeekCompatApplicable) {
-      appendDeepSeekCompatAdviceLines(lines, missing);
+      appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
     } else {
-      appendOpenAIProxyCompatAdviceLines(lines, missing);
+      appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
     }
   } else if (deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
     lines.push("✅ Compat fully configured.");
@@ -3411,6 +3511,13 @@ function buildLowHitDiagnosis(
   const hasRouterRisk = routerNotes.length > 0;
   const hasUsageMissing = missingUsageSamples > 0;
+  // Today's cached-token ratio is used both inside and outside the recent-sample
+  // branch. Keep it block-external so doctor/stats never throw for low-hit
+  // models that have persisted counters but no recent in-memory samples.
+  const todayHitRatio = todayStats.totalInputTokens > 0
+    ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
+    : 0;
   // Determine if there are actual issues worth flagging
   const hasActualIssues = hasMissingCompat || hasUsageMissing ||
     // Low hit trend (today total > 3 and hit ratio < 30%)
@@ -3451,10 +3558,6 @@ function buildLowHitDiagnosis(
   // Priority 4: recent trend low
   if (recent10Total > 0) {
     const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
-    const todayHitRatio = todayStats.totalInputTokens > 0
-      ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
-      : 0;
     if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
       lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
       lines.push("   Likely causes: proxy routing to different backends per request,");
@@ -3504,9 +3607,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
     lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
     lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
     if (deepSeekCompatApplicable) {
-      appendDeepSeekCompatAdviceLines(lines, missing);
+      appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
     } else {
-      appendOpenAIProxyCompatAdviceLines(lines, missing);
+      appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
     }
   }
@@ -3570,6 +3673,7 @@ export const __internals_for_tests = {
   isOfficialOpenAIBaseUrl,
   isCompatCheckApplicable,
   isPromptCacheRetention400Applicable,
+  hasPromptCacheRetentionUnsupportedSignal,
   // Non-GPT OpenAI-compatible model detection
   isKimiLikeModel,
   isKimiLikeAssistantMessage,
@@ -3686,8 +3790,10 @@ export const __internals_for_tests = {
   getAssistantMessageModelTokenValues,
   getCompat,
   modelKey,
-  // Platform-friendly path helper
+  // Platform-friendly path helpers
   getModelsJsonDisplayPath,
+  buildProviderCompatOverride,
+  buildModelCompatOverride,
   captureCacheRetentionEnv,
   requestLongCacheRetention,
   restoreCacheRetentionEnv,
@@ -3724,6 +3830,7 @@ export const __internals_for_tests = {
   hashSessionId,
   makeSessionModelKey,
   modelKeyFromSessionKey,
+  filterRestorableStatsForSession,
   // Persistence helpers (for reload/reset tests)
   mergeCacheSessions,
   writePersistedCacheStats,
@@ -3750,6 +3857,15 @@ export default function (pi: ExtensionAPI) {
   /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
   const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
+  function syncSessionHash(ctx: Pick<ExtensionContext, "sessionManager">): void {
+    const sid = ctx.sessionManager.getSessionId();
+    if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
+      currentSessionId = sid;
+      currentSessionHash = hashSessionId(sid);
+      currentSessionHashSet = true;
+    }
+  }
   /**
    * Build a session-scoped stats key from the current session hash + model key.
    * Returns `${sessionHash}:${provider}/${id}`.
@@ -3906,13 +4022,7 @@ export default function (pi: ExtensionAPI) {
   }
   async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
-    // Set session id on first load and on reload (same session).
-    const sid = ctx.sessionManager.getSessionId();
-    if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
-      currentSessionId = sid;
-      currentSessionHash = hashSessionId(sid);
-      currentSessionHashSet = true;
-    }
+    syncSessionHash(ctx);
     if (reason === "reload") {
       // /reload: preserve session-scoped stats (same session hash).
@@ -3924,73 +4034,31 @@ export default function (pi: ExtensionAPI) {
       clearRecentSamples();
       const persisted = await readPersistedCacheStats();
-      if (persisted && currentSessionHash) {
-        const prefix = `${currentSessionHash}:`;
-        const filteredModelStats: Record<string, CacheStats> = {};
-        for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
-          if (fullKey.startsWith(prefix)) {
-            // Current session's data
-            filteredModelStats[fullKey] = stats;
-          } else if (!fullKey.includes(":")) {
-            // Legacy v3-style key without session hash — migrate to current session
-            filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
-          } else if (fullKey.startsWith("_nosession:")) {
-            // _nosession migration remnant from old-path v4 write — migrate to current session
-            filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
-          }
-        }
-        cacheStatsByModel = filteredModelStats;
-        cacheStatsLegacyFamily = persisted.legacyFamily;
-      } else if (persisted) {
-        cacheStatsByModel = persisted.statsByModel;
-        cacheStatsLegacyFamily = persisted.legacyFamily;
-      } else {
-        cacheStatsByModel = {};
-        cacheStatsLegacyFamily = emptyAllCacheStats();
-      }
+      cacheStatsByModel = filterRestorableStatsForSession(
+        persisted,
+        currentSessionHashSet ? currentSessionHash : undefined,
+      );
+      cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
       await rollOverStatsIfNeeded(ctx);
       return;
     }
     // First load / process start: read persisted stats and filter for
-    // this session's entries. If the session has no persisted data yet,
-    // start fresh.
+    // this session's entries. If the session hash is unavailable, start
+    // fresh instead of loading all persisted session buckets.
     const persisted = await readPersistedCacheStats();
-    if (persisted && currentSessionHash) {
-      const prefix = `${currentSessionHash}:`;
-      const filteredModelStats: Record<string, CacheStats> = {};
-      for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
-        if (fullKey.startsWith(prefix)) {
-          // Current session's data — load it.
-          filteredModelStats[fullKey] = stats;
-        } else if (!fullKey.includes(":")) {
-          // Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
-          // Migrate to current session by prefixing with the session hash.
-          filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
-        } else if (fullKey.startsWith("_nosession:")) {
-          // _nosession migration remnant from old-path v4 write — migrate to current session
-          filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
-        }
-        // Other sessions' entries are preserved in the file but not loaded
-        // into memory; they'll be rewritten on next persist.
-      }
-      cacheStatsByModel = filteredModelStats;
-      cacheStatsLegacyFamily = persisted.legacyFamily;
-    } else if (persisted) {
-      // Persisted data exists but no session hash set yet.
-      // This shouldn't normally happen — use the data as-is.
-      cacheStatsByModel = persisted.statsByModel;
-      cacheStatsLegacyFamily = persisted.legacyFamily;
-    } else {
-      cacheStatsByModel = {};
-      cacheStatsLegacyFamily = emptyAllCacheStats();
-    }
+    cacheStatsByModel = filterRestorableStatsForSession(
+      persisted,
+      currentSessionHashSet ? currentSessionHash : undefined,
+    );
+    cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
     lastStatusText = undefined;
     await rollOverStatsIfNeeded(ctx);
   }
   async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
+    syncSessionHash(ctx);
     await rollOverStatsIfNeeded(ctx);
     const adapter = selectAdapterForModel(model);
@@ -4158,6 +4226,7 @@ export default function (pi: ExtensionAPI) {
     if (!runtimeOptimizerEnabled || !model) return;
     if (event.status !== 400) return;
     if (!isPromptCacheRetention400Applicable(model)) return;
+    if (!hasPromptCacheRetentionUnsupportedSignal(event.headers)) return;
     const key = modelKey(model);
     promptCacheRetention400Models.add(key);
@@ -4172,6 +4241,7 @@ export default function (pi: ExtensionAPI) {
   });
   pi.on("message_end", async (event, ctx) => {
+    syncSessionHash(ctx);
     const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
     if (!adapter) return;
@@ -4218,6 +4288,7 @@ export default function (pi: ExtensionAPI) {
   pi.registerCommand("cache-optimizer", {
     description: "Diagnose Pi cache configuration",
     handler: async (args: string, cmdCtx) => {
+      syncSessionHash(cmdCtx);
       const model = cmdCtx.model;
       const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "pi-cache-optimizer",
-  "version": "2.5.4",
+  "version": "2.5.6",
   "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
   "keywords": [
     "pi-package",