pi-cache-optimizer 2.5.5 → 2.5.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,6 +101,42 @@ Notes:
101
101
  - For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
102
102
  - This extension only advises; it does not edit `models.json`.
103
103
 
104
+ ### Channels without a `models.json` provider entry
105
+
106
+ Some Pi channels may be available even when there is no provider block in `~/.pi/agent/models.json` yet. Keep existing authentication as-is and do not copy credentials, tokens, or API keys. Add only cache/routing compatibility overrides in `models.json`.
107
+
108
+ Provider-level minimal override:
109
+
110
+ ```json
111
+ {
112
+ "providers": {
113
+ "your-provider-id": {
114
+ "compat": {
115
+ "sendSessionAffinityHeaders": true
116
+ }
117
+ }
118
+ }
119
+ }
120
+ ```
121
+
122
+ If only one model should change, use `modelOverrides`:
123
+
124
+ ```json
125
+ {
126
+ "providers": {
127
+ "your-provider-id": {
128
+ "modelOverrides": {
129
+ "gpt-5.5": {
130
+ "compat": {
131
+ "sendSessionAffinityHeaders": true
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ ```
139
+
104
140
  ## Footer stats
105
141
 
106
142
  Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
package/README.zh-CN.md CHANGED
@@ -101,6 +101,42 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
101
101
  - 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
102
102
  - 本扩展只给建议,不会修改 `models.json`。
103
103
 
104
+ ### 没有 `models.json` provider entry 的渠道
105
+
106
+ 有些 Pi 渠道可用时,`~/.pi/agent/models.json` 里可能还没有对应 provider block。保留现有认证方式,不要复制 credential、token 或 API key。只在 `models.json` 里添加缓存 / 路由兼容覆盖。
107
+
108
+ Provider 级最小 override:
109
+
110
+ ```json
111
+ {
112
+ "providers": {
113
+ "your-provider-id": {
114
+ "compat": {
115
+ "sendSessionAffinityHeaders": true
116
+ }
117
+ }
118
+ }
119
+ }
120
+ ```
121
+
122
+ 如果只想影响单个模型,用 `modelOverrides`:
123
+
124
+ ```json
125
+ {
126
+ "providers": {
127
+ "your-provider-id": {
128
+ "modelOverrides": {
129
+ "gpt-5.5": {
130
+ "compat": {
131
+ "sendSessionAffinityHeaders": true
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ ```
139
+
104
140
  ## Footer 统计
105
141
 
106
142
  统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
package/index.ts CHANGED
@@ -775,6 +775,19 @@ function isOpenAICompatibleApi(api: unknown): boolean {
775
775
  return value === "openai-completions" || value === "openai-responses";
776
776
  }
777
777
 
778
+ function isOpenAICompatibleProxyApi(api: unknown): boolean {
779
+ return lower(api) === "openai-completions";
780
+ }
781
+
782
+ function isResponsesPromptRewriteBypassApi(api: unknown): boolean {
783
+ const value = lower(api);
784
+ return value === "openai-codex-responses" || value === "openai-responses" || value === "azure-openai-responses";
785
+ }
786
+
787
+ function isMistralConversationsApi(api: unknown): boolean {
788
+ return lower(api) === "mistral-conversations";
789
+ }
790
+
778
791
  function isOpenAIFamilyToken(token: string): boolean {
779
792
  return token.includes("gpt-") || token.includes("chatgpt") || OPENAI_REASONING_MODEL_PATTERN.test(token);
780
793
  }
@@ -1476,7 +1489,9 @@ function isNonEmptyString(value: unknown): boolean {
1476
1489
 
1477
1490
  function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
1478
1491
  const value = lower(model.baseUrl).trim();
1479
- if (!value) return false;
1492
+ if (!value) {
1493
+ return lower(model.provider) === "openai";
1494
+ }
1480
1495
 
1481
1496
  try {
1482
1497
  return new URL(value).hostname === "api.openai.com";
@@ -1490,7 +1505,7 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
1490
1505
  const missing: string[] = [];
1491
1506
 
1492
1507
  if (!isOpenAIFamilyModel(model)) return missing;
1493
- if (lower(model.api) !== "openai-completions") return missing;
1508
+ if (!isOpenAICompatibleProxyApi(model.api)) return missing;
1494
1509
  if (isOfficialOpenAIBaseUrl(model)) return missing;
1495
1510
 
1496
1511
  if (compat.supportsLongCacheRetention !== true) {
@@ -1513,7 +1528,7 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
1513
1528
  const compat = getCompat(model);
1514
1529
  const missing: string[] = [];
1515
1530
 
1516
- if (lower(model.api) !== "openai-completions") return missing;
1531
+ if (!isOpenAICompatibleProxyApi(model.api)) return missing;
1517
1532
  if (isOfficialOpenAIBaseUrl(model)) return missing;
1518
1533
 
1519
1534
  if (compat.supportsLongCacheRetention !== true) {
@@ -1538,7 +1553,76 @@ function getPromptCacheRetentionUnsupportedHint(): string {
1538
1553
  return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
1539
1554
  }
1540
1555
 
1541
- function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
1556
+ function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
1557
+ if (!headers) return false;
1558
+
1559
+ const normalized = Object.entries(headers)
1560
+ .map(([key, value]) => `${lower(key)}: ${lower(value)}`)
1561
+ .join("\n");
1562
+ if (!normalized.includes("prompt_cache_retention")) return false;
1563
+
1564
+ return [
1565
+ "unsupported parameter",
1566
+ "unsupported_parameter",
1567
+ "unknown parameter",
1568
+ "not supported",
1569
+ "unsupported field",
1570
+ ].some((needle) => normalized.includes(needle));
1571
+ }
1572
+
1573
+ type CompatAdvicePlacement = {
1574
+ providerLabel?: string;
1575
+ modelId?: string;
1576
+ };
1577
+
1578
+ function buildProviderCompatOverride(providerLabel: string, compat: Record<string, unknown>): Record<string, unknown> {
1579
+ return {
1580
+ providers: {
1581
+ [providerLabel]: {
1582
+ compat,
1583
+ },
1584
+ },
1585
+ };
1586
+ }
1587
+
1588
+ function buildModelCompatOverride(providerLabel: string, modelId: string, compat: Record<string, unknown>): Record<string, unknown> {
1589
+ return {
1590
+ providers: {
1591
+ [providerLabel]: {
1592
+ modelOverrides: {
1593
+ [modelId]: {
1594
+ compat,
1595
+ },
1596
+ },
1597
+ },
1598
+ },
1599
+ };
1600
+ }
1601
+
1602
+ function appendCredentialSafeProviderGuidance(lines: string[], placement: CompatAdvicePlacement, compatSuggestion: Record<string, unknown>): void {
1603
+ const providerLabel = placement.providerLabel;
1604
+ if (!providerLabel) return;
1605
+
1606
+ lines.push("");
1607
+ lines.push("If this channel has no models.json provider entry yet:");
1608
+ lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
1609
+ lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
1610
+
1611
+ if (Object.keys(compatSuggestion).length === 0) {
1612
+ lines.push("- No safe copyable override is available for the missing flags shown above.");
1613
+ return;
1614
+ }
1615
+
1616
+ lines.push("Provider-level minimal override:");
1617
+ lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
1618
+
1619
+ if (placement.modelId) {
1620
+ lines.push("Single-model override (use this if only this model should change):");
1621
+ lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
1622
+ }
1623
+ }
1624
+
1625
+ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } & CompatAdvicePlacement = {}): void {
1542
1626
  const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);
1543
1627
  const hasSafeSuggestion = Object.keys(suggestion).length > 0;
1544
1628
 
@@ -1558,6 +1642,8 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
1558
1642
  lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
1559
1643
  lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
1560
1644
  }
1645
+
1646
+ appendCredentialSafeProviderGuidance(lines, options, suggestion);
1561
1647
  }
1562
1648
 
1563
1649
  /**
@@ -1577,6 +1663,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
1577
1663
  // If no slash is found, fall back to the key itself.
1578
1664
  const slashIdx = key.indexOf("/");
1579
1665
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1666
+ const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1580
1667
 
1581
1668
  const modelsJsonPath = getModelsJsonDisplayPath();
1582
1669
  const lines: string[] = [
@@ -1585,7 +1672,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
1585
1672
  ``,
1586
1673
  ];
1587
1674
 
1588
- appendOpenAIProxyCompatAdviceLines(lines, missing);
1675
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
1589
1676
 
1590
1677
  return lines.join("\n");
1591
1678
  }
@@ -1647,7 +1734,7 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
1647
1734
  return suggestion;
1648
1735
  }
1649
1736
 
1650
- function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): void {
1737
+ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
1651
1738
  const suggestion = buildDeepSeekCompatSuggestion(missing);
1652
1739
  if (Object.keys(suggestion).length > 0) {
1653
1740
  lines.push("Recommended DeepSeek compat snippet:");
@@ -1669,11 +1756,14 @@ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): vo
1669
1756
  if (missing.includes("supportsLongCacheRetention")) {
1670
1757
  lines.push("- supportsLongCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
1671
1758
  }
1759
+
1760
+ appendCredentialSafeProviderGuidance(lines, placement, suggestion);
1672
1761
  }
1673
1762
 
1674
1763
  function buildDeepSeekCompatWarningText(key: string, missing: string[]): string {
1675
1764
  const slashIdx = key.indexOf("/");
1676
1765
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1766
+ const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1677
1767
  const modelsJsonPath = getModelsJsonDisplayPath();
1678
1768
  const lines: string[] = [
1679
1769
  `💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
@@ -1681,7 +1771,7 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
1681
1771
  "",
1682
1772
  ];
1683
1773
 
1684
- appendDeepSeekCompatAdviceLines(lines, missing);
1774
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId });
1685
1775
 
1686
1776
  return lines.join("\n");
1687
1777
  }
@@ -3001,6 +3091,29 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
3001
3091
  return undefined;
3002
3092
  }
3003
3093
 
3094
+ function filterRestorableStatsForSession(
3095
+ persisted: CacheStatsState | undefined,
3096
+ currentSessionHash?: string,
3097
+ ): Record<string, CacheStats> {
3098
+ if (!persisted || !currentSessionHash) return {};
3099
+
3100
+ const prefix = `${currentSessionHash}:`;
3101
+ const filteredModelStats: Record<string, CacheStats> = {};
3102
+ for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3103
+ if (fullKey.startsWith(prefix)) {
3104
+ filteredModelStats[fullKey] = stats;
3105
+ } else if (!fullKey.includes(":")) {
3106
+ // Legacy v3-style key without session hash — migrate to current session.
3107
+ filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3108
+ } else if (fullKey.startsWith("_nosession:")) {
3109
+ // Transitional _nosession bucket — migrate to current session.
3110
+ filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3111
+ }
3112
+ }
3113
+
3114
+ return filteredModelStats;
3115
+ }
3116
+
3004
3117
  /**
3005
3118
  * The closure-internal writer. Since the closure has access to currentSessionHash,
3006
3119
  * it passes the hash and statsByModel here. This function wraps them in the v4
@@ -3128,7 +3241,7 @@ async function writePersistedCacheStats(state: CacheStatsState, currentSessionHa
3128
3241
 
3129
3242
 
3130
3243
  function isCompatCheckApplicable(model: PiModel): boolean {
3131
- return lower(model.api) === "openai-completions" && !isOfficialOpenAIBaseUrl(model);
3244
+ return isOpenAICompatibleProxyApi(model.api) && !isOfficialOpenAIBaseUrl(model);
3132
3245
  }
3133
3246
 
3134
3247
  function isPromptCacheRetention400Applicable(model: PiModel): boolean {
@@ -3163,10 +3276,10 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3163
3276
  const baseUrl = lower(model.baseUrl || "");
3164
3277
  const provider = lower(model.provider);
3165
3278
 
3166
- // Only OpenAI-compatible APIs are applicable for router/channel diagnostics.
3167
- // Custom transports like kiro-api, anthropic-messages, bedrock-converse-stream
3168
- // or non-OpenAI APIs are excluded.
3169
- if (api !== "openai-completions" && api !== "openai-responses") {
3279
+ // Router/channel diagnostics only apply to OpenAI-compatible APIs (openai-completions, openai-responses).
3280
+ // Native APIs like mistral-conversations, azure-openai-responses,
3281
+ // anthropic-messages, or bedrock-converse-stream are intentionally excluded.
3282
+ if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
3170
3283
  return notes;
3171
3284
  }
3172
3285
 
@@ -3305,6 +3418,33 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3305
3418
  return notes;
3306
3419
  }
3307
3420
 
3421
+ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
3422
+ const api = lower(model.api);
3423
+
3424
+ if (isMistralConversationsApi(api)) {
3425
+ return [
3426
+ "ℹ️ Compat check not applicable for this model.",
3427
+ " Native Mistral `mistral-conversations` uses provider-native transport; OpenAI-compatible proxy compat flags do not apply.",
3428
+ ];
3429
+ }
3430
+
3431
+ if (api === "azure-openai-responses") {
3432
+ return [
3433
+ "ℹ️ Compat check not applicable for this model.",
3434
+ " Native Azure OpenAI Responses uses the Responses transport; OpenAI-compatible proxy compat flags do not apply.",
3435
+ ];
3436
+ }
3437
+
3438
+ if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
3439
+ return [
3440
+ "ℹ️ Compat check not applicable for this model.",
3441
+ " Native Responses transports already use Pi core request handling; OpenAI-compatible proxy compat flags do not apply.",
3442
+ ];
3443
+ }
3444
+
3445
+ return ["ℹ️ Compat check not applicable for this model."];
3446
+ }
3447
+
3308
3448
  function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
3309
3449
  const lines: string[] = [];
3310
3450
  lines.push(`Provider: ${model.provider}`);
@@ -3326,14 +3466,14 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
3326
3466
  const modelsJsonPath = getModelsJsonDisplayPath();
3327
3467
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
3328
3468
  if (deepSeekCompatApplicable) {
3329
- appendDeepSeekCompatAdviceLines(lines, missing);
3469
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3330
3470
  } else {
3331
- appendOpenAIProxyCompatAdviceLines(lines, missing);
3471
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3332
3472
  }
3333
3473
  } else if (deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
3334
3474
  lines.push("✅ Compat fully configured.");
3335
3475
  } else {
3336
- lines.push("ℹ️ Compat check not applicable for this model.");
3476
+ lines.push(...getCompatCheckNotApplicableLines(model));
3337
3477
  }
3338
3478
 
3339
3479
  if (isPromptCacheRetention400Applicable(model)) {
@@ -3507,9 +3647,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3507
3647
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
3508
3648
  lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
3509
3649
  if (deepSeekCompatApplicable) {
3510
- appendDeepSeekCompatAdviceLines(lines, missing);
3650
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3511
3651
  } else {
3512
- appendOpenAIProxyCompatAdviceLines(lines, missing);
3652
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3513
3653
  }
3514
3654
  }
3515
3655
 
@@ -3521,7 +3661,7 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3521
3661
  lines.push(getPromptCacheRetentionUnsupportedHint());
3522
3662
  }
3523
3663
  } else {
3524
- lines.push("ℹ️ Compat check not applicable for this model.");
3664
+ lines.push(...getCompatCheckNotApplicableLines(model));
3525
3665
  }
3526
3666
  lines.push("");
3527
3667
  }
@@ -3558,6 +3698,9 @@ export const __internals_for_tests = {
3558
3698
  isNonEmptyString,
3559
3699
  shouldInjectOpenAIPromptCacheKey,
3560
3700
  isOpenAICompatibleApi,
3701
+ isOpenAICompatibleProxyApi,
3702
+ isResponsesPromptRewriteBypassApi,
3703
+ isMistralConversationsApi,
3561
3704
  isOpenAIFamilyModel,
3562
3705
  isOpenAIFamilyAssistantMessage,
3563
3706
  isOpenAIFamilyToken,
@@ -3573,6 +3716,7 @@ export const __internals_for_tests = {
3573
3716
  isOfficialOpenAIBaseUrl,
3574
3717
  isCompatCheckApplicable,
3575
3718
  isPromptCacheRetention400Applicable,
3719
+ hasPromptCacheRetentionUnsupportedSignal,
3576
3720
  // Non-GPT OpenAI-compatible model detection
3577
3721
  isKimiLikeModel,
3578
3722
  isKimiLikeAssistantMessage,
@@ -3689,8 +3833,10 @@ export const __internals_for_tests = {
3689
3833
  getAssistantMessageModelTokenValues,
3690
3834
  getCompat,
3691
3835
  modelKey,
3692
- // Platform-friendly path helper
3836
+ // Platform-friendly path helper and models.json override builders
3693
3837
  getModelsJsonDisplayPath,
3838
+ buildProviderCompatOverride,
3839
+ buildModelCompatOverride,
3694
3840
  captureCacheRetentionEnv,
3695
3841
  requestLongCacheRetention,
3696
3842
  restoreCacheRetentionEnv,
@@ -3727,6 +3873,7 @@ export const __internals_for_tests = {
3727
3873
  hashSessionId,
3728
3874
  makeSessionModelKey,
3729
3875
  modelKeyFromSessionKey,
3876
+ filterRestorableStatsForSession,
3730
3877
  // Persistence helpers (for reload/reset tests)
3731
3878
  mergeCacheSessions,
3732
3879
  writePersistedCacheStats,
@@ -3753,6 +3900,15 @@ export default function (pi: ExtensionAPI) {
3753
3900
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
3754
3901
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
3755
3902
 
3903
+ function syncSessionHash(ctx: Pick<ExtensionContext, "sessionManager">): void {
3904
+ const sid = ctx.sessionManager.getSessionId();
3905
+ if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
3906
+ currentSessionId = sid;
3907
+ currentSessionHash = hashSessionId(sid);
3908
+ currentSessionHashSet = true;
3909
+ }
3910
+ }
3911
+
3756
3912
  /**
3757
3913
  * Build a session-scoped stats key from the current session hash + model key.
3758
3914
  * Returns `${sessionHash}:${provider}/${id}`.
@@ -3909,13 +4065,7 @@ export default function (pi: ExtensionAPI) {
3909
4065
  }
3910
4066
 
3911
4067
  async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
3912
- // Set session id on first load and on reload (same session).
3913
- const sid = ctx.sessionManager.getSessionId();
3914
- if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
3915
- currentSessionId = sid;
3916
- currentSessionHash = hashSessionId(sid);
3917
- currentSessionHashSet = true;
3918
- }
4068
+ syncSessionHash(ctx);
3919
4069
 
3920
4070
  if (reason === "reload") {
3921
4071
  // /reload: preserve session-scoped stats (same session hash).
@@ -3927,73 +4077,31 @@ export default function (pi: ExtensionAPI) {
3927
4077
  clearRecentSamples();
3928
4078
 
3929
4079
  const persisted = await readPersistedCacheStats();
3930
- if (persisted && currentSessionHash) {
3931
- const prefix = `${currentSessionHash}:`;
3932
- const filteredModelStats: Record<string, CacheStats> = {};
3933
- for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3934
- if (fullKey.startsWith(prefix)) {
3935
- // Current session's data
3936
- filteredModelStats[fullKey] = stats;
3937
- } else if (!fullKey.includes(":")) {
3938
- // Legacy v3-style key without session hash — migrate to current session
3939
- filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3940
- } else if (fullKey.startsWith("_nosession:")) {
3941
- // _nosession migration remnant from old-path v4 write — migrate to current session
3942
- filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3943
- }
3944
- }
3945
- cacheStatsByModel = filteredModelStats;
3946
- cacheStatsLegacyFamily = persisted.legacyFamily;
3947
- } else if (persisted) {
3948
- cacheStatsByModel = persisted.statsByModel;
3949
- cacheStatsLegacyFamily = persisted.legacyFamily;
3950
- } else {
3951
- cacheStatsByModel = {};
3952
- cacheStatsLegacyFamily = emptyAllCacheStats();
3953
- }
4080
+ cacheStatsByModel = filterRestorableStatsForSession(
4081
+ persisted,
4082
+ currentSessionHashSet ? currentSessionHash : undefined,
4083
+ );
4084
+ cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
3954
4085
 
3955
4086
  await rollOverStatsIfNeeded(ctx);
3956
4087
  return;
3957
4088
  }
3958
4089
 
3959
4090
  // First load / process start: read persisted stats and filter for
3960
- // this session's entries. If the session has no persisted data yet,
3961
- // start fresh.
4091
+ // this session's entries. If the session hash is unavailable, start
4092
+ // fresh instead of loading all persisted session buckets.
3962
4093
  const persisted = await readPersistedCacheStats();
3963
- if (persisted && currentSessionHash) {
3964
- const prefix = `${currentSessionHash}:`;
3965
- const filteredModelStats: Record<string, CacheStats> = {};
3966
- for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3967
- if (fullKey.startsWith(prefix)) {
3968
- // Current session's data — load it.
3969
- filteredModelStats[fullKey] = stats;
3970
- } else if (!fullKey.includes(":")) {
3971
- // Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
3972
- // Migrate to current session by prefixing with the session hash.
3973
- filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3974
- } else if (fullKey.startsWith("_nosession:")) {
3975
- // _nosession migration remnant from old-path v4 write — migrate to current session
3976
- filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3977
- }
3978
- // Other sessions' entries are preserved in the file but not loaded
3979
- // into memory; they'll be rewritten on next persist.
3980
- }
3981
- cacheStatsByModel = filteredModelStats;
3982
- cacheStatsLegacyFamily = persisted.legacyFamily;
3983
- } else if (persisted) {
3984
- // Persisted data exists but no session hash set yet.
3985
- // This shouldn't normally happen — use the data as-is.
3986
- cacheStatsByModel = persisted.statsByModel;
3987
- cacheStatsLegacyFamily = persisted.legacyFamily;
3988
- } else {
3989
- cacheStatsByModel = {};
3990
- cacheStatsLegacyFamily = emptyAllCacheStats();
3991
- }
4094
+ cacheStatsByModel = filterRestorableStatsForSession(
4095
+ persisted,
4096
+ currentSessionHashSet ? currentSessionHash : undefined,
4097
+ );
4098
+ cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
3992
4099
  lastStatusText = undefined;
3993
4100
  await rollOverStatsIfNeeded(ctx);
3994
4101
  }
3995
4102
 
3996
4103
  async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
4104
+ syncSessionHash(ctx);
3997
4105
  await rollOverStatsIfNeeded(ctx);
3998
4106
 
3999
4107
  const adapter = selectAdapterForModel(model);
@@ -4065,7 +4173,7 @@ export default function (pi: ExtensionAPI) {
4065
4173
 
4066
4174
  pi.on("before_agent_start", async (event, _ctx) => {
4067
4175
  // ────────────────────────────────────────────────────────────────
4068
- // OpenAI Responses API bypass (codex-responses + responses)
4176
+ // OpenAI Responses-family bypass (codex-responses + responses + azure responses)
4069
4177
  //
4070
4178
  // OpenAI's Responses API endpoints — both the Codex backend
4071
4179
  // (openai-codex-responses, chatgpt.com) and the public
@@ -4091,11 +4199,8 @@ export default function (pi: ExtensionAPI) {
4091
4199
  // that use openai-completions are unaffected.
4092
4200
  // ────────────────────────────────────────────────────────────────
4093
4201
  const model = _ctx.model;
4094
- if (model) {
4095
- const api = lower(model.api);
4096
- if (api === "openai-codex-responses" || api === "openai-responses") {
4097
- return {};
4098
- }
4202
+ if (model && isResponsesPromptRewriteBypassApi(model.api)) {
4203
+ return {};
4099
4204
  }
4100
4205
 
4101
4206
  if (!runtimeOptimizerEnabled) return {};
@@ -4161,6 +4266,7 @@ export default function (pi: ExtensionAPI) {
4161
4266
  if (!runtimeOptimizerEnabled || !model) return;
4162
4267
  if (event.status !== 400) return;
4163
4268
  if (!isPromptCacheRetention400Applicable(model)) return;
4269
+ if (!hasPromptCacheRetentionUnsupportedSignal(event.headers)) return;
4164
4270
 
4165
4271
  const key = modelKey(model);
4166
4272
  promptCacheRetention400Models.add(key);
@@ -4175,6 +4281,7 @@ export default function (pi: ExtensionAPI) {
4175
4281
  });
4176
4282
 
4177
4283
  pi.on("message_end", async (event, ctx) => {
4284
+ syncSessionHash(ctx);
4178
4285
  const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
4179
4286
  if (!adapter) return;
4180
4287
 
@@ -4221,6 +4328,7 @@ export default function (pi: ExtensionAPI) {
4221
4328
  pi.registerCommand("cache-optimizer", {
4222
4329
  description: "Diagnose Pi cache configuration",
4223
4330
  handler: async (args: string, cmdCtx) => {
4331
+ syncSessionHash(cmdCtx);
4224
4332
  const model = cmdCtx.model;
4225
4333
  const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
4226
4334
 
@@ -4274,7 +4382,7 @@ export default function (pi: ExtensionAPI) {
4274
4382
  cmdCtx.ui.notify(
4275
4383
  isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
4276
4384
  ? "✅ Compat fully configured."
4277
- : "ℹ️ Compat check not applicable for this model.",
4385
+ : getCompatCheckNotApplicableLines(model).join("\n"),
4278
4386
  "info",
4279
4387
  );
4280
4388
  }
@@ -4372,7 +4480,7 @@ export default function (pi: ExtensionAPI) {
4372
4480
  cmdCtx.ui.notify(
4373
4481
  isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
4374
4482
  ? "✅ Compat fully configured."
4375
- : "ℹ️ Compat check not applicable for this model.",
4483
+ : getCompatCheckNotApplicableLines(model).join("\n"),
4376
4484
  "info",
4377
4485
  );
4378
4486
  }
@@ -4425,6 +4533,8 @@ export default function (pi: ExtensionAPI) {
4425
4533
  diagnosis.push(`✅ Active model "${displayKey}": compat fully configured.`);
4426
4534
  } else {
4427
4535
  diagnosis.push(`ℹ️ Active model "${displayKey}": compat check not applicable.`);
4536
+ const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
4537
+ for (const line of detailLines) diagnosis.push(line);
4428
4538
  }
4429
4539
  } else {
4430
4540
  diagnosis.push("No active model selected.");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.5.5",
3
+ "version": "2.5.7",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",