pi-cache-optimizer 2.5.4 → 2.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,6 +101,42 @@ Notes:
101
101
  - For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
102
102
  - This extension only advises; it does not edit `models.json`.
103
103
 
104
+ ### Channels without a `models.json` provider entry
105
+
106
+ Some Pi channels may be available even when there is no provider block in `~/.pi/agent/models.json` yet. Keep existing authentication as-is and do not copy credentials, tokens, or API keys. Add only cache/routing compatibility overrides in `models.json`.
107
+
108
+ Provider-level minimal override:
109
+
110
+ ```json
111
+ {
112
+ "providers": {
113
+ "your-provider-id": {
114
+ "compat": {
115
+ "sendSessionAffinityHeaders": true
116
+ }
117
+ }
118
+ }
119
+ }
120
+ ```
121
+
122
+ If only one model should change, use `modelOverrides`:
123
+
124
+ ```json
125
+ {
126
+ "providers": {
127
+ "your-provider-id": {
128
+ "modelOverrides": {
129
+ "gpt-5.5": {
130
+ "compat": {
131
+ "sendSessionAffinityHeaders": true
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ ```
139
+
104
140
  ## Footer stats
105
141
 
106
142
  Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
package/README.zh-CN.md CHANGED
@@ -101,6 +101,42 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
101
101
  - 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
102
102
  - 本扩展只给建议,不会修改 `models.json`。
103
103
 
104
+ ### 没有 `models.json` provider entry 的渠道
105
+
106
+ 有些 Pi 渠道可用时,`~/.pi/agent/models.json` 里可能还没有对应 provider block。保留现有认证方式,不要复制 credential、token 或 API key。只在 `models.json` 里添加缓存 / 路由兼容覆盖。
107
+
108
+ Provider 级最小 override:
109
+
110
+ ```json
111
+ {
112
+ "providers": {
113
+ "your-provider-id": {
114
+ "compat": {
115
+ "sendSessionAffinityHeaders": true
116
+ }
117
+ }
118
+ }
119
+ }
120
+ ```
121
+
122
+ 如果只想影响单个模型,用 `modelOverrides`:
123
+
124
+ ```json
125
+ {
126
+ "providers": {
127
+ "your-provider-id": {
128
+ "modelOverrides": {
129
+ "gpt-5.5": {
130
+ "compat": {
131
+ "sendSessionAffinityHeaders": true
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ ```
139
+
104
140
  ## Footer 统计
105
141
 
106
142
  统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
package/index.ts CHANGED
@@ -1476,7 +1476,9 @@ function isNonEmptyString(value: unknown): boolean {
1476
1476
 
1477
1477
  function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
1478
1478
  const value = lower(model.baseUrl).trim();
1479
- if (!value) return false;
1479
+ if (!value) {
1480
+ return lower(model.provider) === "openai";
1481
+ }
1480
1482
 
1481
1483
  try {
1482
1484
  return new URL(value).hostname === "api.openai.com";
@@ -1538,7 +1540,76 @@ function getPromptCacheRetentionUnsupportedHint(): string {
1538
1540
  return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
1539
1541
  }
1540
1542
 
1541
- function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
1543
+ function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
1544
+ if (!headers) return false;
1545
+
1546
+ const normalized = Object.entries(headers)
1547
+ .map(([key, value]) => `${lower(key)}: ${lower(value)}`)
1548
+ .join("\n");
1549
+ if (!normalized.includes("prompt_cache_retention")) return false;
1550
+
1551
+ return [
1552
+ "unsupported parameter",
1553
+ "unsupported_parameter",
1554
+ "unknown parameter",
1555
+ "not supported",
1556
+ "unsupported field",
1557
+ ].some((needle) => normalized.includes(needle));
1558
+ }
1559
+
1560
+ type CompatAdvicePlacement = {
1561
+ providerLabel?: string;
1562
+ modelId?: string;
1563
+ };
1564
+
1565
+ function buildProviderCompatOverride(providerLabel: string, compat: Record<string, unknown>): Record<string, unknown> {
1566
+ return {
1567
+ providers: {
1568
+ [providerLabel]: {
1569
+ compat,
1570
+ },
1571
+ },
1572
+ };
1573
+ }
1574
+
1575
+ function buildModelCompatOverride(providerLabel: string, modelId: string, compat: Record<string, unknown>): Record<string, unknown> {
1576
+ return {
1577
+ providers: {
1578
+ [providerLabel]: {
1579
+ modelOverrides: {
1580
+ [modelId]: {
1581
+ compat,
1582
+ },
1583
+ },
1584
+ },
1585
+ },
1586
+ };
1587
+ }
1588
+
1589
+ function appendCredentialSafeProviderGuidance(lines: string[], placement: CompatAdvicePlacement, compatSuggestion: Record<string, unknown>): void {
1590
+ const providerLabel = placement.providerLabel;
1591
+ if (!providerLabel) return;
1592
+
1593
+ lines.push("");
1594
+ lines.push("If this channel has no models.json provider entry yet:");
1595
+ lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
1596
+ lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
1597
+
1598
+ if (Object.keys(compatSuggestion).length === 0) {
1599
+ lines.push("- No safe copyable override is available for the missing flags shown above.");
1600
+ return;
1601
+ }
1602
+
1603
+ lines.push("Provider-level minimal override:");
1604
+ lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
1605
+
1606
+ if (placement.modelId) {
1607
+ lines.push("Single-model override (use this if only this model should change):");
1608
+ lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
1609
+ }
1610
+ }
1611
+
1612
+ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } & CompatAdvicePlacement = {}): void {
1542
1613
  const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);
1543
1614
  const hasSafeSuggestion = Object.keys(suggestion).length > 0;
1544
1615
 
@@ -1558,6 +1629,8 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
1558
1629
  lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
1559
1630
  lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
1560
1631
  }
1632
+
1633
+ appendCredentialSafeProviderGuidance(lines, options, suggestion);
1561
1634
  }
1562
1635
 
1563
1636
  /**
@@ -1577,6 +1650,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
1577
1650
  // If no slash is found, fall back to the key itself.
1578
1651
  const slashIdx = key.indexOf("/");
1579
1652
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1653
+ const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1580
1654
 
1581
1655
  const modelsJsonPath = getModelsJsonDisplayPath();
1582
1656
  const lines: string[] = [
@@ -1585,7 +1659,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
1585
1659
  ``,
1586
1660
  ];
1587
1661
 
1588
- appendOpenAIProxyCompatAdviceLines(lines, missing);
1662
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
1589
1663
 
1590
1664
  return lines.join("\n");
1591
1665
  }
@@ -1647,7 +1721,7 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
1647
1721
  return suggestion;
1648
1722
  }
1649
1723
 
1650
- function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): void {
1724
+ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
1651
1725
  const suggestion = buildDeepSeekCompatSuggestion(missing);
1652
1726
  if (Object.keys(suggestion).length > 0) {
1653
1727
  lines.push("Recommended DeepSeek compat snippet:");
@@ -1669,11 +1743,14 @@ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): vo
1669
1743
  if (missing.includes("supportsLongCacheRetention")) {
1670
1744
  lines.push("- supportsLongCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
1671
1745
  }
1746
+
1747
+ appendCredentialSafeProviderGuidance(lines, placement, suggestion);
1672
1748
  }
1673
1749
 
1674
1750
  function buildDeepSeekCompatWarningText(key: string, missing: string[]): string {
1675
1751
  const slashIdx = key.indexOf("/");
1676
1752
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1753
+ const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1677
1754
  const modelsJsonPath = getModelsJsonDisplayPath();
1678
1755
  const lines: string[] = [
1679
1756
  `💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
@@ -1681,7 +1758,7 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
1681
1758
  "",
1682
1759
  ];
1683
1760
 
1684
- appendDeepSeekCompatAdviceLines(lines, missing);
1761
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId });
1685
1762
 
1686
1763
  return lines.join("\n");
1687
1764
  }
@@ -3001,6 +3078,29 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
3001
3078
  return undefined;
3002
3079
  }
3003
3080
 
3081
+ function filterRestorableStatsForSession(
3082
+ persisted: CacheStatsState | undefined,
3083
+ currentSessionHash?: string,
3084
+ ): Record<string, CacheStats> {
3085
+ if (!persisted || !currentSessionHash) return {};
3086
+
3087
+ const prefix = `${currentSessionHash}:`;
3088
+ const filteredModelStats: Record<string, CacheStats> = {};
3089
+ for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3090
+ if (fullKey.startsWith(prefix)) {
3091
+ filteredModelStats[fullKey] = stats;
3092
+ } else if (!fullKey.includes(":")) {
3093
+ // Legacy v3-style key without session hash — migrate to current session.
3094
+ filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3095
+ } else if (fullKey.startsWith("_nosession:")) {
3096
+ // Transitional _nosession bucket — migrate to current session.
3097
+ filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3098
+ }
3099
+ }
3100
+
3101
+ return filteredModelStats;
3102
+ }
3103
+
3004
3104
  /**
3005
3105
  * The closure-internal writer. Since the closure has access to currentSessionHash,
3006
3106
  * it passes the hash and statsByModel here. This function wraps them in the v4
@@ -3326,9 +3426,9 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
3326
3426
  const modelsJsonPath = getModelsJsonDisplayPath();
3327
3427
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
3328
3428
  if (deepSeekCompatApplicable) {
3329
- appendDeepSeekCompatAdviceLines(lines, missing);
3429
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3330
3430
  } else {
3331
- appendOpenAIProxyCompatAdviceLines(lines, missing);
3431
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3332
3432
  }
3333
3433
  } else if (deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
3334
3434
  lines.push("✅ Compat fully configured.");
@@ -3411,6 +3511,13 @@ function buildLowHitDiagnosis(
3411
3511
  const hasRouterRisk = routerNotes.length > 0;
3412
3512
  const hasUsageMissing = missingUsageSamples > 0;
3413
3513
 
3514
+ // Today's cached-token ratio is used both inside and outside the recent-sample
3515
+ // branch. Keep it block-external so doctor/stats never throw for low-hit
3516
+ // models that have persisted counters but no recent in-memory samples.
3517
+ const todayHitRatio = todayStats.totalInputTokens > 0
3518
+ ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
3519
+ : 0;
3520
+
3414
3521
  // Determine if there are actual issues worth flagging
3415
3522
  const hasActualIssues = hasMissingCompat || hasUsageMissing ||
3416
3523
  // Low hit trend (today total > 3 and hit ratio < 30%)
@@ -3451,10 +3558,6 @@ function buildLowHitDiagnosis(
3451
3558
  // Priority 4: recent trend low
3452
3559
  if (recent10Total > 0) {
3453
3560
  const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
3454
- const todayHitRatio = todayStats.totalInputTokens > 0
3455
- ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
3456
- : 0;
3457
-
3458
3561
  if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
3459
3562
  lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
3460
3563
  lines.push(" Likely causes: proxy routing to different backends per request,");
@@ -3504,9 +3607,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3504
3607
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
3505
3608
  lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
3506
3609
  if (deepSeekCompatApplicable) {
3507
- appendDeepSeekCompatAdviceLines(lines, missing);
3610
+ appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3508
3611
  } else {
3509
- appendOpenAIProxyCompatAdviceLines(lines, missing);
3612
+ appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3510
3613
  }
3511
3614
  }
3512
3615
 
@@ -3570,6 +3673,7 @@ export const __internals_for_tests = {
3570
3673
  isOfficialOpenAIBaseUrl,
3571
3674
  isCompatCheckApplicable,
3572
3675
  isPromptCacheRetention400Applicable,
3676
+ hasPromptCacheRetentionUnsupportedSignal,
3573
3677
  // Non-GPT OpenAI-compatible model detection
3574
3678
  isKimiLikeModel,
3575
3679
  isKimiLikeAssistantMessage,
@@ -3686,8 +3790,10 @@ export const __internals_for_tests = {
3686
3790
  getAssistantMessageModelTokenValues,
3687
3791
  getCompat,
3688
3792
  modelKey,
3689
- // Platform-friendly path helper
3793
+ // Platform-friendly path helpers
3690
3794
  getModelsJsonDisplayPath,
3795
+ buildProviderCompatOverride,
3796
+ buildModelCompatOverride,
3691
3797
  captureCacheRetentionEnv,
3692
3798
  requestLongCacheRetention,
3693
3799
  restoreCacheRetentionEnv,
@@ -3724,6 +3830,7 @@ export const __internals_for_tests = {
3724
3830
  hashSessionId,
3725
3831
  makeSessionModelKey,
3726
3832
  modelKeyFromSessionKey,
3833
+ filterRestorableStatsForSession,
3727
3834
  // Persistence helpers (for reload/reset tests)
3728
3835
  mergeCacheSessions,
3729
3836
  writePersistedCacheStats,
@@ -3750,6 +3857,15 @@ export default function (pi: ExtensionAPI) {
3750
3857
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
3751
3858
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
3752
3859
 
3860
+ function syncSessionHash(ctx: Pick<ExtensionContext, "sessionManager">): void {
3861
+ const sid = ctx.sessionManager.getSessionId();
3862
+ if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
3863
+ currentSessionId = sid;
3864
+ currentSessionHash = hashSessionId(sid);
3865
+ currentSessionHashSet = true;
3866
+ }
3867
+ }
3868
+
3753
3869
  /**
3754
3870
  * Build a session-scoped stats key from the current session hash + model key.
3755
3871
  * Returns `${sessionHash}:${provider}/${id}`.
@@ -3906,13 +4022,7 @@ export default function (pi: ExtensionAPI) {
3906
4022
  }
3907
4023
 
3908
4024
  async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
3909
- // Set session id on first load and on reload (same session).
3910
- const sid = ctx.sessionManager.getSessionId();
3911
- if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
3912
- currentSessionId = sid;
3913
- currentSessionHash = hashSessionId(sid);
3914
- currentSessionHashSet = true;
3915
- }
4025
+ syncSessionHash(ctx);
3916
4026
 
3917
4027
  if (reason === "reload") {
3918
4028
  // /reload: preserve session-scoped stats (same session hash).
@@ -3924,73 +4034,31 @@ export default function (pi: ExtensionAPI) {
3924
4034
  clearRecentSamples();
3925
4035
 
3926
4036
  const persisted = await readPersistedCacheStats();
3927
- if (persisted && currentSessionHash) {
3928
- const prefix = `${currentSessionHash}:`;
3929
- const filteredModelStats: Record<string, CacheStats> = {};
3930
- for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3931
- if (fullKey.startsWith(prefix)) {
3932
- // Current session's data
3933
- filteredModelStats[fullKey] = stats;
3934
- } else if (!fullKey.includes(":")) {
3935
- // Legacy v3-style key without session hash — migrate to current session
3936
- filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3937
- } else if (fullKey.startsWith("_nosession:")) {
3938
- // _nosession migration remnant from old-path v4 write — migrate to current session
3939
- filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3940
- }
3941
- }
3942
- cacheStatsByModel = filteredModelStats;
3943
- cacheStatsLegacyFamily = persisted.legacyFamily;
3944
- } else if (persisted) {
3945
- cacheStatsByModel = persisted.statsByModel;
3946
- cacheStatsLegacyFamily = persisted.legacyFamily;
3947
- } else {
3948
- cacheStatsByModel = {};
3949
- cacheStatsLegacyFamily = emptyAllCacheStats();
3950
- }
4037
+ cacheStatsByModel = filterRestorableStatsForSession(
4038
+ persisted,
4039
+ currentSessionHashSet ? currentSessionHash : undefined,
4040
+ );
4041
+ cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
3951
4042
 
3952
4043
  await rollOverStatsIfNeeded(ctx);
3953
4044
  return;
3954
4045
  }
3955
4046
 
3956
4047
  // First load / process start: read persisted stats and filter for
3957
- // this session's entries. If the session has no persisted data yet,
3958
- // start fresh.
4048
+ // this session's entries. If the session hash is unavailable, start
4049
+ // fresh instead of loading all persisted session buckets.
3959
4050
  const persisted = await readPersistedCacheStats();
3960
- if (persisted && currentSessionHash) {
3961
- const prefix = `${currentSessionHash}:`;
3962
- const filteredModelStats: Record<string, CacheStats> = {};
3963
- for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
3964
- if (fullKey.startsWith(prefix)) {
3965
- // Current session's data — load it.
3966
- filteredModelStats[fullKey] = stats;
3967
- } else if (!fullKey.includes(":")) {
3968
- // Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
3969
- // Migrate to current session by prefixing with the session hash.
3970
- filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
3971
- } else if (fullKey.startsWith("_nosession:")) {
3972
- // _nosession migration remnant from old-path v4 write — migrate to current session
3973
- filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
3974
- }
3975
- // Other sessions' entries are preserved in the file but not loaded
3976
- // into memory; they'll be rewritten on next persist.
3977
- }
3978
- cacheStatsByModel = filteredModelStats;
3979
- cacheStatsLegacyFamily = persisted.legacyFamily;
3980
- } else if (persisted) {
3981
- // Persisted data exists but no session hash set yet.
3982
- // This shouldn't normally happen — use the data as-is.
3983
- cacheStatsByModel = persisted.statsByModel;
3984
- cacheStatsLegacyFamily = persisted.legacyFamily;
3985
- } else {
3986
- cacheStatsByModel = {};
3987
- cacheStatsLegacyFamily = emptyAllCacheStats();
3988
- }
4051
+ cacheStatsByModel = filterRestorableStatsForSession(
4052
+ persisted,
4053
+ currentSessionHashSet ? currentSessionHash : undefined,
4054
+ );
4055
+ cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
3989
4056
  lastStatusText = undefined;
3990
4057
  await rollOverStatsIfNeeded(ctx);
3991
4058
  }
3992
4059
 
3993
4060
  async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
4061
+ syncSessionHash(ctx);
3994
4062
  await rollOverStatsIfNeeded(ctx);
3995
4063
 
3996
4064
  const adapter = selectAdapterForModel(model);
@@ -4158,6 +4226,7 @@ export default function (pi: ExtensionAPI) {
4158
4226
  if (!runtimeOptimizerEnabled || !model) return;
4159
4227
  if (event.status !== 400) return;
4160
4228
  if (!isPromptCacheRetention400Applicable(model)) return;
4229
+ if (!hasPromptCacheRetentionUnsupportedSignal(event.headers)) return;
4161
4230
 
4162
4231
  const key = modelKey(model);
4163
4232
  promptCacheRetention400Models.add(key);
@@ -4172,6 +4241,7 @@ export default function (pi: ExtensionAPI) {
4172
4241
  });
4173
4242
 
4174
4243
  pi.on("message_end", async (event, ctx) => {
4244
+ syncSessionHash(ctx);
4175
4245
  const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
4176
4246
  if (!adapter) return;
4177
4247
 
@@ -4218,6 +4288,7 @@ export default function (pi: ExtensionAPI) {
4218
4288
  pi.registerCommand("cache-optimizer", {
4219
4289
  description: "Diagnose Pi cache configuration",
4220
4290
  handler: async (args: string, cmdCtx) => {
4291
+ syncSessionHash(cmdCtx);
4221
4292
  const model = cmdCtx.model;
4222
4293
  const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
4223
4294
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.5.4",
3
+ "version": "2.5.6",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",