pi-cache-optimizer 2.5.4 → 2.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/README.zh-CN.md +36 -0
- package/index.ts +147 -76
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,6 +101,42 @@ Notes:
|
|
|
101
101
|
- For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
|
|
102
102
|
- This extension only advises; it does not edit `models.json`.
|
|
103
103
|
|
|
104
|
+
### Channels without a `models.json` provider entry
|
|
105
|
+
|
|
106
|
+
Some Pi channels may be available even when there is no provider block in `~/.pi/agent/models.json` yet. Keep existing authentication as-is and do not copy credentials, tokens, or API keys. Add only cache/routing compatibility overrides in `models.json`.
|
|
107
|
+
|
|
108
|
+
Provider-level minimal override:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"providers": {
|
|
113
|
+
"your-provider-id": {
|
|
114
|
+
"compat": {
|
|
115
|
+
"sendSessionAffinityHeaders": true
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
If only one model should change, use `modelOverrides`:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"providers": {
|
|
127
|
+
"your-provider-id": {
|
|
128
|
+
"modelOverrides": {
|
|
129
|
+
"gpt-5.5": {
|
|
130
|
+
"compat": {
|
|
131
|
+
"sendSessionAffinityHeaders": true
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
104
140
|
## Footer stats
|
|
105
141
|
|
|
106
142
|
Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
|
package/README.zh-CN.md
CHANGED
|
@@ -101,6 +101,42 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
|
|
|
101
101
|
- 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
|
|
102
102
|
- 本扩展只给建议,不会修改 `models.json`。
|
|
103
103
|
|
|
104
|
+
### 没有 `models.json` provider entry 的渠道
|
|
105
|
+
|
|
106
|
+
有些 Pi 渠道可用时,`~/.pi/agent/models.json` 里可能还没有对应 provider block。保留现有认证方式,不要复制 credential、token 或 API key。只在 `models.json` 里添加缓存 / 路由兼容覆盖。
|
|
107
|
+
|
|
108
|
+
Provider 级最小 override:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"providers": {
|
|
113
|
+
"your-provider-id": {
|
|
114
|
+
"compat": {
|
|
115
|
+
"sendSessionAffinityHeaders": true
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
如果只想影响单个模型,用 `modelOverrides`:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"providers": {
|
|
127
|
+
"your-provider-id": {
|
|
128
|
+
"modelOverrides": {
|
|
129
|
+
"gpt-5.5": {
|
|
130
|
+
"compat": {
|
|
131
|
+
"sendSessionAffinityHeaders": true
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
104
140
|
## Footer 统计
|
|
105
141
|
|
|
106
142
|
统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
|
package/index.ts
CHANGED
|
@@ -1476,7 +1476,9 @@ function isNonEmptyString(value: unknown): boolean {
|
|
|
1476
1476
|
|
|
1477
1477
|
function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
1478
1478
|
const value = lower(model.baseUrl).trim();
|
|
1479
|
-
if (!value)
|
|
1479
|
+
if (!value) {
|
|
1480
|
+
return lower(model.provider) === "openai";
|
|
1481
|
+
}
|
|
1480
1482
|
|
|
1481
1483
|
try {
|
|
1482
1484
|
return new URL(value).hostname === "api.openai.com";
|
|
@@ -1538,7 +1540,76 @@ function getPromptCacheRetentionUnsupportedHint(): string {
|
|
|
1538
1540
|
return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
|
|
1539
1541
|
}
|
|
1540
1542
|
|
|
1541
|
-
function
|
|
1543
|
+
/**
 * Reports whether a header map looks like a rejection of the
 * `prompt_cache_retention` parameter.
 *
 * Every header is rendered as a lower-cased `name: value` line and the lines
 * are joined into one text. The signal is present only when that text
 * mentions `prompt_cache_retention` AND contains at least one of the known
 * "unsupported" phrasings. Both checks run against the joined text, so the
 * parameter name and the phrase do not have to appear in the same header.
 *
 * @param headers - Header name/value pairs, or `undefined` when none are available.
 * @returns `true` when both markers are found; `false` otherwise, including
 *   when `headers` is missing.
 */
function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
  if (!headers) return false;

  // Flatten the map into one searchable, case-insensitive text blob.
  const headerLines: string[] = [];
  for (const [name, content] of Object.entries(headers)) {
    headerLines.push(`${lower(name)}: ${lower(content)}`);
  }
  const haystack = headerLines.join("\n");

  // Without the parameter name there is nothing to attribute the error to.
  if (haystack.indexOf("prompt_cache_retention") === -1) return false;

  const rejectionPhrases = [
    "unsupported parameter",
    "unsupported_parameter",
    "unknown parameter",
    "not supported",
    "unsupported field",
  ];
  for (const phrase of rejectionPhrases) {
    if (haystack.includes(phrase)) return true;
  }
  return false;
}
|
|
1559
|
+
|
|
1560
|
+
/**
 * Identifies where a suggested compat override would be placed.
 * Both fields are optional; callers may pass an empty object.
 */
type CompatAdvicePlacement = {
  /** Provider id to use as the key under `providers` in a generated override snippet. */
  providerLabel?: string;
  /** Model id to use as the key under `modelOverrides` for a single-model snippet. */
  modelId?: string;
};
|
|
1564
|
+
|
|
1565
|
+
/**
 * Builds a provider-level override object of the shape
 * `{ providers: { [providerLabel]: { compat } } }`.
 *
 * @param providerLabel - Provider id used as the key under `providers`.
 * @param compat - Compat flags to nest under the provider; stored by reference, not copied.
 * @returns The nested override object.
 */
function buildProviderCompatOverride(providerLabel: string, compat: Record<string, unknown>): Record<string, unknown> {
  const providerEntry = { compat };
  // A computed key keeps the label an own property whatever its text is.
  const providers = { [providerLabel]: providerEntry };
  return { providers };
}
|
|
1574
|
+
|
|
1575
|
+
/**
 * Builds a single-model override object of the shape
 * `{ providers: { [providerLabel]: { modelOverrides: { [modelId]: { compat } } } } }`.
 *
 * @param providerLabel - Provider id used as the key under `providers`.
 * @param modelId - Model id used as the key under `modelOverrides`.
 * @param compat - Compat flags to nest under the model; stored by reference, not copied.
 * @returns The nested override object.
 */
function buildModelCompatOverride(providerLabel: string, modelId: string, compat: Record<string, unknown>): Record<string, unknown> {
  // Assemble from the innermost level outwards.
  const modelEntry = { compat };
  const modelOverrides = { [modelId]: modelEntry };
  const providerEntry = { modelOverrides };
  return { providers: { [providerLabel]: providerEntry } };
}
|
|
1588
|
+
|
|
1589
|
+
/**
 * Appends guidance for channels that have no provider entry in models.json yet.
 *
 * Does nothing when `placement.providerLabel` is missing. Otherwise it adds a
 * blank separator, a short credential-safety note, and then either a
 * "no safe copyable override" line (when `compatSuggestion` has no keys) or a
 * pretty-printed provider-level override, followed by a single-model override
 * when `placement.modelId` is set.
 *
 * @param lines - Output buffer; mutated in place.
 * @param placement - Provider and optional model the snippets are keyed by.
 * @param compatSuggestion - Compat flags to embed in the generated snippets.
 */
function appendCredentialSafeProviderGuidance(lines: string[], placement: CompatAdvicePlacement, compatSuggestion: Record<string, unknown>): void {
  const providerLabel = placement.providerLabel;
  if (!providerLabel) return;

  lines.push(
    "",
    "If this channel has no models.json provider entry yet:",
    "- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.",
  );
  lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);

  const hasCopyableCompat = Object.keys(compatSuggestion).length !== 0;
  if (!hasCopyableCompat) {
    lines.push("- No safe copyable override is available for the missing flags shown above.");
    return;
  }

  lines.push("Provider-level minimal override:");
  const providerSnippet = buildProviderCompatOverride(providerLabel, compatSuggestion);
  lines.push(JSON.stringify(providerSnippet, null, 2));

  const modelId = placement.modelId;
  if (modelId) {
    lines.push("Single-model override (use this if only this model should change):");
    const modelSnippet = buildModelCompatOverride(providerLabel, modelId, compatSuggestion);
    lines.push(JSON.stringify(modelSnippet, null, 2));
  }
}
|
|
1611
|
+
|
|
1612
|
+
function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } & CompatAdvicePlacement = {}): void {
|
|
1542
1613
|
const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);
|
|
1543
1614
|
const hasSafeSuggestion = Object.keys(suggestion).length > 0;
|
|
1544
1615
|
|
|
@@ -1558,6 +1629,8 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
|
|
|
1558
1629
|
lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
|
|
1559
1630
|
lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
1560
1631
|
}
|
|
1632
|
+
|
|
1633
|
+
appendCredentialSafeProviderGuidance(lines, options, suggestion);
|
|
1561
1634
|
}
|
|
1562
1635
|
|
|
1563
1636
|
/**
|
|
@@ -1577,6 +1650,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
1577
1650
|
// If no slash is found, fall back to the key itself.
|
|
1578
1651
|
const slashIdx = key.indexOf("/");
|
|
1579
1652
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1653
|
+
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1580
1654
|
|
|
1581
1655
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1582
1656
|
const lines: string[] = [
|
|
@@ -1585,7 +1659,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
1585
1659
|
``,
|
|
1586
1660
|
];
|
|
1587
1661
|
|
|
1588
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
1662
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
1589
1663
|
|
|
1590
1664
|
return lines.join("\n");
|
|
1591
1665
|
}
|
|
@@ -1647,7 +1721,7 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
|
|
|
1647
1721
|
return suggestion;
|
|
1648
1722
|
}
|
|
1649
1723
|
|
|
1650
|
-
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): void {
|
|
1724
|
+
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
1651
1725
|
const suggestion = buildDeepSeekCompatSuggestion(missing);
|
|
1652
1726
|
if (Object.keys(suggestion).length > 0) {
|
|
1653
1727
|
lines.push("Recommended DeepSeek compat snippet:");
|
|
@@ -1669,11 +1743,14 @@ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): vo
|
|
|
1669
1743
|
if (missing.includes("supportsLongCacheRetention")) {
|
|
1670
1744
|
lines.push("- supportsLongCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
|
|
1671
1745
|
}
|
|
1746
|
+
|
|
1747
|
+
appendCredentialSafeProviderGuidance(lines, placement, suggestion);
|
|
1672
1748
|
}
|
|
1673
1749
|
|
|
1674
1750
|
function buildDeepSeekCompatWarningText(key: string, missing: string[]): string {
|
|
1675
1751
|
const slashIdx = key.indexOf("/");
|
|
1676
1752
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1753
|
+
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1677
1754
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1678
1755
|
const lines: string[] = [
|
|
1679
1756
|
`💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
|
|
@@ -1681,7 +1758,7 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
|
|
|
1681
1758
|
"",
|
|
1682
1759
|
];
|
|
1683
1760
|
|
|
1684
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
1761
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
1685
1762
|
|
|
1686
1763
|
return lines.join("\n");
|
|
1687
1764
|
}
|
|
@@ -3001,6 +3078,29 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
|
|
|
3001
3078
|
return undefined;
|
|
3002
3079
|
}
|
|
3003
3080
|
|
|
3081
|
+
/**
 * Selects the persisted per-model stats that may be restored into the current session.
 *
 * - Keys that already start with `${currentSessionHash}:` are kept unchanged.
 * - Keys containing no ":" at all (legacy, unscoped) are re-keyed under the
 *   current session hash.
 * - Keys under the transitional `_nosession:` bucket are re-keyed under the
 *   current session hash.
 * - Every other key is left out of the result.
 *
 * A migrated entry never replaces an entry that the persisted state already
 * stores under the same session-scoped key: the current session's own record
 * wins regardless of key iteration order.
 *
 * @param persisted - Persisted stats state, or `undefined` when none was read.
 * @param currentSessionHash - Hash of the active session; when missing or empty
 *   nothing is restored.
 * @returns A new record keyed by `${currentSessionHash}:...`. Stats objects are
 *   shared by reference with `persisted`, not copied.
 */
function filterRestorableStatsForSession(
  persisted: CacheStatsState | undefined,
  currentSessionHash?: string,
): Record<string, CacheStats> {
  if (!persisted || !currentSessionHash) return {};

  const noSessionPrefix = "_nosession:";
  const prefix = `${currentSessionHash}:`;
  const source = persisted.statsByModel;
  const filteredModelStats: Record<string, CacheStats> = {};

  for (const [fullKey, stats] of Object.entries(source)) {
    if (fullKey.startsWith(prefix)) {
      // Already scoped to this session: keep as-is.
      filteredModelStats[fullKey] = stats;
      continue;
    }

    let migratedKey: string | undefined;
    if (!fullKey.includes(":")) {
      // Legacy v3-style key without session hash: migrate to current session.
      migratedKey = `${prefix}${fullKey}`;
    } else if (fullKey.startsWith(noSessionPrefix)) {
      // Transitional _nosession bucket: migrate to current session.
      migratedKey = `${prefix}${fullKey.slice(noSessionPrefix.length)}`;
    }
    if (migratedKey === undefined) continue;

    // Do not let a stale legacy bucket clobber the session's own record.
    if (Object.prototype.hasOwnProperty.call(source, migratedKey)) continue;

    filteredModelStats[migratedKey] = stats;
  }

  return filteredModelStats;
}
|
|
3103
|
+
|
|
3004
3104
|
/**
|
|
3005
3105
|
* The closure-internal writer. Since the closure has access to currentSessionHash,
|
|
3006
3106
|
* it passes the hash and statsByModel here. This function wraps them in the v4
|
|
@@ -3326,9 +3426,9 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
3326
3426
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
3327
3427
|
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
|
|
3328
3428
|
if (deepSeekCompatApplicable) {
|
|
3329
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
3429
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3330
3430
|
} else {
|
|
3331
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3431
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3332
3432
|
}
|
|
3333
3433
|
} else if (deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
3334
3434
|
lines.push("✅ Compat fully configured.");
|
|
@@ -3411,6 +3511,13 @@ function buildLowHitDiagnosis(
|
|
|
3411
3511
|
const hasRouterRisk = routerNotes.length > 0;
|
|
3412
3512
|
const hasUsageMissing = missingUsageSamples > 0;
|
|
3413
3513
|
|
|
3514
|
+
// Today's cached-token ratio is used both inside and outside the recent-sample
|
|
3515
|
+
// branch. Keep it block-external so doctor/stats never throw for low-hit
|
|
3516
|
+
// models that have persisted counters but no recent in-memory samples.
|
|
3517
|
+
const todayHitRatio = todayStats.totalInputTokens > 0
|
|
3518
|
+
? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
|
|
3519
|
+
: 0;
|
|
3520
|
+
|
|
3414
3521
|
// Determine if there are actual issues worth flagging
|
|
3415
3522
|
const hasActualIssues = hasMissingCompat || hasUsageMissing ||
|
|
3416
3523
|
// Low hit trend (today total > 3 and hit ratio < 30%)
|
|
@@ -3451,10 +3558,6 @@ function buildLowHitDiagnosis(
|
|
|
3451
3558
|
// Priority 4: recent trend low
|
|
3452
3559
|
if (recent10Total > 0) {
|
|
3453
3560
|
const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
|
|
3454
|
-
const todayHitRatio = todayStats.totalInputTokens > 0
|
|
3455
|
-
? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
|
|
3456
|
-
: 0;
|
|
3457
|
-
|
|
3458
3561
|
if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
3459
3562
|
lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
|
|
3460
3563
|
lines.push(" Likely causes: proxy routing to different backends per request,");
|
|
@@ -3504,9 +3607,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
3504
3607
|
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
3505
3608
|
lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
|
|
3506
3609
|
if (deepSeekCompatApplicable) {
|
|
3507
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
3610
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3508
3611
|
} else {
|
|
3509
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3612
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3510
3613
|
}
|
|
3511
3614
|
}
|
|
3512
3615
|
|
|
@@ -3570,6 +3673,7 @@ export const __internals_for_tests = {
|
|
|
3570
3673
|
isOfficialOpenAIBaseUrl,
|
|
3571
3674
|
isCompatCheckApplicable,
|
|
3572
3675
|
isPromptCacheRetention400Applicable,
|
|
3676
|
+
hasPromptCacheRetentionUnsupportedSignal,
|
|
3573
3677
|
// Non-GPT OpenAI-compatible model detection
|
|
3574
3678
|
isKimiLikeModel,
|
|
3575
3679
|
isKimiLikeAssistantMessage,
|
|
@@ -3686,8 +3790,10 @@ export const __internals_for_tests = {
|
|
|
3686
3790
|
getAssistantMessageModelTokenValues,
|
|
3687
3791
|
getCompat,
|
|
3688
3792
|
modelKey,
|
|
3689
|
-
// Platform-friendly path
|
|
3793
|
+
// Platform-friendly path helpers
|
|
3690
3794
|
getModelsJsonDisplayPath,
|
|
3795
|
+
buildProviderCompatOverride,
|
|
3796
|
+
buildModelCompatOverride,
|
|
3691
3797
|
captureCacheRetentionEnv,
|
|
3692
3798
|
requestLongCacheRetention,
|
|
3693
3799
|
restoreCacheRetentionEnv,
|
|
@@ -3724,6 +3830,7 @@ export const __internals_for_tests = {
|
|
|
3724
3830
|
hashSessionId,
|
|
3725
3831
|
makeSessionModelKey,
|
|
3726
3832
|
modelKeyFromSessionKey,
|
|
3833
|
+
filterRestorableStatsForSession,
|
|
3727
3834
|
// Persistence helpers (for reload/reset tests)
|
|
3728
3835
|
mergeCacheSessions,
|
|
3729
3836
|
writePersistedCacheStats,
|
|
@@ -3750,6 +3857,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
3750
3857
|
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
3751
3858
|
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
3752
3859
|
|
|
3860
|
+
/**
 * Brings the tracked session id and hash in line with the session manager.
 *
 * Reads the session id from `ctx.sessionManager`. When an id is present and it
 * either differs from the tracked one or no hash has been computed yet, the
 * tracked id, its hash (via `hashSessionId`) and the "hash set" flag are
 * updated. Otherwise nothing changes.
 *
 * @param ctx - Any context exposing `sessionManager`.
 */
function syncSessionHash(ctx: Pick<ExtensionContext, "sessionManager">): void {
  const sessionId = ctx.sessionManager.getSessionId();

  // No session id available: leave the tracked state untouched.
  if (!sessionId) return;

  // Same session and its hash is already computed: nothing to refresh.
  if (sessionId === currentSessionId && currentSessionHashSet) return;

  currentSessionId = sessionId;
  currentSessionHash = hashSessionId(sessionId);
  currentSessionHashSet = true;
}
|
|
3868
|
+
|
|
3753
3869
|
/**
|
|
3754
3870
|
* Build a session-scoped stats key from the current session hash + model key.
|
|
3755
3871
|
* Returns `${sessionHash}:${provider}/${id}`.
|
|
@@ -3906,13 +4022,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3906
4022
|
}
|
|
3907
4023
|
|
|
3908
4024
|
async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
|
|
3909
|
-
|
|
3910
|
-
const sid = ctx.sessionManager.getSessionId();
|
|
3911
|
-
if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
|
|
3912
|
-
currentSessionId = sid;
|
|
3913
|
-
currentSessionHash = hashSessionId(sid);
|
|
3914
|
-
currentSessionHashSet = true;
|
|
3915
|
-
}
|
|
4025
|
+
syncSessionHash(ctx);
|
|
3916
4026
|
|
|
3917
4027
|
if (reason === "reload") {
|
|
3918
4028
|
// /reload: preserve session-scoped stats (same session hash).
|
|
@@ -3924,73 +4034,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
3924
4034
|
clearRecentSamples();
|
|
3925
4035
|
|
|
3926
4036
|
const persisted = await readPersistedCacheStats();
|
|
3927
|
-
|
|
3928
|
-
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
3932
|
-
// Current session's data
|
|
3933
|
-
filteredModelStats[fullKey] = stats;
|
|
3934
|
-
} else if (!fullKey.includes(":")) {
|
|
3935
|
-
// Legacy v3-style key without session hash — migrate to current session
|
|
3936
|
-
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3937
|
-
} else if (fullKey.startsWith("_nosession:")) {
|
|
3938
|
-
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3939
|
-
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3940
|
-
}
|
|
3941
|
-
}
|
|
3942
|
-
cacheStatsByModel = filteredModelStats;
|
|
3943
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3944
|
-
} else if (persisted) {
|
|
3945
|
-
cacheStatsByModel = persisted.statsByModel;
|
|
3946
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3947
|
-
} else {
|
|
3948
|
-
cacheStatsByModel = {};
|
|
3949
|
-
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
3950
|
-
}
|
|
4037
|
+
cacheStatsByModel = filterRestorableStatsForSession(
|
|
4038
|
+
persisted,
|
|
4039
|
+
currentSessionHashSet ? currentSessionHash : undefined,
|
|
4040
|
+
);
|
|
4041
|
+
cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
|
|
3951
4042
|
|
|
3952
4043
|
await rollOverStatsIfNeeded(ctx);
|
|
3953
4044
|
return;
|
|
3954
4045
|
}
|
|
3955
4046
|
|
|
3956
4047
|
// First load / process start: read persisted stats and filter for
|
|
3957
|
-
// this session's entries. If the session
|
|
3958
|
-
//
|
|
4048
|
+
// this session's entries. If the session hash is unavailable, start
|
|
4049
|
+
// fresh instead of loading all persisted session buckets.
|
|
3959
4050
|
const persisted = await readPersistedCacheStats();
|
|
3960
|
-
|
|
3961
|
-
|
|
3962
|
-
|
|
3963
|
-
|
|
3964
|
-
|
|
3965
|
-
// Current session's data — load it.
|
|
3966
|
-
filteredModelStats[fullKey] = stats;
|
|
3967
|
-
} else if (!fullKey.includes(":")) {
|
|
3968
|
-
// Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
|
|
3969
|
-
// Migrate to current session by prefixing with the session hash.
|
|
3970
|
-
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3971
|
-
} else if (fullKey.startsWith("_nosession:")) {
|
|
3972
|
-
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3973
|
-
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3974
|
-
}
|
|
3975
|
-
// Other sessions' entries are preserved in the file but not loaded
|
|
3976
|
-
// into memory; they'll be rewritten on next persist.
|
|
3977
|
-
}
|
|
3978
|
-
cacheStatsByModel = filteredModelStats;
|
|
3979
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3980
|
-
} else if (persisted) {
|
|
3981
|
-
// Persisted data exists but no session hash set yet.
|
|
3982
|
-
// This shouldn't normally happen — use the data as-is.
|
|
3983
|
-
cacheStatsByModel = persisted.statsByModel;
|
|
3984
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3985
|
-
} else {
|
|
3986
|
-
cacheStatsByModel = {};
|
|
3987
|
-
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
3988
|
-
}
|
|
4051
|
+
cacheStatsByModel = filterRestorableStatsForSession(
|
|
4052
|
+
persisted,
|
|
4053
|
+
currentSessionHashSet ? currentSessionHash : undefined,
|
|
4054
|
+
);
|
|
4055
|
+
cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
|
|
3989
4056
|
lastStatusText = undefined;
|
|
3990
4057
|
await rollOverStatsIfNeeded(ctx);
|
|
3991
4058
|
}
|
|
3992
4059
|
|
|
3993
4060
|
async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
|
|
4061
|
+
syncSessionHash(ctx);
|
|
3994
4062
|
await rollOverStatsIfNeeded(ctx);
|
|
3995
4063
|
|
|
3996
4064
|
const adapter = selectAdapterForModel(model);
|
|
@@ -4158,6 +4226,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4158
4226
|
if (!runtimeOptimizerEnabled || !model) return;
|
|
4159
4227
|
if (event.status !== 400) return;
|
|
4160
4228
|
if (!isPromptCacheRetention400Applicable(model)) return;
|
|
4229
|
+
if (!hasPromptCacheRetentionUnsupportedSignal(event.headers)) return;
|
|
4161
4230
|
|
|
4162
4231
|
const key = modelKey(model);
|
|
4163
4232
|
promptCacheRetention400Models.add(key);
|
|
@@ -4172,6 +4241,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4172
4241
|
});
|
|
4173
4242
|
|
|
4174
4243
|
pi.on("message_end", async (event, ctx) => {
|
|
4244
|
+
syncSessionHash(ctx);
|
|
4175
4245
|
const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
|
|
4176
4246
|
if (!adapter) return;
|
|
4177
4247
|
|
|
@@ -4218,6 +4288,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4218
4288
|
pi.registerCommand("cache-optimizer", {
|
|
4219
4289
|
description: "Diagnose Pi cache configuration",
|
|
4220
4290
|
handler: async (args: string, cmdCtx) => {
|
|
4291
|
+
syncSessionHash(cmdCtx);
|
|
4221
4292
|
const model = cmdCtx.model;
|
|
4222
4293
|
const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
|
|
4223
4294
|
|
package/package.json
CHANGED