pi-cache-optimizer 2.5.5 → 2.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/README.zh-CN.md +36 -0
- package/index.ts +199 -89
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,6 +101,42 @@ Notes:
|
|
|
101
101
|
- For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
|
|
102
102
|
- This extension only advises; it does not edit `models.json`.
|
|
103
103
|
|
|
104
|
+
### Channels without a `models.json` provider entry
|
|
105
|
+
|
|
106
|
+
Some Pi channels may be available even when there is no provider block in `~/.pi/agent/models.json` yet. Keep existing authentication as-is and do not copy credentials, tokens, or API keys. Add only cache/routing compatibility overrides in `models.json`.
|
|
107
|
+
|
|
108
|
+
Provider-level minimal override:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"providers": {
|
|
113
|
+
"your-provider-id": {
|
|
114
|
+
"compat": {
|
|
115
|
+
"sendSessionAffinityHeaders": true
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
If only one model should change, use `modelOverrides`:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"providers": {
|
|
127
|
+
"your-provider-id": {
|
|
128
|
+
"modelOverrides": {
|
|
129
|
+
"gpt-5.5": {
|
|
130
|
+
"compat": {
|
|
131
|
+
"sendSessionAffinityHeaders": true
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
104
140
|
## Footer stats
|
|
105
141
|
|
|
106
142
|
Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
|
package/README.zh-CN.md
CHANGED
|
@@ -101,6 +101,42 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
|
|
|
101
101
|
- 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
|
|
102
102
|
- 本扩展只给建议,不会修改 `models.json`。
|
|
103
103
|
|
|
104
|
+
### 没有 `models.json` provider entry 的渠道
|
|
105
|
+
|
|
106
|
+
有些 Pi 渠道可用时,`~/.pi/agent/models.json` 里可能还没有对应 provider block。保留现有认证方式,不要复制 credential、token 或 API key。只在 `models.json` 里添加缓存 / 路由兼容覆盖。
|
|
107
|
+
|
|
108
|
+
Provider 级最小 override:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"providers": {
|
|
113
|
+
"your-provider-id": {
|
|
114
|
+
"compat": {
|
|
115
|
+
"sendSessionAffinityHeaders": true
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
如果只想影响单个模型,用 `modelOverrides`:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"providers": {
|
|
127
|
+
"your-provider-id": {
|
|
128
|
+
"modelOverrides": {
|
|
129
|
+
"gpt-5.5": {
|
|
130
|
+
"compat": {
|
|
131
|
+
"sendSessionAffinityHeaders": true
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
104
140
|
## Footer 统计
|
|
105
141
|
|
|
106
142
|
统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
|
package/index.ts
CHANGED
|
@@ -775,6 +775,19 @@ function isOpenAICompatibleApi(api: unknown): boolean {
|
|
|
775
775
|
return value === "openai-completions" || value === "openai-responses";
|
|
776
776
|
}
|
|
777
777
|
|
|
778
|
+
function isOpenAICompatibleProxyApi(api: unknown): boolean {
|
|
779
|
+
return lower(api) === "openai-completions";
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
function isResponsesPromptRewriteBypassApi(api: unknown): boolean {
|
|
783
|
+
const value = lower(api);
|
|
784
|
+
return value === "openai-codex-responses" || value === "openai-responses" || value === "azure-openai-responses";
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
function isMistralConversationsApi(api: unknown): boolean {
|
|
788
|
+
return lower(api) === "mistral-conversations";
|
|
789
|
+
}
|
|
790
|
+
|
|
778
791
|
function isOpenAIFamilyToken(token: string): boolean {
|
|
779
792
|
return token.includes("gpt-") || token.includes("chatgpt") || OPENAI_REASONING_MODEL_PATTERN.test(token);
|
|
780
793
|
}
|
|
@@ -1476,7 +1489,9 @@ function isNonEmptyString(value: unknown): boolean {
|
|
|
1476
1489
|
|
|
1477
1490
|
function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
1478
1491
|
const value = lower(model.baseUrl).trim();
|
|
1479
|
-
if (!value)
|
|
1492
|
+
if (!value) {
|
|
1493
|
+
return lower(model.provider) === "openai";
|
|
1494
|
+
}
|
|
1480
1495
|
|
|
1481
1496
|
try {
|
|
1482
1497
|
return new URL(value).hostname === "api.openai.com";
|
|
@@ -1490,7 +1505,7 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
|
|
|
1490
1505
|
const missing: string[] = [];
|
|
1491
1506
|
|
|
1492
1507
|
if (!isOpenAIFamilyModel(model)) return missing;
|
|
1493
|
-
if (
|
|
1508
|
+
if (!isOpenAICompatibleProxyApi(model.api)) return missing;
|
|
1494
1509
|
if (isOfficialOpenAIBaseUrl(model)) return missing;
|
|
1495
1510
|
|
|
1496
1511
|
if (compat.supportsLongCacheRetention !== true) {
|
|
@@ -1513,7 +1528,7 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
|
|
|
1513
1528
|
const compat = getCompat(model);
|
|
1514
1529
|
const missing: string[] = [];
|
|
1515
1530
|
|
|
1516
|
-
if (
|
|
1531
|
+
if (!isOpenAICompatibleProxyApi(model.api)) return missing;
|
|
1517
1532
|
if (isOfficialOpenAIBaseUrl(model)) return missing;
|
|
1518
1533
|
|
|
1519
1534
|
if (compat.supportsLongCacheRetention !== true) {
|
|
@@ -1538,7 +1553,76 @@ function getPromptCacheRetentionUnsupportedHint(): string {
|
|
|
1538
1553
|
return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
|
|
1539
1554
|
}
|
|
1540
1555
|
|
|
1541
|
-
function
|
|
1556
|
+
function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
|
|
1557
|
+
if (!headers) return false;
|
|
1558
|
+
|
|
1559
|
+
const normalized = Object.entries(headers)
|
|
1560
|
+
.map(([key, value]) => `${lower(key)}: ${lower(value)}`)
|
|
1561
|
+
.join("\n");
|
|
1562
|
+
if (!normalized.includes("prompt_cache_retention")) return false;
|
|
1563
|
+
|
|
1564
|
+
return [
|
|
1565
|
+
"unsupported parameter",
|
|
1566
|
+
"unsupported_parameter",
|
|
1567
|
+
"unknown parameter",
|
|
1568
|
+
"not supported",
|
|
1569
|
+
"unsupported field",
|
|
1570
|
+
].some((needle) => normalized.includes(needle));
|
|
1571
|
+
}
|
|
1572
|
+
|
|
1573
|
+
type CompatAdvicePlacement = {
|
|
1574
|
+
providerLabel?: string;
|
|
1575
|
+
modelId?: string;
|
|
1576
|
+
};
|
|
1577
|
+
|
|
1578
|
+
function buildProviderCompatOverride(providerLabel: string, compat: Record<string, unknown>): Record<string, unknown> {
|
|
1579
|
+
return {
|
|
1580
|
+
providers: {
|
|
1581
|
+
[providerLabel]: {
|
|
1582
|
+
compat,
|
|
1583
|
+
},
|
|
1584
|
+
},
|
|
1585
|
+
};
|
|
1586
|
+
}
|
|
1587
|
+
|
|
1588
|
+
function buildModelCompatOverride(providerLabel: string, modelId: string, compat: Record<string, unknown>): Record<string, unknown> {
|
|
1589
|
+
return {
|
|
1590
|
+
providers: {
|
|
1591
|
+
[providerLabel]: {
|
|
1592
|
+
modelOverrides: {
|
|
1593
|
+
[modelId]: {
|
|
1594
|
+
compat,
|
|
1595
|
+
},
|
|
1596
|
+
},
|
|
1597
|
+
},
|
|
1598
|
+
},
|
|
1599
|
+
};
|
|
1600
|
+
}
|
|
1601
|
+
|
|
1602
|
+
function appendCredentialSafeProviderGuidance(lines: string[], placement: CompatAdvicePlacement, compatSuggestion: Record<string, unknown>): void {
|
|
1603
|
+
const providerLabel = placement.providerLabel;
|
|
1604
|
+
if (!providerLabel) return;
|
|
1605
|
+
|
|
1606
|
+
lines.push("");
|
|
1607
|
+
lines.push("If this channel has no models.json provider entry yet:");
|
|
1608
|
+
lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
|
|
1609
|
+
lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
|
|
1610
|
+
|
|
1611
|
+
if (Object.keys(compatSuggestion).length === 0) {
|
|
1612
|
+
lines.push("- No safe copyable override is available for the missing flags shown above.");
|
|
1613
|
+
return;
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
lines.push("Provider-level minimal override:");
|
|
1617
|
+
lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
|
|
1618
|
+
|
|
1619
|
+
if (placement.modelId) {
|
|
1620
|
+
lines.push("Single-model override (use this if only this model should change):");
|
|
1621
|
+
lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
|
|
1622
|
+
}
|
|
1623
|
+
}
|
|
1624
|
+
|
|
1625
|
+
function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } & CompatAdvicePlacement = {}): void {
|
|
1542
1626
|
const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);
|
|
1543
1627
|
const hasSafeSuggestion = Object.keys(suggestion).length > 0;
|
|
1544
1628
|
|
|
@@ -1558,6 +1642,8 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
|
|
|
1558
1642
|
lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
|
|
1559
1643
|
lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
1560
1644
|
}
|
|
1645
|
+
|
|
1646
|
+
appendCredentialSafeProviderGuidance(lines, options, suggestion);
|
|
1561
1647
|
}
|
|
1562
1648
|
|
|
1563
1649
|
/**
|
|
@@ -1577,6 +1663,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
1577
1663
|
// If no slash is found, fall back to the key itself.
|
|
1578
1664
|
const slashIdx = key.indexOf("/");
|
|
1579
1665
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1666
|
+
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1580
1667
|
|
|
1581
1668
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1582
1669
|
const lines: string[] = [
|
|
@@ -1585,7 +1672,7 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
1585
1672
|
``,
|
|
1586
1673
|
];
|
|
1587
1674
|
|
|
1588
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
1675
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
1589
1676
|
|
|
1590
1677
|
return lines.join("\n");
|
|
1591
1678
|
}
|
|
@@ -1647,7 +1734,7 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
|
|
|
1647
1734
|
return suggestion;
|
|
1648
1735
|
}
|
|
1649
1736
|
|
|
1650
|
-
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): void {
|
|
1737
|
+
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
1651
1738
|
const suggestion = buildDeepSeekCompatSuggestion(missing);
|
|
1652
1739
|
if (Object.keys(suggestion).length > 0) {
|
|
1653
1740
|
lines.push("Recommended DeepSeek compat snippet:");
|
|
@@ -1669,11 +1756,14 @@ function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[]): vo
|
|
|
1669
1756
|
if (missing.includes("supportsLongCacheRetention")) {
|
|
1670
1757
|
lines.push("- supportsLongCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
|
|
1671
1758
|
}
|
|
1759
|
+
|
|
1760
|
+
appendCredentialSafeProviderGuidance(lines, placement, suggestion);
|
|
1672
1761
|
}
|
|
1673
1762
|
|
|
1674
1763
|
function buildDeepSeekCompatWarningText(key: string, missing: string[]): string {
|
|
1675
1764
|
const slashIdx = key.indexOf("/");
|
|
1676
1765
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1766
|
+
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1677
1767
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1678
1768
|
const lines: string[] = [
|
|
1679
1769
|
`💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
|
|
@@ -1681,7 +1771,7 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
|
|
|
1681
1771
|
"",
|
|
1682
1772
|
];
|
|
1683
1773
|
|
|
1684
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
1774
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
1685
1775
|
|
|
1686
1776
|
return lines.join("\n");
|
|
1687
1777
|
}
|
|
@@ -3001,6 +3091,29 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
|
|
|
3001
3091
|
return undefined;
|
|
3002
3092
|
}
|
|
3003
3093
|
|
|
3094
|
+
function filterRestorableStatsForSession(
|
|
3095
|
+
persisted: CacheStatsState | undefined,
|
|
3096
|
+
currentSessionHash?: string,
|
|
3097
|
+
): Record<string, CacheStats> {
|
|
3098
|
+
if (!persisted || !currentSessionHash) return {};
|
|
3099
|
+
|
|
3100
|
+
const prefix = `${currentSessionHash}:`;
|
|
3101
|
+
const filteredModelStats: Record<string, CacheStats> = {};
|
|
3102
|
+
for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
|
|
3103
|
+
if (fullKey.startsWith(prefix)) {
|
|
3104
|
+
filteredModelStats[fullKey] = stats;
|
|
3105
|
+
} else if (!fullKey.includes(":")) {
|
|
3106
|
+
// Legacy v3-style key without session hash — migrate to current session.
|
|
3107
|
+
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3108
|
+
} else if (fullKey.startsWith("_nosession:")) {
|
|
3109
|
+
// Transitional _nosession bucket — migrate to current session.
|
|
3110
|
+
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3111
|
+
}
|
|
3112
|
+
}
|
|
3113
|
+
|
|
3114
|
+
return filteredModelStats;
|
|
3115
|
+
}
|
|
3116
|
+
|
|
3004
3117
|
/**
|
|
3005
3118
|
* The closure-internal writer. Since the closure has access to currentSessionHash,
|
|
3006
3119
|
* it passes the hash and statsByModel here. This function wraps them in the v4
|
|
@@ -3128,7 +3241,7 @@ async function writePersistedCacheStats(state: CacheStatsState, currentSessionHa
|
|
|
3128
3241
|
|
|
3129
3242
|
|
|
3130
3243
|
function isCompatCheckApplicable(model: PiModel): boolean {
|
|
3131
|
-
return
|
|
3244
|
+
return isOpenAICompatibleProxyApi(model.api) && !isOfficialOpenAIBaseUrl(model);
|
|
3132
3245
|
}
|
|
3133
3246
|
|
|
3134
3247
|
function isPromptCacheRetention400Applicable(model: PiModel): boolean {
|
|
@@ -3163,10 +3276,10 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3163
3276
|
const baseUrl = lower(model.baseUrl || "");
|
|
3164
3277
|
const provider = lower(model.provider);
|
|
3165
3278
|
|
|
3166
|
-
//
|
|
3167
|
-
//
|
|
3168
|
-
// or
|
|
3169
|
-
if (api
|
|
3279
|
+
// Router/channel diagnostics only apply to OpenAI-compatible proxy APIs.
|
|
3280
|
+
// Native APIs like mistral-conversations, azure-openai-responses,
|
|
3281
|
+
// anthropic-messages, or bedrock-converse-stream are intentionally excluded.
|
|
3282
|
+
if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
|
|
3170
3283
|
return notes;
|
|
3171
3284
|
}
|
|
3172
3285
|
|
|
@@ -3305,6 +3418,33 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3305
3418
|
return notes;
|
|
3306
3419
|
}
|
|
3307
3420
|
|
|
3421
|
+
function getCompatCheckNotApplicableLines(model: PiModel): string[] {
|
|
3422
|
+
const api = lower(model.api);
|
|
3423
|
+
|
|
3424
|
+
if (isMistralConversationsApi(api)) {
|
|
3425
|
+
return [
|
|
3426
|
+
"ℹ️ Compat check not applicable for this model.",
|
|
3427
|
+
" Native Mistral `mistral-conversations` uses provider-native transport; OpenAI-compatible proxy compat flags do not apply.",
|
|
3428
|
+
];
|
|
3429
|
+
}
|
|
3430
|
+
|
|
3431
|
+
if (api === "azure-openai-responses") {
|
|
3432
|
+
return [
|
|
3433
|
+
"ℹ️ Compat check not applicable for this model.",
|
|
3434
|
+
" Native Azure OpenAI Responses uses the Responses transport; OpenAI-compatible proxy compat flags do not apply.",
|
|
3435
|
+
];
|
|
3436
|
+
}
|
|
3437
|
+
|
|
3438
|
+
if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
|
|
3439
|
+
return [
|
|
3440
|
+
"ℹ️ Compat check not applicable for this model.",
|
|
3441
|
+
" Native Responses transports already use Pi core request handling; OpenAI-compatible proxy compat flags do not apply.",
|
|
3442
|
+
];
|
|
3443
|
+
}
|
|
3444
|
+
|
|
3445
|
+
return ["ℹ️ Compat check not applicable for this model."];
|
|
3446
|
+
}
|
|
3447
|
+
|
|
3308
3448
|
function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
|
|
3309
3449
|
const lines: string[] = [];
|
|
3310
3450
|
lines.push(`Provider: ${model.provider}`);
|
|
@@ -3326,14 +3466,14 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
3326
3466
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
3327
3467
|
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
|
|
3328
3468
|
if (deepSeekCompatApplicable) {
|
|
3329
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
3469
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3330
3470
|
} else {
|
|
3331
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3471
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3332
3472
|
}
|
|
3333
3473
|
} else if (deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
3334
3474
|
lines.push("✅ Compat fully configured.");
|
|
3335
3475
|
} else {
|
|
3336
|
-
lines.push(
|
|
3476
|
+
lines.push(...getCompatCheckNotApplicableLines(model));
|
|
3337
3477
|
}
|
|
3338
3478
|
|
|
3339
3479
|
if (isPromptCacheRetention400Applicable(model)) {
|
|
@@ -3507,9 +3647,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
3507
3647
|
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
3508
3648
|
lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
|
|
3509
3649
|
if (deepSeekCompatApplicable) {
|
|
3510
|
-
appendDeepSeekCompatAdviceLines(lines, missing);
|
|
3650
|
+
appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3511
3651
|
} else {
|
|
3512
|
-
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3652
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
3513
3653
|
}
|
|
3514
3654
|
}
|
|
3515
3655
|
|
|
@@ -3521,7 +3661,7 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
3521
3661
|
lines.push(getPromptCacheRetentionUnsupportedHint());
|
|
3522
3662
|
}
|
|
3523
3663
|
} else {
|
|
3524
|
-
lines.push(
|
|
3664
|
+
lines.push(...getCompatCheckNotApplicableLines(model));
|
|
3525
3665
|
}
|
|
3526
3666
|
lines.push("");
|
|
3527
3667
|
}
|
|
@@ -3558,6 +3698,9 @@ export const __internals_for_tests = {
|
|
|
3558
3698
|
isNonEmptyString,
|
|
3559
3699
|
shouldInjectOpenAIPromptCacheKey,
|
|
3560
3700
|
isOpenAICompatibleApi,
|
|
3701
|
+
isOpenAICompatibleProxyApi,
|
|
3702
|
+
isResponsesPromptRewriteBypassApi,
|
|
3703
|
+
isMistralConversationsApi,
|
|
3561
3704
|
isOpenAIFamilyModel,
|
|
3562
3705
|
isOpenAIFamilyAssistantMessage,
|
|
3563
3706
|
isOpenAIFamilyToken,
|
|
@@ -3573,6 +3716,7 @@ export const __internals_for_tests = {
|
|
|
3573
3716
|
isOfficialOpenAIBaseUrl,
|
|
3574
3717
|
isCompatCheckApplicable,
|
|
3575
3718
|
isPromptCacheRetention400Applicable,
|
|
3719
|
+
hasPromptCacheRetentionUnsupportedSignal,
|
|
3576
3720
|
// Non-GPT OpenAI-compatible model detection
|
|
3577
3721
|
isKimiLikeModel,
|
|
3578
3722
|
isKimiLikeAssistantMessage,
|
|
@@ -3689,8 +3833,10 @@ export const __internals_for_tests = {
|
|
|
3689
3833
|
getAssistantMessageModelTokenValues,
|
|
3690
3834
|
getCompat,
|
|
3691
3835
|
modelKey,
|
|
3692
|
-
// Platform-friendly path
|
|
3836
|
+
// Platform-friendly path helpers
|
|
3693
3837
|
getModelsJsonDisplayPath,
|
|
3838
|
+
buildProviderCompatOverride,
|
|
3839
|
+
buildModelCompatOverride,
|
|
3694
3840
|
captureCacheRetentionEnv,
|
|
3695
3841
|
requestLongCacheRetention,
|
|
3696
3842
|
restoreCacheRetentionEnv,
|
|
@@ -3727,6 +3873,7 @@ export const __internals_for_tests = {
|
|
|
3727
3873
|
hashSessionId,
|
|
3728
3874
|
makeSessionModelKey,
|
|
3729
3875
|
modelKeyFromSessionKey,
|
|
3876
|
+
filterRestorableStatsForSession,
|
|
3730
3877
|
// Persistence helpers (for reload/reset tests)
|
|
3731
3878
|
mergeCacheSessions,
|
|
3732
3879
|
writePersistedCacheStats,
|
|
@@ -3753,6 +3900,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
3753
3900
|
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
3754
3901
|
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
3755
3902
|
|
|
3903
|
+
function syncSessionHash(ctx: Pick<ExtensionContext, "sessionManager">): void {
|
|
3904
|
+
const sid = ctx.sessionManager.getSessionId();
|
|
3905
|
+
if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
|
|
3906
|
+
currentSessionId = sid;
|
|
3907
|
+
currentSessionHash = hashSessionId(sid);
|
|
3908
|
+
currentSessionHashSet = true;
|
|
3909
|
+
}
|
|
3910
|
+
}
|
|
3911
|
+
|
|
3756
3912
|
/**
|
|
3757
3913
|
* Build a session-scoped stats key from the current session hash + model key.
|
|
3758
3914
|
* Returns `${sessionHash}:${provider}/${id}`.
|
|
@@ -3909,13 +4065,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3909
4065
|
}
|
|
3910
4066
|
|
|
3911
4067
|
async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
|
|
3912
|
-
|
|
3913
|
-
const sid = ctx.sessionManager.getSessionId();
|
|
3914
|
-
if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
|
|
3915
|
-
currentSessionId = sid;
|
|
3916
|
-
currentSessionHash = hashSessionId(sid);
|
|
3917
|
-
currentSessionHashSet = true;
|
|
3918
|
-
}
|
|
4068
|
+
syncSessionHash(ctx);
|
|
3919
4069
|
|
|
3920
4070
|
if (reason === "reload") {
|
|
3921
4071
|
// /reload: preserve session-scoped stats (same session hash).
|
|
@@ -3927,73 +4077,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
3927
4077
|
clearRecentSamples();
|
|
3928
4078
|
|
|
3929
4079
|
const persisted = await readPersistedCacheStats();
|
|
3930
|
-
|
|
3931
|
-
|
|
3932
|
-
|
|
3933
|
-
|
|
3934
|
-
|
|
3935
|
-
// Current session's data
|
|
3936
|
-
filteredModelStats[fullKey] = stats;
|
|
3937
|
-
} else if (!fullKey.includes(":")) {
|
|
3938
|
-
// Legacy v3-style key without session hash — migrate to current session
|
|
3939
|
-
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3940
|
-
} else if (fullKey.startsWith("_nosession:")) {
|
|
3941
|
-
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3942
|
-
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3943
|
-
}
|
|
3944
|
-
}
|
|
3945
|
-
cacheStatsByModel = filteredModelStats;
|
|
3946
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3947
|
-
} else if (persisted) {
|
|
3948
|
-
cacheStatsByModel = persisted.statsByModel;
|
|
3949
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3950
|
-
} else {
|
|
3951
|
-
cacheStatsByModel = {};
|
|
3952
|
-
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
3953
|
-
}
|
|
4080
|
+
cacheStatsByModel = filterRestorableStatsForSession(
|
|
4081
|
+
persisted,
|
|
4082
|
+
currentSessionHashSet ? currentSessionHash : undefined,
|
|
4083
|
+
);
|
|
4084
|
+
cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
|
|
3954
4085
|
|
|
3955
4086
|
await rollOverStatsIfNeeded(ctx);
|
|
3956
4087
|
return;
|
|
3957
4088
|
}
|
|
3958
4089
|
|
|
3959
4090
|
// First load / process start: read persisted stats and filter for
|
|
3960
|
-
// this session's entries. If the session
|
|
3961
|
-
//
|
|
4091
|
+
// this session's entries. If the session hash is unavailable, start
|
|
4092
|
+
// fresh instead of loading all persisted session buckets.
|
|
3962
4093
|
const persisted = await readPersistedCacheStats();
|
|
3963
|
-
|
|
3964
|
-
|
|
3965
|
-
|
|
3966
|
-
|
|
3967
|
-
|
|
3968
|
-
// Current session's data — load it.
|
|
3969
|
-
filteredModelStats[fullKey] = stats;
|
|
3970
|
-
} else if (!fullKey.includes(":")) {
|
|
3971
|
-
// Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
|
|
3972
|
-
// Migrate to current session by prefixing with the session hash.
|
|
3973
|
-
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3974
|
-
} else if (fullKey.startsWith("_nosession:")) {
|
|
3975
|
-
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3976
|
-
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3977
|
-
}
|
|
3978
|
-
// Other sessions' entries are preserved in the file but not loaded
|
|
3979
|
-
// into memory; they'll be rewritten on next persist.
|
|
3980
|
-
}
|
|
3981
|
-
cacheStatsByModel = filteredModelStats;
|
|
3982
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3983
|
-
} else if (persisted) {
|
|
3984
|
-
// Persisted data exists but no session hash set yet.
|
|
3985
|
-
// This shouldn't normally happen — use the data as-is.
|
|
3986
|
-
cacheStatsByModel = persisted.statsByModel;
|
|
3987
|
-
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3988
|
-
} else {
|
|
3989
|
-
cacheStatsByModel = {};
|
|
3990
|
-
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
3991
|
-
}
|
|
4094
|
+
cacheStatsByModel = filterRestorableStatsForSession(
|
|
4095
|
+
persisted,
|
|
4096
|
+
currentSessionHashSet ? currentSessionHash : undefined,
|
|
4097
|
+
);
|
|
4098
|
+
cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
|
|
3992
4099
|
lastStatusText = undefined;
|
|
3993
4100
|
await rollOverStatsIfNeeded(ctx);
|
|
3994
4101
|
}
|
|
3995
4102
|
|
|
3996
4103
|
async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
|
|
4104
|
+
syncSessionHash(ctx);
|
|
3997
4105
|
await rollOverStatsIfNeeded(ctx);
|
|
3998
4106
|
|
|
3999
4107
|
const adapter = selectAdapterForModel(model);
|
|
@@ -4065,7 +4173,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4065
4173
|
|
|
4066
4174
|
pi.on("before_agent_start", async (event, _ctx) => {
|
|
4067
4175
|
// ────────────────────────────────────────────────────────────────
|
|
4068
|
-
// OpenAI Responses
|
|
4176
|
+
// OpenAI Responses-family bypass (codex-responses + responses + azure responses)
|
|
4069
4177
|
//
|
|
4070
4178
|
// OpenAI's Responses API endpoints — both the Codex backend
|
|
4071
4179
|
// (openai-codex-responses, chatgpt.com) and the public
|
|
@@ -4091,11 +4199,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
4091
4199
|
// that use openai-completions are unaffected.
|
|
4092
4200
|
// ────────────────────────────────────────────────────────────────
|
|
4093
4201
|
const model = _ctx.model;
|
|
4094
|
-
if (model) {
|
|
4095
|
-
|
|
4096
|
-
if (api === "openai-codex-responses" || api === "openai-responses") {
|
|
4097
|
-
return {};
|
|
4098
|
-
}
|
|
4202
|
+
if (model && isResponsesPromptRewriteBypassApi(model.api)) {
|
|
4203
|
+
return {};
|
|
4099
4204
|
}
|
|
4100
4205
|
|
|
4101
4206
|
if (!runtimeOptimizerEnabled) return {};
|
|
@@ -4161,6 +4266,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4161
4266
|
if (!runtimeOptimizerEnabled || !model) return;
|
|
4162
4267
|
if (event.status !== 400) return;
|
|
4163
4268
|
if (!isPromptCacheRetention400Applicable(model)) return;
|
|
4269
|
+
if (!hasPromptCacheRetentionUnsupportedSignal(event.headers)) return;
|
|
4164
4270
|
|
|
4165
4271
|
const key = modelKey(model);
|
|
4166
4272
|
promptCacheRetention400Models.add(key);
|
|
@@ -4175,6 +4281,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4175
4281
|
});
|
|
4176
4282
|
|
|
4177
4283
|
pi.on("message_end", async (event, ctx) => {
|
|
4284
|
+
syncSessionHash(ctx);
|
|
4178
4285
|
const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
|
|
4179
4286
|
if (!adapter) return;
|
|
4180
4287
|
|
|
@@ -4221,6 +4328,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4221
4328
|
pi.registerCommand("cache-optimizer", {
|
|
4222
4329
|
description: "Diagnose Pi cache configuration",
|
|
4223
4330
|
handler: async (args: string, cmdCtx) => {
|
|
4331
|
+
syncSessionHash(cmdCtx);
|
|
4224
4332
|
const model = cmdCtx.model;
|
|
4225
4333
|
const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
|
|
4226
4334
|
|
|
@@ -4274,7 +4382,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4274
4382
|
cmdCtx.ui.notify(
|
|
4275
4383
|
isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
4276
4384
|
? "✅ Compat fully configured."
|
|
4277
|
-
:
|
|
4385
|
+
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
4278
4386
|
"info",
|
|
4279
4387
|
);
|
|
4280
4388
|
}
|
|
@@ -4372,7 +4480,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4372
4480
|
cmdCtx.ui.notify(
|
|
4373
4481
|
isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
4374
4482
|
? "✅ Compat fully configured."
|
|
4375
|
-
:
|
|
4483
|
+
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
4376
4484
|
"info",
|
|
4377
4485
|
);
|
|
4378
4486
|
}
|
|
@@ -4425,6 +4533,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
4425
4533
|
diagnosis.push(`✅ Active model "${displayKey}": compat fully configured.`);
|
|
4426
4534
|
} else {
|
|
4427
4535
|
diagnosis.push(`ℹ️ Active model "${displayKey}": compat check not applicable.`);
|
|
4536
|
+
const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
|
|
4537
|
+
for (const line of detailLines) diagnosis.push(line);
|
|
4428
4538
|
}
|
|
4429
4539
|
} else {
|
|
4430
4540
|
diagnosis.push("No active model selected.");
|
package/package.json
CHANGED