pi-cache-optimizer 2.6.1 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -1
- package/README.zh-CN.md +70 -1
- package/index.ts +47 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -29,6 +29,7 @@ Pi extension for improving provider-side KV / prompt cache hit rates. It keeps s
|
|
|
29
29
|
- Requests long cache retention when Pi/provider compat supports it.
|
|
30
30
|
- Adds a session-id `prompt_cache_key` fallback for `openai-completions` / `openai-responses` payloads when no effective key exists.
|
|
31
31
|
- Warns once for third-party OpenAI-compatible proxies missing cache/session-affinity compat flags.
|
|
32
|
+
- Detects Anthropic adaptive thinking models (opus-4.6+, sonnet-4.6+, fable-5+) missing `forceAdaptiveThinking: true` compat.
|
|
32
33
|
- Shows session-scoped footer stats for supported model families.
|
|
33
34
|
|
|
34
35
|
Caching is provider-side and best-effort. Third-party proxies can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
|
|
@@ -58,6 +59,7 @@ Run `/reload` in Pi after install/update/remove so extension hooks refresh.
|
|
|
58
59
|
| `/cache-optimizer compat` | Shows copyable compat advice for the active model, if applicable. |
|
|
59
60
|
| `/cache-optimizer stats` | Shows today's session-scoped counters and recent trend for the active model. |
|
|
60
61
|
| `/cache-optimizer reset` | Resets only local stats for the active session + model; upstream provider cache is not modified. |
|
|
62
|
+
| `/cache-optimizer fix` | Auto-repairs safe compat issues for the active model (adaptive thinking, DeepSeek reasoning, OpenAI proxy session affinity). Shows a preview and a risk warning, and requires confirmation. **Only modifies `models.json` after explicit user approval.** |
|
|
61
63
|
|
|
62
64
|
`enable` / `disable` are current-process switches. For a persistent opt-out, use environment variables below.
|
|
63
65
|
|
|
@@ -99,7 +101,74 @@ Notes:
|
|
|
99
101
|
- If you see `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention` for that channel. Keep `sendSessionAffinityHeaders` if supported.
|
|
100
102
|
- Use `/cache-optimizer compat` or `/cache-optimizer doctor` to see model-specific advice.
|
|
101
103
|
- For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
|
|
102
|
-
- This extension only
|
|
104
|
+
- This extension's `doctor` and `compat` commands only advise; they do not modify `models.json`.
|
|
105
|
+
|
|
106
|
+
## Anthropic adaptive thinking models
|
|
107
|
+
|
|
108
|
+
Claude models from opus-4.6 / sonnet-4.6 / fable-5 onwards require `forceAdaptiveThinking: true` in compat. Without it, Pi sends the legacy thinking format and Anthropic rejects the request.
|
|
109
|
+
|
|
110
|
+
Pi's built-in catalog already sets this flag for official models. Custom channels in `models.json` that override these models must include the flag:
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
{
|
|
114
|
+
"providers": {
|
|
115
|
+
"your-claude-channel": {
|
|
116
|
+
"api": "anthropic-messages",
|
|
117
|
+
"baseUrl": "https://...",
|
|
118
|
+
"apiKey": "env:YOUR_KEY",
|
|
119
|
+
"compat": {
|
|
120
|
+
"forceAdaptiveThinking": true
|
|
121
|
+
},
|
|
122
|
+
"models": [
|
|
123
|
+
{ "id": "claude-opus-4-8", "name": "Claude Opus 4.8" }
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Or use a model-level override:
|
|
131
|
+
|
|
132
|
+
```json
|
|
133
|
+
{
|
|
134
|
+
"providers": {
|
|
135
|
+
"your-claude-channel": {
|
|
136
|
+
"modelOverrides": {
|
|
137
|
+
"claude-opus-4-8": {
|
|
138
|
+
"compat": {
|
|
139
|
+
"forceAdaptiveThinking": true
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
`/cache-optimizer doctor` and `/cache-optimizer compat` detect missing flags and show copyable JSON.
|
|
149
|
+
|
|
150
|
+
## Auto-repair with `/cache-optimizer fix`
|
|
151
|
+
|
|
152
|
+
**v2.6.0+** adds a `fix` subcommand that can auto-repair safe compat issues:
|
|
153
|
+
|
|
154
|
+
- Anthropic adaptive thinking (`forceAdaptiveThinking: true`)
|
|
155
|
+
- DeepSeek Pi Mono reasoning compat (`thinkingFormat: "deepseek"`, `requiresReasoningContentOnAssistantMessages: true`)
|
|
156
|
+
- OpenAI-compatible proxy session affinity (`sendSessionAffinityHeaders: true` for `openai-completions`, `sendSessionIdHeader: true` for `openai-responses`)
|
|
157
|
+
|
|
158
|
+
**Scope:** only the currently active model. Other channels require switching models and running `fix` again.
|
|
159
|
+
|
|
160
|
+
**Safety:**
|
|
161
|
+
|
|
162
|
+
1. Shows a full preview of the changes (file path, edit location, JSON to write, risks)
|
|
163
|
+
2. Warns that ① changes affect all sessions using that channel, ② an automatic backup is created at `models.json.backup-cache-optimizer-<timestamp>`, and ③ a Pi reload is required
|
|
164
|
+
3. Uses a comment-preserving surgical editor — existing comments, indentation, and key order are preserved
|
|
165
|
+
4. Requires explicit user confirmation (interactive prompt or `ui.select`)
|
|
166
|
+
5. Writes atomically (temp + rename); self-validates after write
|
|
167
|
+
6. Falls back to manual guidance if the JSONC scanner cannot confidently locate the target
|
|
168
|
+
|
|
169
|
+
**Non-interactive mode:** refuses to write; shows manual edit guidance instead.
|
|
170
|
+
|
|
171
|
+
**Run:** `/cache-optimizer fix` when the active model has detected compat issues. The command shows "nothing to fix" when compat is already complete.
|
|
103
172
|
|
|
104
173
|
### Channels without a `models.json` provider entry
|
|
105
174
|
|
package/README.zh-CN.md
CHANGED
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
- 在 Pi / provider compat 支持时请求长缓存保留。
|
|
30
30
|
- 对 `openai-completions` / `openai-responses` 请求,在没有有效 key 时使用 Pi session id 补 `prompt_cache_key`。
|
|
31
31
|
- 对缺少缓存 / session-affinity compat 的第三方 OpenAI-compatible 代理给出一次性提醒。
|
|
32
|
+
- 检测 Anthropic adaptive thinking 模型(opus-4.6+、sonnet-4.6+、fable-5+)是否缺少 `forceAdaptiveThinking: true` compat。
|
|
32
33
|
- 为支持的模型家族显示按 session 隔离的底部缓存统计。
|
|
33
34
|
|
|
34
35
|
缓存是 provider 侧的 best-effort 行为。第三方代理仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
|
|
@@ -58,6 +59,7 @@ pi remove npm:pi-deepseek-cache-optimizer && pi install npm:pi-cache-optimizer
|
|
|
58
59
|
| `/cache-optimizer compat` | 对当前模型显示可复制的 compat 建议(如适用)。 |
|
|
59
60
|
| `/cache-optimizer stats` | 显示当前模型今天的 session-scoped 统计和近期趋势。 |
|
|
60
61
|
| `/cache-optimizer reset` | 只重置当前 session + 当前模型的本地统计;不会修改上游 provider 缓存。 |
|
|
62
|
+
| `/cache-optimizer fix` | 为当前模型自动修复安全的 compat 问题(adaptive thinking、DeepSeek reasoning、OpenAI proxy session affinity)。展示预览 + 风险提示,需要用户确认。**仅在用户明确批准后才修改 `models.json`。** |
|
|
61
63
|
|
|
62
64
|
`enable` / `disable` 是当前进程内开关。若要持久关闭某些能力,请使用下面的环境变量。
|
|
63
65
|
|
|
@@ -99,7 +101,74 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
|
|
|
99
101
|
- 如果出现 `400 Unsupported parameter: prompt_cache_retention`,请为该渠道移除 / 避免 `supportsLongCacheRetention`;如支持,可保留 `sendSessionAffinityHeaders`。
|
|
100
102
|
- 使用 `/cache-optimizer compat` 或 `/cache-optimizer doctor` 查看当前模型的具体建议。
|
|
101
103
|
- 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
|
|
102
|
-
-
|
|
104
|
+
- 本扩展的 `doctor` 和 `compat` 命令只给建议,不会修改 `models.json`。
|
|
105
|
+
|
|
106
|
+
## Anthropic adaptive thinking 模型
|
|
107
|
+
|
|
108
|
+
Claude 从 opus-4.6 / sonnet-4.6 / fable-5 开始需要在 compat 中设置 `forceAdaptiveThinking: true`。缺少此 flag 时,Pi 会发送旧版 thinking 格式,Anthropic 会拒绝请求。
|
|
109
|
+
|
|
110
|
+
Pi 内置 catalog 已为官方模型设置此 flag。`models.json` 中覆盖这些模型的自定义渠道必须包含该 flag:
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
{
|
|
114
|
+
"providers": {
|
|
115
|
+
"your-claude-channel": {
|
|
116
|
+
"api": "anthropic-messages",
|
|
117
|
+
"baseUrl": "https://...",
|
|
118
|
+
"apiKey": "env:YOUR_KEY",
|
|
119
|
+
"compat": {
|
|
120
|
+
"forceAdaptiveThinking": true
|
|
121
|
+
},
|
|
122
|
+
"models": [
|
|
123
|
+
{ "id": "claude-opus-4-8", "name": "Claude Opus 4.8" }
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
或使用模型级 override:
|
|
131
|
+
|
|
132
|
+
```json
|
|
133
|
+
{
|
|
134
|
+
"providers": {
|
|
135
|
+
"your-claude-channel": {
|
|
136
|
+
"modelOverrides": {
|
|
137
|
+
"claude-opus-4-8": {
|
|
138
|
+
"compat": {
|
|
139
|
+
"forceAdaptiveThinking": true
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
`/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测缺失的 flag 并显示可复制的 JSON。
|
|
149
|
+
|
|
150
|
+
## 使用 `/cache-optimizer fix` 自动修复
|
|
151
|
+
|
|
152
|
+
**v2.6.0+** 新增 `fix` 子命令,可自动修复安全的 compat 问题:
|
|
153
|
+
|
|
154
|
+
- Anthropic adaptive thinking(`forceAdaptiveThinking: true`)
|
|
155
|
+
- DeepSeek Pi Mono reasoning compat(`thinkingFormat: "deepseek"`、`requiresReasoningContentOnAssistantMessages: true`)
|
|
156
|
+
- OpenAI-compatible proxy session affinity(`openai-completions` 用 `sendSessionAffinityHeaders: true`,`openai-responses` 用 `sendSessionIdHeader: true`)
|
|
157
|
+
|
|
158
|
+
**范围:** 仅当前 active model。其他渠道需切换模型后再次运行 `fix`。
|
|
159
|
+
|
|
160
|
+
**安全机制:**
|
|
161
|
+
|
|
162
|
+
1. 显示完整变更预览(文件路径、编辑位置、要写入的 JSON、风险说明)
|
|
163
|
+
2. 警告:① 修改影响使用该渠道的所有 session,② 自动备份到 `models.json.backup-cache-optimizer-<timestamp>`,③ 需重启 Pi 或 reload
|
|
164
|
+
3. 使用保留注释的精确编辑器 —— 现有注释、缩进、key 顺序全部保留
|
|
165
|
+
4. 需要用户明确确认(交互式提示或 `ui.select`)
|
|
166
|
+
5. 原子写入(temp + rename);写入后自我验证
|
|
167
|
+
6. 如果 JSONC 扫描器无法可靠地定位目标,则回退到手动修改指引
|
|
168
|
+
|
|
169
|
+
**非交互模式:** 拒绝写入,显示手动编辑指引。
|
|
170
|
+
|
|
171
|
+
**运行:** 当 active model 检测到 compat 问题时执行 `/cache-optimizer fix`。compat 已完整时,命令显示"无需修复"。
|
|
103
172
|
|
|
104
173
|
### 没有 `models.json` provider entry 的渠道
|
|
105
174
|
|
package/index.ts
CHANGED
|
@@ -1381,6 +1381,34 @@ function modelKey(model: PiModel): string {
|
|
|
1381
1381
|
return `${model.provider}/${model.id}`;
|
|
1382
1382
|
}
|
|
1383
1383
|
|
|
1384
|
+
function isRouterModel(model: PiModel | undefined): boolean {
|
|
1385
|
+
return lower(model?.provider) === "router";
|
|
1386
|
+
}
|
|
1387
|
+
|
|
1388
|
+
function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefined): PiModel | undefined {
|
|
1389
|
+
const record = getAssistantRecord(message);
|
|
1390
|
+
if (!record) return fallback;
|
|
1391
|
+
|
|
1392
|
+
const id = lower(record.responseModel) || lower(record.model) || fallback?.id;
|
|
1393
|
+
const provider = lower(record.provider) || fallback?.provider;
|
|
1394
|
+
const api = lower(record.api) || fallback?.api;
|
|
1395
|
+
if (!id || !provider || !api) return fallback;
|
|
1396
|
+
|
|
1397
|
+
return {
|
|
1398
|
+
...(fallback ?? {}),
|
|
1399
|
+
id,
|
|
1400
|
+
name: id,
|
|
1401
|
+
provider,
|
|
1402
|
+
api,
|
|
1403
|
+
baseUrl: fallback?.baseUrl ?? "",
|
|
1404
|
+
reasoning: fallback?.reasoning ?? false,
|
|
1405
|
+
input: fallback?.input ?? ["text"],
|
|
1406
|
+
cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
1407
|
+
contextWindow: fallback?.contextWindow ?? 0,
|
|
1408
|
+
maxTokens: fallback?.maxTokens ?? 0,
|
|
1409
|
+
} as PiModel;
|
|
1410
|
+
}
|
|
1411
|
+
|
|
1384
1412
|
function keyForModelExt(model: { provider: string; id: string }): string {
|
|
1385
1413
|
return `${model.provider}/${model.id}`;
|
|
1386
1414
|
}
|
|
@@ -2835,7 +2863,8 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
|
|
|
2835
2863
|
}
|
|
2836
2864
|
|
|
2837
2865
|
function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
|
|
2838
|
-
|
|
2866
|
+
const responseModel = isRouterModel(model) ? modelFromAssistantMessage(message, model) : model;
|
|
2867
|
+
return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
|
|
2839
2868
|
}
|
|
2840
2869
|
|
|
2841
2870
|
function notifyCacheCompatIfNeeded(
|
|
@@ -5141,6 +5170,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
5141
5170
|
|
|
5142
5171
|
const adapter = selectAdapterForModel(model);
|
|
5143
5172
|
let statusText: string | undefined;
|
|
5173
|
+
if (!adapter && isRouterModel(model)) {
|
|
5174
|
+
// router/auto has no stable target family before the first successful
|
|
5175
|
+
// routed response. Keep the existing cache footer visible instead of
|
|
5176
|
+
// clearing it on model_select; message_end will switch to the real
|
|
5177
|
+
// upstream model/provider after pi-router relays the response metadata.
|
|
5178
|
+
return;
|
|
5179
|
+
}
|
|
5180
|
+
|
|
5144
5181
|
if (adapter) {
|
|
5145
5182
|
// Display session-scoped stats. A model that has never been used
|
|
5146
5183
|
// in this session shows 0/0. The message_end hook populates
|
|
@@ -5322,9 +5359,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
5322
5359
|
|
|
5323
5360
|
const usage = adapter.normalizeUsage(event.message);
|
|
5324
5361
|
|
|
5362
|
+
const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
|
|
5363
|
+
|
|
5325
5364
|
// Record recent sample (even when usage is missing, for trend diagnosis)
|
|
5326
|
-
if (
|
|
5327
|
-
const sk = sessionModelKey(
|
|
5365
|
+
if (statsModel) {
|
|
5366
|
+
const sk = sessionModelKey(statsModel);
|
|
5328
5367
|
const missingFields = usage === undefined || (usage.cacheRead === 0 && usage.cacheWrite === 0 && usage.totalInput === 0)
|
|
5329
5368
|
? true
|
|
5330
5369
|
: hasMissingUsageFields(event.message, adapter);
|
|
@@ -5335,17 +5374,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
5335
5374
|
|
|
5336
5375
|
await rollOverStatsIfNeeded(ctx);
|
|
5337
5376
|
|
|
5338
|
-
// Update stats scoped to current session +
|
|
5339
|
-
// Falls back to legacy family when
|
|
5340
|
-
if (
|
|
5341
|
-
const sk = sessionModelKey(
|
|
5377
|
+
// Update stats scoped to current session + actual routed model.
|
|
5378
|
+
// Falls back to legacy family when no model is available.
|
|
5379
|
+
if (statsModel) {
|
|
5380
|
+
const sk = sessionModelKey(statsModel);
|
|
5342
5381
|
addUsageToCacheStats(getOrCreateStatsByModelKey(sk), usage);
|
|
5343
5382
|
} else {
|
|
5344
5383
|
addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
|
|
5345
5384
|
}
|
|
5346
5385
|
|
|
5347
5386
|
schedulePersistCacheStats(ctx);
|
|
5348
|
-
await publishStatus(ctx);
|
|
5387
|
+
await publishStatus(ctx, statsModel);
|
|
5349
5388
|
});
|
|
5350
5389
|
|
|
5351
5390
|
// ────────────────────────────────────────────────────────────────
|
package/package.json
CHANGED