pi-cache-optimizer 2.6.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,7 @@ Pi extension for improving provider-side KV / prompt cache hit rates. It keeps s
29
29
  - Requests long cache retention when Pi/provider compat supports it.
30
30
  - Adds a session-id `prompt_cache_key` fallback for `openai-completions` / `openai-responses` payloads when no effective key exists.
31
31
  - Warns once for third-party OpenAI-compatible proxies missing cache/session-affinity compat flags.
32
+ - Detects Anthropic adaptive thinking models (opus-4.6+, sonnet-4.6+, fable-5+) missing `forceAdaptiveThinking: true` compat.
32
33
  - Shows session-scoped footer stats for supported model families.
33
34
 
34
35
  Caching is provider-side and best-effort. Third-party proxies can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
@@ -58,6 +59,7 @@ Run `/reload` in Pi after install/update/remove so extension hooks refresh.
58
59
  | `/cache-optimizer compat` | Shows copyable compat advice for the active model, if applicable. |
59
60
  | `/cache-optimizer stats` | Shows today's session-scoped counters and recent trend for the active model. |
60
61
  | `/cache-optimizer reset` | Resets only local stats for the active session + model; upstream provider cache is not modified. |
62
+ | `/cache-optimizer fix` | Auto-repairs safe compat issues for the active model (adaptive thinking, DeepSeek reasoning, OpenAI proxy session affinity). Shows a preview and a risk warning, then asks for confirmation. **Only modifies `models.json` after explicit user approval.** |
61
63
 
62
64
  `enable` / `disable` are current-process switches. For a persistent opt-out, use environment variables below.
63
65
 
@@ -99,7 +101,74 @@ Notes:
99
101
  - If you see `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention` for that channel. Keep `sendSessionAffinityHeaders` if supported.
100
102
  - Use `/cache-optimizer compat` or `/cache-optimizer doctor` to see model-specific advice.
101
103
  - For DeepSeek models, the Pi Mono guidance expects `compat.requiresReasoningContentOnAssistantMessages: true` and `compat.thinkingFormat: "deepseek"` alongside cache/session-affinity flags when the endpoint supports them.
102
- - This extension only advises; it does not edit `models.json`.
104
+ - This extension's `doctor` and `compat` commands only advise; they do not modify `models.json`.
105
+
106
+ ## Anthropic adaptive thinking models
107
+
108
+ Claude models from opus-4.6 / sonnet-4.6 / fable-5 onwards require `forceAdaptiveThinking: true` in compat. Without it, Pi sends the legacy thinking format and Anthropic rejects the request.
109
+
110
+ Pi's built-in catalog already sets this flag for official models. Custom channels in `models.json` that override these models must include the flag:
111
+
112
+ ```json
113
+ {
114
+ "providers": {
115
+ "your-claude-channel": {
116
+ "api": "anthropic-messages",
117
+ "baseUrl": "https://...",
118
+ "apiKey": "env:YOUR_KEY",
119
+ "compat": {
120
+ "forceAdaptiveThinking": true
121
+ },
122
+ "models": [
123
+ { "id": "claude-opus-4-8", "name": "Claude Opus 4.8" }
124
+ ]
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ Or use a model-level override:
131
+
132
+ ```json
133
+ {
134
+ "providers": {
135
+ "your-claude-channel": {
136
+ "modelOverrides": {
137
+ "claude-opus-4-8": {
138
+ "compat": {
139
+ "forceAdaptiveThinking": true
140
+ }
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+ ```
147
+
148
+ `/cache-optimizer doctor` and `/cache-optimizer compat` detect missing flags and show copyable JSON.
149
+
150
+ ## Auto-repair with `/cache-optimizer fix`
151
+
152
+ **v2.6.0+** adds a `fix` subcommand that can auto-repair safe compat issues:
153
+
154
+ - Anthropic adaptive thinking (`forceAdaptiveThinking: true`)
155
+ - DeepSeek Pi Mono reasoning compat (`thinkingFormat: "deepseek"`, `requiresReasoningContentOnAssistantMessages: true`)
156
+ - OpenAI-compatible proxy session affinity (`sendSessionAffinityHeaders: true` for `openai-completions`, `sendSessionIdHeader: true` for `openai-responses`)
157
+
158
+ **Scope:** only the currently active model. To repair another channel, switch to one of its models and run `fix` again.
159
+
160
+ **Safety:**
161
+
162
+ 1. Shows a full preview of the changes (file path, edit location, JSON to write, risks)
163
+ 2. Warns that ① the changes affect all sessions using that channel, ② an automatic backup is created at `models.json.backup-cache-optimizer-<timestamp>`, and ③ a Pi reload is required
164
+ 3. Uses a comment-preserving surgical editor — existing comments, indentation, and key order are preserved
165
+ 4. Requires explicit user confirmation (interactive prompt or `ui.select`)
166
+ 5. Writes atomically (temp + rename); self-validates after write
167
+ 6. Falls back to manual guidance if the JSONC scanner cannot confidently locate the target
168
+
169
+ **Non-interactive mode:** refuses to write; shows manual edit guidance instead.
170
+
171
+ **Run:** `/cache-optimizer fix` when the active model has detected compat issues. The command shows "nothing to fix" when compat is already complete.
103
172
 
104
173
  ### Channels without a `models.json` provider entry
105
174
 
package/README.zh-CN.md CHANGED
@@ -29,6 +29,7 @@
29
29
  - 在 Pi / provider compat 支持时请求长缓存保留。
30
30
  - 对 `openai-completions` / `openai-responses` 请求,在没有有效 key 时使用 Pi session id 补 `prompt_cache_key`。
31
31
  - 对缺少缓存 / session-affinity compat 的第三方 OpenAI-compatible 代理给出一次性提醒。
32
+ - 检测 Anthropic adaptive thinking 模型(opus-4.6+、sonnet-4.6+、fable-5+)是否缺少 `forceAdaptiveThinking: true` compat。
32
33
  - 为支持的模型家族显示按 session 隔离的底部缓存统计。
33
34
 
34
35
  缓存是 provider 侧的 best-effort 行为。第三方代理仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
@@ -58,6 +59,7 @@ pi remove npm:pi-deepseek-cache-optimizer && pi install npm:pi-cache-optimizer
58
59
  | `/cache-optimizer compat` | 对当前模型显示可复制的 compat 建议(如适用)。 |
59
60
  | `/cache-optimizer stats` | 显示当前模型今天的 session-scoped 统计和近期趋势。 |
60
61
  | `/cache-optimizer reset` | 只重置当前 session + 当前模型的本地统计;不会修改上游 provider 缓存。 |
62
+ | `/cache-optimizer fix` | 为当前模型自动修复安全的 compat 问题(adaptive thinking、DeepSeek reasoning、OpenAI proxy session affinity)。展示预览 + 风险提示,需要用户确认。**仅在用户明确批准后才修改 `models.json`。** |
61
63
 
62
64
  `enable` / `disable` 是当前进程内开关。若要持久关闭某些能力,请使用下面的环境变量。
63
65
 
@@ -99,7 +101,74 @@ LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completion
99
101
  - 如果出现 `400 Unsupported parameter: prompt_cache_retention`,请为该渠道移除 / 避免 `supportsLongCacheRetention`;如支持,可保留 `sendSessionAffinityHeaders`。
100
102
  - 使用 `/cache-optimizer compat` 或 `/cache-optimizer doctor` 查看当前模型的具体建议。
101
103
  - 对 DeepSeek 模型,Pi Mono 指南期望在支持时同时设置 `compat.requiresReasoningContentOnAssistantMessages: true` 和 `compat.thinkingFormat: "deepseek"`,再配合缓存 / session-affinity 相关 compat。
102
- - 本扩展只给建议,不会修改 `models.json`。
104
+ - 本扩展的 `doctor` 和 `compat` 命令只给建议,不会修改 `models.json`。
105
+
106
+ ## Anthropic adaptive thinking 模型
107
+
108
+ Claude 从 opus-4.6 / sonnet-4.6 / fable-5 开始需要在 compat 中设置 `forceAdaptiveThinking: true`。缺少此 flag 时,Pi 会发送旧版 thinking 格式,Anthropic 会拒绝请求。
109
+
110
+ Pi 内置 catalog 已为官方模型设置此 flag。`models.json` 中覆盖这些模型的自定义渠道必须包含该 flag:
111
+
112
+ ```json
113
+ {
114
+ "providers": {
115
+ "your-claude-channel": {
116
+ "api": "anthropic-messages",
117
+ "baseUrl": "https://...",
118
+ "apiKey": "env:YOUR_KEY",
119
+ "compat": {
120
+ "forceAdaptiveThinking": true
121
+ },
122
+ "models": [
123
+ { "id": "claude-opus-4-8", "name": "Claude Opus 4.8" }
124
+ ]
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ 或使用模型级 override:
131
+
132
+ ```json
133
+ {
134
+ "providers": {
135
+ "your-claude-channel": {
136
+ "modelOverrides": {
137
+ "claude-opus-4-8": {
138
+ "compat": {
139
+ "forceAdaptiveThinking": true
140
+ }
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+ ```
147
+
148
+ `/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测缺失的 flag 并显示可复制的 JSON。
149
+
150
+ ## 使用 `/cache-optimizer fix` 自动修复
151
+
152
+ **v2.6.0+** 新增 `fix` 子命令,可自动修复安全的 compat 问题:
153
+
154
+ - Anthropic adaptive thinking(`forceAdaptiveThinking: true`)
155
+ - DeepSeek Pi Mono reasoning compat(`thinkingFormat: "deepseek"`、`requiresReasoningContentOnAssistantMessages: true`)
156
+ - OpenAI-compatible proxy session affinity(`openai-completions` 用 `sendSessionAffinityHeaders: true`,`openai-responses` 用 `sendSessionIdHeader: true`)
157
+
158
+ **范围:** 仅当前 active model。其他渠道需切换模型后再次运行 `fix`。
159
+
160
+ **安全机制:**
161
+
162
+ 1. 显示完整变更预览(文件路径、编辑位置、要写入的 JSON、风险说明)
163
+ 2. 警告:① 修改影响使用该渠道的所有 session,② 自动备份到 `models.json.backup-cache-optimizer-<timestamp>`,③ 需重启 Pi 或 reload
164
+ 3. 使用保留注释的精确编辑器 —— 现有注释、缩进、key 顺序全部保留
165
+ 4. 需要用户明确确认(交互式提示或 `ui.select`)
166
+ 5. 原子写入(temp + rename);写入后自我验证
167
+ 6. 如果 JSONC 扫描器无法可靠地定位目标,则回退到手动修改指引
168
+
169
+ **非交互模式:** 拒绝写入,显示手动编辑指引。
170
+
171
+ **运行:** 当 active model 检测到 compat 问题时执行 `/cache-optimizer fix`。compat 已完整时,命令显示"无需修复"。
103
172
 
104
173
  ### 没有 `models.json` provider entry 的渠道
105
174
 
package/index.ts CHANGED
@@ -1381,6 +1381,34 @@ function modelKey(model: PiModel): string {
1381
1381
  return `${model.provider}/${model.id}`;
1382
1382
  }
1383
1383
 
1384
+ function isRouterModel(model: PiModel | undefined): boolean {
1385
+ return lower(model?.provider) === "router";
1386
+ }
1387
+
1388
+ function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefined): PiModel | undefined {
1389
+ const record = getAssistantRecord(message);
1390
+ if (!record) return fallback;
1391
+
1392
+ const id = lower(record.responseModel) || lower(record.model) || fallback?.id;
1393
+ const provider = lower(record.provider) || fallback?.provider;
1394
+ const api = lower(record.api) || fallback?.api;
1395
+ if (!id || !provider || !api) return fallback;
1396
+
1397
+ return {
1398
+ ...(fallback ?? {}),
1399
+ id,
1400
+ name: id,
1401
+ provider,
1402
+ api,
1403
+ baseUrl: fallback?.baseUrl ?? "",
1404
+ reasoning: fallback?.reasoning ?? false,
1405
+ input: fallback?.input ?? ["text"],
1406
+ cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
1407
+ contextWindow: fallback?.contextWindow ?? 0,
1408
+ maxTokens: fallback?.maxTokens ?? 0,
1409
+ } as PiModel;
1410
+ }
1411
+
1384
1412
  function keyForModelExt(model: { provider: string; id: string }): string {
1385
1413
  return `${model.provider}/${model.id}`;
1386
1414
  }
@@ -2835,7 +2863,8 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
2835
2863
  }
2836
2864
 
2837
2865
  function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
2838
- return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, model));
2866
+ const responseModel = isRouterModel(model) ? modelFromAssistantMessage(message, model) : model;
2867
+ return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
2839
2868
  }
2840
2869
 
2841
2870
  function notifyCacheCompatIfNeeded(
@@ -5141,6 +5170,14 @@ export default function (pi: ExtensionAPI) {
5141
5170
 
5142
5171
  const adapter = selectAdapterForModel(model);
5143
5172
  let statusText: string | undefined;
5173
+ if (!adapter && isRouterModel(model)) {
5174
+ // router/auto has no stable target family before the first successful
5175
+ // routed response. Keep the existing cache footer visible instead of
5176
+ // clearing it on model_select; message_end will switch to the real
5177
+ // upstream model/provider after pi-router relays the response metadata.
5178
+ return;
5179
+ }
5180
+
5144
5181
  if (adapter) {
5145
5182
  // Display session-scoped stats. A model that has never been used
5146
5183
  // in this session shows 0/0. The message_end hook populates
@@ -5322,9 +5359,11 @@ export default function (pi: ExtensionAPI) {
5322
5359
 
5323
5360
  const usage = adapter.normalizeUsage(event.message);
5324
5361
 
5362
+ const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
5363
+
5325
5364
  // Record recent sample (even when usage is missing, for trend diagnosis)
5326
- if (ctx.model) {
5327
- const sk = sessionModelKey(ctx.model);
5365
+ if (statsModel) {
5366
+ const sk = sessionModelKey(statsModel);
5328
5367
  const missingFields = usage === undefined || (usage.cacheRead === 0 && usage.cacheWrite === 0 && usage.totalInput === 0)
5329
5368
  ? true
5330
5369
  : hasMissingUsageFields(event.message, adapter);
@@ -5335,17 +5374,17 @@ export default function (pi: ExtensionAPI) {
5335
5374
 
5336
5375
  await rollOverStatsIfNeeded(ctx);
5337
5376
 
5338
- // Update stats scoped to current session + active model.
5339
- // Falls back to legacy family when ctx.model is undefined.
5340
- if (ctx.model) {
5341
- const sk = sessionModelKey(ctx.model);
5377
+ // Update stats scoped to current session + actual routed model.
5378
+ // Falls back to legacy family when no model is available.
5379
+ if (statsModel) {
5380
+ const sk = sessionModelKey(statsModel);
5342
5381
  addUsageToCacheStats(getOrCreateStatsByModelKey(sk), usage);
5343
5382
  } else {
5344
5383
  addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
5345
5384
  }
5346
5385
 
5347
5386
  schedulePersistCacheStats(ctx);
5348
- await publishStatus(ctx);
5387
+ await publishStatus(ctx, statsModel);
5349
5388
  });
5350
5389
 
5351
5390
  // ────────────────────────────────────────────────────────────────
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.6.1",
3
+ "version": "2.6.3",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",