pi-cache-optimizer 2.4.3 → 2.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,17 @@ This release keeps the original DeepSeek behavior and adds read-only stats adapt
35
35
  |---|---|---|---|
36
36
  | DeepSeek | Model id/name contains `deepseek` | `DS cache` | Pi `usage.cacheRead`/`usage.input`, or raw `prompt_cache_hit_tokens`, `prompt_cache_miss_tokens`, `prompt_tokens` when visible |
37
37
  | OpenAI-family | Model id/name contains conservative OpenAI-family tokens such as `gpt-`, `chatgpt`, `o1`, `o3`, `o4`, or `o5` | `OpenAI cache` | Pi-normalized usage, or raw `prompt_tokens_details.cached_tokens` / `input_tokens_details.cached_tokens` with prompt/input totals |
38
+ | Kimi / Moonshot | Model id/name contains `kimi` | `Kimi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
39
+ | Qwen / Alibaba | Model id/name contains `qwen` | `Qwen cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
40
+ | GLM / Zhipu | Model id/name contains `glm` | `GLM cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
41
+ | MiniMax | Model id/name contains `minimax` | `MiniMax cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
42
+ | Hunyuan / Tencent | Model id/name contains `hunyuan` | `Hunyuan cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
43
+ | Mistral | Model id/name contains `mistral`, `mixtral`, or `codestral` | `Mistral cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
44
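+ | xAI / Grok | Model id/name contains `grok`, or contains `xai` as a standalone token (bounded on each side by the start/end of the string or a separator such as `/`, `:`, `_`, or `-`) | `Grok cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |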
45
+ | Meta / Llama | Model id/name contains `llama` | `Llama cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
46
+ | NVIDIA Nemotron | Model id/name contains `nemotron` | `Nemotron cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
47
+ | Cohere / Command | Model id/name contains `cohere` or `command-r` | `Cohere cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
48
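+ | Yi / 零一万物 | Model id/name contains `yi-`, `01-ai`, or `zero-one`, or contains `yi` as a standalone token (bounded on each side by the start/end of the string or a separator such as `/`, `:`, `_`, or `-`) | `Yi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |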
38
49
  | Anthropic / Claude | Model id/name contains `anthropic` or `claude` | `Claude cache` | Pi-normalized usage, or raw `cache_read_input_tokens`, `cache_creation_input_tokens`, `input_tokens` |
39
50
  | Gemini / Vertex | Model id/name contains `gemini` or `vertex` | `Gemini cache` | Pi-normalized usage, or raw Gemini/Vertex cached-content token metadata when visible |
40
51
 
@@ -51,7 +62,7 @@ This extension is pure Node.js — no shell exec, no native bindings, no platfor
51
62
  | Windows | Works through the bash shell Pi requires on Windows (Git Bash, Cygwin, MSYS2, or WSL). See Pi's [Windows setup](https://github.com/earendil-works/pi-coding-agent/blob/main/docs/windows.md). |
52
63
  | Termux / Android | Works inside Pi's Termux setup. |
53
64
 
54
- State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on Windows the path automatically expands to `C:\Users\<you>\.pi\agent\...`. All shell snippets in this README are bash, matching the shell Pi runs in on every supported platform; no PowerShell or `cmd.exe` translation is needed when commands are executed inside (or for) Pi.
65
+ State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on Windows the path automatically expands to `C:\Users\<you>\.pi\agent\...`. The extension's compat warnings, `/cache-optimizer doctor`, and `/cache-optimizer compat` show the platform-appropriate path automatically (`~/.pi/agent/models.json` on Linux/macOS, `%USERPROFILE%\.pi\agent\models.json` on Windows). All shell snippets in this README are bash, matching the shell Pi runs in on every supported platform; no PowerShell or `cmd.exe` translation is needed when commands are executed inside (or for) Pi.
55
66
 
56
67
  ## Quickstart
57
68
 
@@ -115,6 +126,64 @@ rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
115
126
 
116
127
 
117
128
 
129
+ ## Adding an OpenAI-compatible proxy channel
130
+
131
+ When adding a third-party OpenAI-compatible proxy provider (e.g. `otokapi`, `cafecode`,
132
+ OpenRouter, etc.) to `~/.pi/agent/models.json`, the `compat` flags for cache optimization
133
+ are NOT required for the model to work — but they dramatically improve cache durability.
134
+
135
+ ### Minimal provider config template
136
+
137
+ ```jsonc
138
+ {
139
+ "providers": {
140
+ "your-provider-id": {
141
+ "api": "openai-completions", // or "openai-responses"
142
+ "baseUrl": "https://your-proxy.example.com/v1",
143
+ "apiKey": "your-api-key",
144
+ "models": {
145
+ "gpt-5.5": {
146
+ "id": "gpt-5.5",
147
+ "name": "GPT 5.5",
148
+ "contextWindowTokens": 128000,
149
+ "maxOutputTokens": 8192,
150
+ "thinking": {
151
+ // Use the thinking modes your proxy actually supports.
152
+ // Pi maps --thinking <level> to tokens via thinkingLevelMap.
153
+ // The template below keeps each level distinct — DO NOT
154
+ // map everything to "xhigh". Your proxy may not support
155
+ // all levels; remove unsupported ones or test each.
156
+ "thinkingLevelMap": {
157
+ "off": null,
158
+ "minimal": "minimal",
159
+ "low": "low",
160
+ "medium": "medium",
161
+ "high": "high",
162
+ "xhigh": "xhigh"
163
+ }
164
+ },
165
+ "compat": {
166
+ "supportsLongCacheRetention": true,
167
+ "sendSessionAffinityHeaders": true
168
+ }
169
+ }
170
+ }
171
+ }
172
+ }
173
+ }
174
+ ```
175
+
176
+ Key points:
177
+
178
+ - `thinkingLevelMap` keeps distinct levels. If your proxy does not support a particular
179
+ level (e.g. `minimal`), remove that entry or set it to `null`. Do **not** collapse all
180
+ levels to `"xhigh"` — that defeats user control over reasoning effort.
181
+ - `compat` flags help Pi request longer cache retention and send session-affinity
182
+ headers for proxy-side cache locality. Only enable them if your proxy supports them.
183
+ - The extension detects model families by `id`/`name` strings, not by provider id,
184
+ base URL, or API type. Use recognizable model ids (e.g. `gpt-5.5`, `kimi-k2.5`) for
185
+ correct stats adapter selection.
186
+
118
187
  ## Footer cache stats
119
188
 
120
189
  The Pi footer displays stats for the **active model family** only, for example:
@@ -155,10 +224,18 @@ Reset behavior:
155
224
 
156
225
  For direct DeepSeek or DeepSeek-like OpenAI-compatible proxies, configure the provider or model `compat` as shown in the example below.
157
226
 
158
- ```json
227
+ The `compat` block goes inside your provider object in `~/.pi/agent/models.json`, at
228
+ the same level as `baseUrl`, `api`, `apiKey`, and `models`:
229
+
230
+ ```jsonc
159
231
  {
160
232
  "providers": {
161
233
  "deepseek": {
234
+ "api": "openai-completions",
235
+ "baseUrl": "https://api.deepseek.com/v1",
236
+ "apiKey": "sk-...",
237
+ "models": { /* ... */ },
238
+ // 👇 compat goes here, NOT inside models
162
239
  "compat": {
163
240
  "thinkingFormat": "deepseek",
164
241
  "supportsLongCacheRetention": true,
@@ -180,6 +257,63 @@ For Claude/Anthropic models behind an OpenAI-compatible endpoint, the extension
180
257
 
181
258
  > Reminder: only enable session-affinity headers or cache-control compat when your endpoint or proxy supports them.
182
259
 
260
+ ## Diagnostic command
261
+
262
+ The extension registers a Pi command `/cache-optimizer` for interactive diagnosis.
263
+
264
+ ```
265
+ /cache-optimizer — interactive menu (or text help when no UI)
266
+ /cache-optimizer doctor — show provider, model, API, base URL, compat status
267
+ /cache-optimizer compat — show compat suggestion with edit instructions
268
+ ```
269
+
270
+ When run without arguments, `/cache-optimizer` shows an interactive selection menu
271
+ (Doctor / Compat / Cancel) when the Pi UI supports it (`ctx.ui.select`). In
272
+ non-interactive terminals, it falls back to text help with current model compat
273
+ status.
274
+
275
+ ### `/cache-optimizer doctor`
276
+
277
+ Displays the active model's provider, model id, name, API type, base URL, current
278
+ `compat` flags, and any missing cache/session-affinity flags. If flags are missing,
279
+ it also shows a copyable JSON snippet and the exact edit location.
280
+
281
+ When all compat flags are present and applicable (third-party `openai-completions`
282
+ proxy), the output shows `✅ Compat fully configured.` For models where the
283
+ compat check does not apply (official OpenAI, non-`openai-completions` APIs,
284
+ custom transports), it shows `ℹ️ Compat check not applicable for this model.` Example output when flags are missing:
285
+
286
+ ```text
287
+ Provider: otokapi
288
+ Model: gpt-5.5
289
+ API: openai-completions
290
+ Base URL: https://otokapi.example.com/v1
291
+ Compat: {}
292
+ ⚠️ Missing compat flags: supportsLongCacheRetention, sendSessionAffinityHeaders
293
+ Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as baseUrl/api/apiKey/models):
294
+ {
295
+ "supportsLongCacheRetention": true,
296
+ "sendSessionAffinityHeaders": true
297
+ }
298
+ ```
299
+
300
+ ### `/cache-optimizer compat`
301
+
302
+ Shows only the compat suggestion for the active model, including file path,
303
+ provider path, and copyable JSON snippet. When no flags are missing, it shows
304
+ `✅ Compat fully configured.` if the model is an applicable third-party proxy,
305
+ or `ℹ️ Compat check not applicable for this model.` otherwise.
306
+
307
+ ### Security
308
+
309
+ The command reads only metadata exposed by Pi through `ctx.model`:
310
+ provider, id, name, api, baseUrl, compat. It does NOT read or expose:
311
+ - API keys or environment secrets
312
+ - Request/response payloads
313
+ - Prompts or model outputs
314
+ - HTTP headers
315
+ - Raw `~/.pi/agent/models.json` content
316
+
183
317
  ## How it works
184
318
 
185
319
  Provider caches are usually based on exact or near-exact prefix matching. Pi's system prompt contains stable content that is likely shared across sessions (tools, skills, guidelines) and dynamic content that changes frequently (git status, task context).
package/README.zh-CN.md CHANGED
@@ -38,6 +38,17 @@
38
38
  |---|---|---|---|
39
39
  | DeepSeek | model id/name 包含 `deepseek` | `DS cache` | Pi `usage.cacheRead`/`usage.input`,或可见 raw 字段 `prompt_cache_hit_tokens`、`prompt_cache_miss_tokens`、`prompt_tokens` |
40
40
  | OpenAI-family | model id/name 包含保守 OpenAI-family token,例如 `gpt-`、`chatgpt`、`o1`、`o3`、`o4` 或 `o5` | `OpenAI cache` | Pi 归一化 usage,或可见 raw 字段 `prompt_tokens_details.cached_tokens` / `input_tokens_details.cached_tokens` 及 prompt/input total |
41
+ | Kimi / Moonshot | model id/name 包含 `kimi` | `Kimi cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
42
+ | Qwen / Alibaba | model id/name 包含 `qwen` | `Qwen cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
43
+ | GLM / Zhipu | model id/name 包含 `glm` | `GLM cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
44
+ | MiniMax | model id/name 包含 `minimax` | `MiniMax cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
45
+ | Hunyuan / Tencent | model id/name 包含 `hunyuan` | `Hunyuan cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
46
+ | Mistral | model id/name 包含 `mistral`、`mixtral` 或 `codestral` | `Mistral cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
47
+ | xAI / Grok | model id/name 包含 `grok`，或包含作为独立 token 的 `xai`（两侧为字符串首尾，或 `/`、`:`、`_`、`-` 等分隔符） | `Grok cache` | Pi 归一化 usage，或可见 OpenAI 形状字段 |
48
+ | Meta / Llama | model id/name 包含 `llama` | `Llama cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
49
+ | NVIDIA Nemotron | model id/name 包含 `nemotron` | `Nemotron cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
50
+ | Cohere / Command | model id/name 包含 `cohere` 或 `command-r` | `Cohere cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
51
+ | Yi / 零一万物 | model id/name 包含 `yi-`、`01-ai`、`zero-one`，或包含作为独立 token 的 `yi`（两侧为字符串首尾，或 `/`、`:`、`_`、`-` 等分隔符） | `Yi cache` | Pi 归一化 usage，或可见 OpenAI 形状字段 |
41
52
  | Anthropic / Claude | model id/name 包含 `anthropic` 或 `claude` | `Claude cache` | Pi 归一化 usage,或可见 raw 字段 `cache_read_input_tokens`、`cache_creation_input_tokens`、`input_tokens` |
42
53
  | Gemini / Vertex | model id/name 包含 `gemini` 或 `vertex` | `Gemini cache` | Pi 归一化 usage,或可见 Gemini/Vertex cached-content token metadata |
43
54
 
@@ -54,7 +65,7 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
54
65
  | Windows | 通过 Pi 在 Windows 下要求的 bash shell 运行(Git Bash、Cygwin、MSYS2 或 WSL)。详见 Pi 的 [Windows setup](https://github.com/earendil-works/pi-coding-agent/blob/main/docs/windows.md)。 |
55
66
  | Termux / Android | 在 Pi 的 Termux 环境中可用。 |
56
67
 
57
- 状态文件 `~/.pi/agent/` 通过 Node 的 `os.homedir()` 解析,所以在 Windows 上会自动展开为 `C:\Users\<你>\.pi\agent\...`。本文档中所有 shell 命令均使用 bash 语法,与 Pi 在每个受支持平台下运行的 shell 一致;只要在 Pi 内(或为 Pi 而执行)运行,就**不需要**改写为 PowerShell 或 `cmd.exe` 形式。
68
+ 状态文件 `~/.pi/agent/` 通过 Node 的 `os.homedir()` 解析,所以在 Windows 上会自动展开为 `C:\Users\<你>\.pi\agent\...`。扩展的 compat 提醒、`/cache-optimizer doctor` 和 `/cache-optimizer compat` 会自动显示适合当前平台的路径(Linux/macOS 上显示 `~/.pi/agent/models.json`,Windows 上显示 `%USERPROFILE%\.pi\agent\models.json`)。本文档中所有 shell 命令均使用 bash 语法,与 Pi 在每个受支持平台下运行的 shell 一致;只要在 Pi 内(或为 Pi 而执行)运行,就**不需要**改写为 PowerShell 或 `cmd.exe` 形式。
58
69
 
59
70
  ## 快速开始
60
71
 
@@ -118,6 +129,56 @@ rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
118
129
 
119
130
 
120
131
 
132
+ ## 添加 OpenAI-compatible 代理渠道
133
+
134
+ 当在 `~/.pi/agent/models.json` 中添加第三方 OpenAI-compatible 代理 provider(例如 `otokapi`、`cafecode`、OpenRouter 等)时,缓存优化的 `compat` 标志对模型正常使用不是必需的,但它们能显著提高缓存持久性。
135
+
136
+ ### 最小 provider 配置模板
137
+
138
+ ```jsonc
139
+ {
140
+ "providers": {
141
+ "your-provider-id": {
142
+ "api": "openai-completions", // 或 "openai-responses"
143
+ "baseUrl": "https://your-proxy.example.com/v1",
144
+ "apiKey": "your-api-key",
145
+ "models": {
146
+ "gpt-5.5": {
147
+ "id": "gpt-5.5",
148
+ "name": "GPT 5.5",
149
+ "contextWindowTokens": 128000,
150
+ "maxOutputTokens": 8192,
151
+ "thinking": {
152
+ // 使用你的代理实际支持的 thinking 级别。
153
+ // Pi 通过 thinkingLevelMap 将 --thinking <level> 映射为 token。
154
+ // 下面模板保持各级别独立 —— 不要全部映射为 "xhigh"。
155
+ // 你的代理可能不支持所有级别;移除不支持的或逐个测试。
156
+ "thinkingLevelMap": {
157
+ "off": null,
158
+ "minimal": "minimal",
159
+ "low": "low",
160
+ "medium": "medium",
161
+ "high": "high",
162
+ "xhigh": "xhigh"
163
+ }
164
+ },
165
+ "compat": {
166
+ "supportsLongCacheRetention": true,
167
+ "sendSessionAffinityHeaders": true
168
+ }
169
+ }
170
+ }
171
+ }
172
+ }
173
+ }
174
+ ```
175
+
176
+ 关键点:
177
+
178
+ - `thinkingLevelMap` 保持不同的 level 独立。如果你的代理不支持某个级别(例如 `minimal`),请移除该条目或设为 `null`。**不要**将所有级别都映射为 `"xhigh"` —— 那会破坏用户对推理努力度的控制。
179
+ - `compat` 标志帮助 Pi 请求更长的缓存保留时间,并通过发送 session-affinity headers 实现代理侧缓存本地性。仅在代理支持时才启用。
180
+ - 扩展通过模型 `id`/`name` 字符串来检测模型家族,而不是通过 provider id、base URL 或 API 类型。请使用易识别的模型 id(例如 `gpt-5.5`、`kimi-k2.5`),以便正确匹配统计 adapter。
181
+
121
182
  ## 底部缓存统计
122
183
 
123
184
  Pi footer 只显示**当前活跃模型 family** 的统计,例如:
@@ -156,12 +217,19 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
156
217
 
157
218
  ## 建议的 compat 配置
158
219
 
159
- 对直连 DeepSeek 或 DeepSeek-like OpenAI-compatible 代理,建议在对应 provider 或 model 的 `compat` 中配置:
220
+ 对直连 DeepSeek 或 DeepSeek-like OpenAI-compatible 代理,建议在对应 provider 或 model 的 `compat` 中配置。
221
+
222
+ `compat` 块应该放在 `~/.pi/agent/models.json` 中 provider 对象内部,与 `baseUrl`、`api`、`apiKey`、`models` 同级:
160
223
 
161
- ```json
224
+ ```jsonc
162
225
  {
163
226
  "providers": {
164
227
  "deepseek": {
228
+ "api": "openai-completions",
229
+ "baseUrl": "https://api.deepseek.com/v1",
230
+ "apiKey": "sk-...",
231
+ "models": { /* ... */ },
232
+ // 👇 compat 在此位置,而不是在 models 内部
165
233
  "compat": {
166
234
  "thinkingFormat": "deepseek",
167
235
  "supportsLongCacheRetention": true,
@@ -183,6 +251,51 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
183
251
 
184
252
  > 提醒:只有在 endpoint 或代理明确支持时,才建议启用 session-affinity headers 或 cache-control compat。
185
253
 
254
+ ## 诊断命令
255
+
256
+ 扩展注册了 Pi 命令 `/cache-optimizer` 用于交互式诊断。
257
+
258
+ ```
259
+ /cache-optimizer — 交互菜单(无 UI 时显示文字帮助)
260
+ /cache-optimizer doctor — 显示 provider、model、API、base URL、compat 状态
261
+ /cache-optimizer compat — 显示 compat 建议和编辑说明
262
+ ```
263
+
264
+ 不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Compat / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
265
+
266
+ ### `/cache-optimizer doctor`
267
+
268
+ 显示当前模型的 provider、model id、名称、API 类型、base URL、当前 `compat` 标志以及缺少的缓存/session-affinity 标志。如果缺少标志,还会显示可复制的 JSON 片段和精确编辑位置。
269
+
270
+ 如果所有 compat 标志都已配置且适用（第三方 `openai-completions` 代理），输出显示 `✅ Compat fully configured.`。对于不适用 compat 检查的模型（官方 OpenAI、非 `openai-completions` API、custom transport），显示 `ℹ️ Compat check not applicable for this model.`。缺少标志时的输出示例：
271
+
272
+ ```text
273
+ Provider: otokapi
274
+ Model: gpt-5.5
275
+ API: openai-completions
276
+ Base URL: https://otokapi.example.com/v1
277
+ Compat: {}
278
+ ⚠️ Missing compat flags: supportsLongCacheRetention, sendSessionAffinityHeaders
279
+ Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as baseUrl/api/apiKey/models):
280
+ {
281
+ "supportsLongCacheRetention": true,
282
+ "sendSessionAffinityHeaders": true
283
+ }
284
+ ```
285
+
286
+ ### `/cache-optimizer compat`
287
+
288
+ 仅显示当前模型的 compat 建议,包括文件路径、provider 路径和可复制 JSON 片段。当没有缺失标志时,如果模型是适用的第三方代理则显示 `✅ Compat fully configured.`,否则显示 `ℹ️ Compat check not applicable for this model.`。
289
+
290
+ ### 安全说明
291
+
292
+ 命令只读取 Pi 通过 `ctx.model` 暴露的元数据:provider、id、name、api、baseUrl、compat。它**不会**读取或暴露:
293
+ - API key 或环境密钥
294
+ - 请求/响应 payload
295
+ - Prompt 或模型输出
296
+ - HTTP headers
297
+ - `~/.pi/agent/models.json` 的原始内容
298
+
186
299
  ## 原理
187
300
 
188
301
  Provider 缓存通常依赖精确或近似精确的前缀匹配。Pi 的 system prompt 包含跨会话稳定的内容(工具定义、技能、规范),也包含每次变化的动态内容(git status、当前任务)。
package/index.ts CHANGED
@@ -48,6 +48,16 @@ const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
48
48
  // persisted metrics.
49
49
  let promptTruncationDetected = false;
50
50
 
51
// Timestamp (ms) of the most recent prompt-integrity truncation event.
// A value of 0 means no event has been recorded since load/reload; the
// doctor report only prints integrity guidance when this is greater than 0.
// Compared against Date.now() by /cache-optimizer doctor, so it is a
// wall-clock value. Reset to 0 on reload.
// NOTE(review): the code that assigns a non-zero value is outside this view.
let lastPromptIntegrityWarningAt = 0;

/**
 * Read accessor for `lastPromptIntegrityWarningAt`.
 *
 * Exported for tests via `__internals_for_tests`.
 *
 * @returns The stored timestamp in milliseconds, or 0 when nothing is recorded.
 */
function getLastPromptIntegrityWarningAt(): number {
  return lastPromptIntegrityWarningAt;
}
60
+
51
61
  // Minimum count of skills before compression is worth applying.
52
62
  // Below this, pi's verbose XML block is small enough that the overhead of
53
63
  // an additional one-line index isn't worth the loss of per-skill
@@ -80,6 +90,7 @@ const MIN_STABLE_CANDIDATE_LENGTH = 8;
80
90
 
81
91
  const ASSISTANT_MESSAGE_MODEL_TOKEN_KEYS = ["model", "name"];
82
92
  const OPENAI_REASONING_MODEL_PATTERN = /(^|[/\s:_-])o[1345]($|[-_.:/\s])/;
93
// Matches `xai` only as a delimited token: preceded by start-of-string or one
// of `/`, whitespace, `:`, `_`, `-`, and followed by end-of-string or one of
// `-`, `_`, `.`, `:`, `/`, whitespace. No `g` flag, so `.test()` is stateless.
const XAI_MODEL_PATTERN = /(^|[/\s:_-])xai($|[-_.:/\s])/;
83
94
 
84
95
  type CacheCompat = {
85
96
  sendSessionAffinityHeaders?: boolean;
@@ -547,6 +558,26 @@ function getCompat(model: PiModel | undefined): CacheCompat {
547
558
  return (model?.compat ?? {}) as CacheCompat;
548
559
  }
549
560
 
561
/**
 * Produce the human-readable location of Pi's `models.json` for a platform.
 *
 * - Platform strings beginning with "win" (e.g. Node's "win32") yield the
 *   Windows-style `%USERPROFILE%\.pi\agent\models.json`.
 * - Every other platform yields the tilde shorthand `~/.pi/agent/models.json`.
 *
 * The result is meant for display in warnings and diagnostic text only; this
 * function performs no file-system access and does not expand the path.
 *
 * @param platform Platform identifier; defaults to `process.platform`.
 * @returns The display path string for that platform.
 */
function getModelsJsonDisplayPath(platform: string = process.platform): string {
  const isWindows = platform.startsWith("win");
  return isWindows ? "%USERPROFILE%\\.pi\\agent\\models.json" : "~/.pi/agent/models.json";
}
580
+
550
581
  function isEnabledEnv(value: string | undefined): boolean {
551
582
  if (!value) return false;
552
583
  const normalized = value.trim().toLowerCase();
@@ -672,6 +703,62 @@ function isHunyuanLikeAssistantMessage(message: unknown, model: PiModel | undefi
672
703
  return modelOrAssistantMessageHas(message, model, ["hunyuan"]);
673
704
  }
674
705
 
706
+ // ── Additional OpenAI-compatible model detection ──────────────────
707
+
708
/**
 * Reports whether the model's id/name tokens carry a Mistral-family marker
 * (`mistral`, `mixtral`, or `codestral`).
 */
function isMistralLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(idNameTokens, ["mistral", "mixtral", "codestral"]);
}
711
/**
 * Reports whether the model or the assistant message carries a Mistral-family
 * marker (`mistral`, `mixtral`, or `codestral`); the lookup itself is
 * delegated to `modelOrAssistantMessageHas`.
 */
function isMistralLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const markers = ["mistral", "mixtral", "codestral"];
  return modelOrAssistantMessageHas(message, model, markers);
}
714
+
715
/**
 * Reports whether the model looks like an xAI/Grok model: either an id/name
 * token carries the `grok` marker, or a token matches `XAI_MODEL_PATTERN`.
 */
function isGrokLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(idNameTokens, ["grok"])) {
    return true;
  }
  // Fall back to the delimited `xai` pattern only when `grok` was not found.
  return idNameTokens.some((token) => XAI_MODEL_PATTERN.test(token));
}
719
/**
 * Same check as the model-only Grok detector, but over the combined token
 * list: the model's id/name tokens followed by the assistant message's model
 * tokens.
 */
function isGrokLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["grok"])) {
    return true;
  }
  return candidates.some((token) => XAI_MODEL_PATTERN.test(token));
}
726
+
727
/** Reports whether the model's id/name tokens carry the `llama` marker. */
function isLlamaLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(idNameTokens, ["llama"]);
}
730
/**
 * Reports whether the model or the assistant message carries the `llama`
 * marker; the lookup itself is delegated to `modelOrAssistantMessageHas`.
 */
function isLlamaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const markers = ["llama"];
  return modelOrAssistantMessageHas(message, model, markers);
}
733
+
734
/** Reports whether the model's id/name tokens carry the `nemotron` marker. */
function isNemotronLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(idNameTokens, ["nemotron"]);
}
737
/**
 * Reports whether the model or the assistant message carries the `nemotron`
 * marker; the lookup itself is delegated to `modelOrAssistantMessageHas`.
 */
function isNemotronLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const markers = ["nemotron"];
  return modelOrAssistantMessageHas(message, model, markers);
}
740
+
741
/**
 * Reports whether the model's id/name tokens carry a Cohere marker
 * (`cohere` or `command-r`).
 */
function isCohereLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(idNameTokens, ["cohere", "command-r"]);
}
744
/**
 * Reports whether the model or the assistant message carries a Cohere marker
 * (`cohere` or `command-r`); the lookup itself is delegated to
 * `modelOrAssistantMessageHas`.
 */
function isCohereLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const markers = ["cohere", "command-r"];
  return modelOrAssistantMessageHas(message, model, markers);
}
747
+
748
// Matches `yi` only as a delimited token: preceded by start-of-string or one
// of `/`, whitespace, `:`, `_`, `-`, and followed by end-of-string or one of
// `-`, `_`, `.`, `:`, `/`, whitespace. No `g` flag, so `.test()` is stateless.
const YI_MODEL_PATTERN = /(^|[\/\s:_-])yi($|[\-_.:\/\s])/;
749
+
750
/**
 * Reports whether the model looks like a Yi (01.AI) model: either an id/name
 * token carries one of the markers `yi-`, `01-ai`, `zero-one`, or a token
 * matches `YI_MODEL_PATTERN`.
 *
 * NOTE(review): if `hasAnyTokenContaining` is a plain substring test, the
 * `yi-` marker also matches ids such as `tongyi-…`; confirm that is intended.
 */
function isYiLikeModel(model: PiModel | undefined): boolean {
  const idNameTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(idNameTokens, ["yi-", "01-ai", "zero-one"])) {
    return true;
  }
  // Fall back to the delimited `yi` pattern only when no marker was found.
  return idNameTokens.some((token) => YI_MODEL_PATTERN.test(token));
}
754
/**
 * Same check as the model-only Yi detector, but over the combined token list:
 * the model's id/name tokens followed by the assistant message's model tokens.
 *
 * NOTE(review): if `hasAnyTokenContaining` is a plain substring test, the
 * `yi-` marker also matches ids such as `tongyi-…`; confirm that is intended.
 */
function isYiLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["yi-", "01-ai", "zero-one"])) {
    return true;
  }
  return candidates.some((token) => YI_MODEL_PATTERN.test(token));
}
761
+
675
762
  // ── Model key ──────────────────────────────────────────────────────
676
763
 
677
764
  function modelKey(model: PiModel): string {
@@ -937,9 +1024,15 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
937
1024
  suggestion[flag] = true;
938
1025
  }
939
1026
 
1027
+ // Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
1028
+ // If no slash is found, fall back to the key itself.
1029
+ const slashIdx = key.indexOf("/");
1030
+ const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1031
+
1032
+ const modelsJsonPath = getModelsJsonDisplayPath();
940
1033
  const lines: string[] = [
941
1034
  `💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
942
- `Add under the model's compat in ~/.pi/agent/models.json (only if the endpoint supports them):`,
1035
+ `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models):`,
943
1036
  ``,
944
1037
  JSON.stringify(suggestion, null, 2),
945
1038
  ``,
@@ -993,9 +1086,12 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
993
1086
  if (missing.length === 0) return undefined;
994
1087
 
995
1088
  const key = modelKey(model);
1089
+ const slashIdx = key.indexOf("/");
1090
+ const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
1091
+ const modelsJsonPath = getModelsJsonDisplayPath();
996
1092
  return (
997
1093
  `💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}. ` +
998
- "Proxies may reduce or hide cache hits; add these compat flags in ~/.pi/agent/models.json when the endpoint supports them."
1094
+ `Proxies may reduce or hide cache hits. Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`
999
1095
  );
1000
1096
  },
1001
1097
  },
@@ -1136,6 +1232,109 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
1136
1232
  return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1137
1233
  },
1138
1234
  },
1235
+ // ── More OpenAI-compatible adapters ──────────────────────────
1236
+ {
1237
+ id: "openai" as CacheProviderId,
1238
+ label: "Mistral cache",
1239
+ matchesModel: isMistralLikeModel,
1240
+ matchesAssistantMessage(message, model) {
1241
+ if (!isAssistantMessage(message)) return false;
1242
+ return isMistralLikeAssistantMessage(message, model);
1243
+ },
1244
+ normalizeUsage(message) {
1245
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1246
+ },
1247
+ warningText(model) {
1248
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1249
+ if (missing.length === 0) return undefined;
1250
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1251
+ },
1252
+ },
1253
+ {
1254
+ id: "openai" as CacheProviderId,
1255
+ label: "Grok cache",
1256
+ matchesModel: isGrokLikeModel,
1257
+ matchesAssistantMessage(message, model) {
1258
+ if (!isAssistantMessage(message)) return false;
1259
+ return isGrokLikeAssistantMessage(message, model);
1260
+ },
1261
+ normalizeUsage(message) {
1262
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1263
+ },
1264
+ warningText(model) {
1265
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1266
+ if (missing.length === 0) return undefined;
1267
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1268
+ },
1269
+ },
1270
+ {
1271
+ id: "openai" as CacheProviderId,
1272
+ label: "Llama cache",
1273
+ matchesModel: isLlamaLikeModel,
1274
+ matchesAssistantMessage(message, model) {
1275
+ if (!isAssistantMessage(message)) return false;
1276
+ return isLlamaLikeAssistantMessage(message, model);
1277
+ },
1278
+ normalizeUsage(message) {
1279
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1280
+ },
1281
+ warningText(model) {
1282
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1283
+ if (missing.length === 0) return undefined;
1284
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1285
+ },
1286
+ },
1287
+ {
1288
+ id: "openai" as CacheProviderId,
1289
+ label: "Nemotron cache",
1290
+ matchesModel: isNemotronLikeModel,
1291
+ matchesAssistantMessage(message, model) {
1292
+ if (!isAssistantMessage(message)) return false;
1293
+ return isNemotronLikeAssistantMessage(message, model);
1294
+ },
1295
+ normalizeUsage(message) {
1296
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1297
+ },
1298
+ warningText(model) {
1299
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1300
+ if (missing.length === 0) return undefined;
1301
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1302
+ },
1303
+ },
1304
+ {
1305
+ id: "openai" as CacheProviderId,
1306
+ label: "Cohere cache",
1307
+ matchesModel: isCohereLikeModel,
1308
+ matchesAssistantMessage(message, model) {
1309
+ if (!isAssistantMessage(message)) return false;
1310
+ return isCohereLikeAssistantMessage(message, model);
1311
+ },
1312
+ normalizeUsage(message) {
1313
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1314
+ },
1315
+ warningText(model) {
1316
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1317
+ if (missing.length === 0) return undefined;
1318
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1319
+ },
1320
+ },
1321
+ {
1322
+ id: "openai" as CacheProviderId,
1323
+ label: "Yi cache",
1324
+ matchesModel: isYiLikeModel,
1325
+ matchesAssistantMessage(message, model) {
1326
+ if (!isAssistantMessage(message)) return false;
1327
+ return isYiLikeAssistantMessage(message, model);
1328
+ },
1329
+ normalizeUsage(message) {
1330
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1331
+ },
1332
+ warningText(model) {
1333
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1334
+ if (missing.length === 0) return undefined;
1335
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1336
+ },
1337
+ },
1139
1338
  ];
1140
1339
 
1141
1340
  function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
@@ -1359,6 +1558,76 @@ async function writePersistedCacheStats(state: CacheStatsState): Promise<void> {
1359
1558
 
1360
1559
 
1361
1560
 
1561
/**
 * Decides whether the proxy compat check is meaningful for a model.
 *
 * It applies only when the model's API (after `lower`) is exactly
 * "openai-completions" and the base URL is not recognised as official OpenAI
 * by `isOfficialOpenAIBaseUrl`.
 */
function isCompatCheckApplicable(model: PiModel): boolean {
  if (lower(model.api) !== "openai-completions") {
    return false;
  }
  return !isOfficialOpenAIBaseUrl(model);
}
1564
+
1565
/**
 * Build the multi-line text report for `/cache-optimizer doctor`.
 *
 * The report lists provider, model id, name (only when it differs from the
 * id), API, base URL, and the JSON-serialised compat object, followed by one
 * of three compat outcomes:
 *   - missing flags: a warning, the edit location, and a JSON snippet;
 *   - nothing missing and the check applies: a "fully configured" line;
 *   - otherwise: a "not applicable" line.
 * If a prompt-integrity event was recorded less than five minutes ago, a
 * recovery section is appended.
 *
 * @param model The active model to describe.
 * @returns The report lines joined with "\n".
 */
function buildDoctorDiagnosis(model: PiModel): string {
  const lines: string[] = [];
  lines.push(`Provider: ${model.provider}`);
  lines.push(`Model: ${model.id}`);
  // The name is only informative when it differs from the id.
  if (model.name && model.name !== model.id) lines.push(`Name: ${model.name}`);
  lines.push(`API: ${model.api}`);
  lines.push(`Base URL: ${model.baseUrl || "(default)"}`);

  const compat = getCompat(model);
  lines.push(`Compat: ${JSON.stringify(compat)}`);

  const missing = describeMissingOpenAICompatibleProxyCompat(model);
  if (missing.length > 0) {
    lines.push(`⚠️ Missing compat flags: ${missing.join(", ")}`);
    // The provider label is the part of the model key before the first "/";
    // when there is no slash (or it is the first character) the whole key is used.
    const key = modelKey(model);
    const slashIdx = key.indexOf("/");
    const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
    const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
    const modelsJsonPath = getModelsJsonDisplayPath();
    lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models):`);
    lines.push(JSON.stringify(suggestion, null, 2));
  } else if (isCompatCheckApplicable(model)) {
    lines.push("✅ Compat fully configured.");
  } else {
    lines.push("ℹ️ Compat check not applicable for this model.");
  }

  // ── Integrity diagnostics ──
  if (lastPromptIntegrityWarningAt > 0) {
    // Date.now() is wall-clock time and can step backwards (manual change,
    // NTP correction). Clamp to 0 so the report never prints a negative age.
    const ago = Math.max(0, Date.now() - lastPromptIntegrityWarningAt);
    const mins = Math.floor(ago / 60000);
    // Only events from the last five minutes are considered recent.
    if (mins < 5) {
      const age = mins > 0 ? `${mins} min` : `${Math.floor(ago / 1000)}s`;
      lines.push("");
      lines.push("⚠️ Recent prompt integrity issue detected:");
      lines.push(` Last detected ${age} ago. The prompt reorder was`);
      lines.push(` skipped on that turn to preserve structural markers.`);
      lines.push(` Common causes: extension system prompt format change, substring collision.`);
      lines.push(` Steps:`);
      lines.push(` 1. Run /reload to reset (may clear transient issues).`);
      lines.push(` 2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 & /reload to disable reorder.`);
      lines.push(` 3. If persistent, file an issue with this doctor output.`);
    }
  }

  return lines.join("\n");
}
1611
+
1612
/**
 * Build the text for `/cache-optimizer compat`.
 *
 * @param model The active model to inspect.
 * @returns `undefined` when no compat flags are missing; otherwise a message
 *   naming the model key, the missing flags, where to edit, and a JSON snippet
 *   that sets each missing flag to `true`.
 */
function buildCompatDiagnosis(model: PiModel): string | undefined {
  const missingFlags = describeMissingOpenAICompatibleProxyCompat(model);
  if (missingFlags.length === 0) {
    return undefined;
  }

  const key = modelKey(model);
  // Provider label = text before the first "/" in the key, else the whole key.
  const firstSlash = key.indexOf("/");
  const providerLabel = firstSlash > 0 ? key.slice(0, firstSlash) : key;
  const snippet = JSON.stringify(Object.fromEntries(missingFlags.map((flag) => [flag, true])), null, 2);
  const displayPath = getModelsJsonDisplayPath();

  const parts: string[] = [
    `Active model: ${key}`,
    `Missing: ${missingFlags.join(", ")}`,
    "",
    `Edit ${displayPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models) and add:`,
    snippet,
    "",
    `Only enable if your endpoint supports them.`,
  ];
  return parts.join("\n");
}
1630
+
1362
1631
  // Internal helpers exported only so the task verification script
1363
1632
  // (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
1364
1633
  // extension's public API; pi only invokes the default export below.
@@ -1398,11 +1667,32 @@ export const __internals_for_tests = {
1398
1667
  isMiniMaxLikeAssistantMessage,
1399
1668
  isHunyuanLikeModel,
1400
1669
  isHunyuanLikeAssistantMessage,
1670
+ // Additional OpenAI-compatible model detection
1671
+ isMistralLikeModel,
1672
+ isMistralLikeAssistantMessage,
1673
+ isGrokLikeModel,
1674
+ isGrokLikeAssistantMessage,
1675
+ isLlamaLikeModel,
1676
+ isLlamaLikeAssistantMessage,
1677
+ isNemotronLikeModel,
1678
+ isNemotronLikeAssistantMessage,
1679
+ isCohereLikeModel,
1680
+ isCohereLikeAssistantMessage,
1681
+ isYiLikeModel,
1682
+ isYiLikeAssistantMessage,
1401
1683
  buildOpenAIProxyCompatWarningText,
1402
1684
  getModelIdNameTokenValues,
1403
1685
  getAssistantMessageModelTokenValues,
1404
1686
  getCompat,
1405
1687
  modelKey,
1688
+ // Platform-friendly path helper
1689
+ getModelsJsonDisplayPath,
1690
+ // Integrity diagnostics
1691
+ getLastPromptIntegrityWarningAt,
1692
+ // Diagnostic command helpers
1693
+ isCompatCheckApplicable,
1694
+ buildDoctorDiagnosis,
1695
+ buildCompatDiagnosis,
1406
1696
  // Cache stats helpers (module-level, usable from verify script)
1407
1697
  addUsageToCacheStats,
1408
1698
  formatCacheStats,
@@ -1419,8 +1709,10 @@ export default function (pi: ExtensionAPI) {
1419
1709
  let lastStatusText: string | undefined;
1420
1710
  let persistenceWarningShown = false;
1421
1711
  let persistTimer: ReturnType<typeof setTimeout> | null = null;
1712
+ let integrityNotificationShown = false;
1422
1713
  const PERSIST_DEBOUNCE_MS = 2000;
1423
1714
 
1715
+
1424
1716
  function getCacheStatsState(): CacheStatsState {
1425
1717
  return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
1426
1718
  }
@@ -1525,6 +1817,9 @@ export default function (pi: ExtensionAPI) {
1525
1817
  cacheStatsByModel = {};
1526
1818
  cacheStatsLegacyFamily = emptyAllCacheStats();
1527
1819
  lastStatusText = undefined;
1820
+ // Reset integrity diagnostics on reload
1821
+ lastPromptIntegrityWarningAt = 0;
1822
+ integrityNotificationShown = false;
1528
1823
  await flushPersistCacheStats(ctx);
1529
1824
  return;
1530
1825
  }
@@ -1564,6 +1859,35 @@ export default function (pi: ExtensionAPI) {
1564
1859
  if (promptTruncationDetected && statusText !== undefined) {
1565
1860
  statusText = statusText + " ⚠️ integrity";
1566
1861
  promptTruncationDetected = false;
1862
+ lastPromptIntegrityWarningAt = Date.now();
1863
+
1864
+ // One-time notification with recovery steps (per session).
1865
+ if (!integrityNotificationShown) {
1866
+ integrityNotificationShown = true;
1867
+ ctx.ui.notify(
1868
+ `⚠️ ${LOG_PREFIX}: A prompt structural marker was lost during reorder on this turn. ` +
1869
+ `The original prompt was used instead to preserve integrity.\n\n` +
1870
+ `Recovery steps:\n` +
1871
+ `1. Run /reload to reset (may clear transient issues).\n` +
1872
+ `2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 and /reload to disable reorder.\n` +
1873
+ `3. If persistent, run /cache-optimizer doctor and file an issue (no API keys/prompts).`,
1874
+ "warning",
1875
+ );
1876
+ }
1877
+ }
1878
+
1879
+ // ⚠️ compat footer marker: if the active model is a non-official
1880
+ // openai-completions model with missing supportsLongCacheRetention
1881
+ // or sendSessionAffinityHeaders, append the marker to indicate that
1882
+ // compat configuration is incomplete. Re-evaluated on every status
1883
+ // update so the marker persists through stats changes and day
1884
+ // rollovers. Redundant setStatus calls are blocked by the
1885
+ // `lastStatusText` early return below.
1886
+ if (statusText !== undefined && model) {
1887
+ const compatMissing = describeMissingOpenAICompatibleProxyCompat(model);
1888
+ if (compatMissing.length > 0) {
1889
+ statusText = statusText + " ⚠️ compat";
1890
+ }
1567
1891
  }
1568
1892
 
1569
1893
  if (statusText === lastStatusText) return;
@@ -1695,4 +2019,99 @@ export default function (pi: ExtensionAPI) {
1695
2019
  schedulePersistCacheStats(ctx);
1696
2020
  await publishStatus(ctx);
1697
2021
  });
2022
+
2023
+ // ────────────────────────────────────────────────────────────────
2024
+ // Register /cache-optimizer command
2025
+ // Subcommands:
2026
+ // doctor — show current model/provider/api/baseUrl/compat status
2027
+ // compat — show compat suggestion with file path
2028
+ // (no args / unrecognized) — interactive menu when a UI is available; otherwise help summary + current diagnosis
2029
+ // ────────────────────────────────────────────────────────────────
2030
+ pi.registerCommand("cache-optimizer", {
2031
+ description: "Diagnose Pi cache configuration",
2032
+ handler: async (args: string, cmdCtx) => {
2033
+ const model = cmdCtx.model;
2034
+ const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
2035
+
2036
+ if (subcommand === "doctor") {
2037
+ if (!model) {
2038
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
2039
+ return;
2040
+ }
2041
+ cmdCtx.ui.notify(buildDoctorDiagnosis(model), "info");
2042
+ } else if (subcommand === "compat") {
2043
+ if (!model) {
2044
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
2045
+ return;
2046
+ }
2047
+ const compatResult = buildCompatDiagnosis(model);
2048
+ if (compatResult) {
2049
+ cmdCtx.ui.notify(compatResult, "warning");
2050
+ } else {
2051
+ cmdCtx.ui.notify(
2052
+ isCompatCheckApplicable(model)
2053
+ ? "✅ Compat fully configured."
2054
+ : "ℹ️ Compat check not applicable for this model.",
2055
+ "info",
2056
+ );
2057
+ }
2058
+ } else {
2059
+ // Try interactive selection menu when UI supports it
2060
+ if (cmdCtx.hasUI) {
2061
+ const menuOptions = [
2062
+ "🩺 Doctor — Show current model cache configuration",
2063
+ "⚙️ Compat — Show compat suggestion with edit instructions",
2064
+ "❌ Cancel",
2065
+ ];
2066
+ const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
2067
+ if (choice === menuOptions[0]) {
2068
+ if (!model) {
2069
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
2070
+ } else {
2071
+ cmdCtx.ui.notify(buildDoctorDiagnosis(model), "info");
2072
+ }
2073
+ } else if (choice === menuOptions[1]) {
2074
+ if (!model) {
2075
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
2076
+ } else {
2077
+ const compatResult = buildCompatDiagnosis(model);
2078
+ if (compatResult) {
2079
+ cmdCtx.ui.notify(compatResult, "warning");
2080
+ } else {
2081
+ cmdCtx.ui.notify(
2082
+ isCompatCheckApplicable(model)
2083
+ ? "✅ Compat fully configured."
2084
+ : "ℹ️ Compat check not applicable for this model.",
2085
+ "info",
2086
+ );
2087
+ }
2088
+ }
2089
+ }
2090
+ // choice is the "❌ Cancel" entry (menuOptions[2]) or undefined → no action
2091
+ return;
2092
+ }
2093
+
2094
+ // Fallback: text help when no interactive UI
2095
+ const diagnosis: string[] = [];
2096
+ diagnosis.push("📋 /cache-optimizer commands:");
2097
+ diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat status");
2098
+ diagnosis.push(" compat — Show compat suggestion with edit location");
2099
+ diagnosis.push("");
2100
+ if (model) {
2101
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2102
+ if (missing.length > 0) {
2103
+ diagnosis.push(`⚠️ Active model "${modelKey(model)}" missing compat: ${missing.join(", ")}`);
2104
+ diagnosis.push('Run "/cache-optimizer compat" for edit instructions.');
2105
+ } else if (isCompatCheckApplicable(model)) {
2106
+ diagnosis.push(`✅ Active model "${modelKey(model)}": compat fully configured.`);
2107
+ } else {
2108
+ diagnosis.push(`ℹ️ Active model "${modelKey(model)}": compat check not applicable.`);
2109
+ }
2110
+ } else {
2111
+ diagnosis.push("No active model selected.");
2112
+ }
2113
+ cmdCtx.ui.notify(diagnosis.join("\n"), "info");
2114
+ }
2115
+ },
2116
+ });
1698
2117
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.4.3",
3
+ "version": "2.4.5",
4
4
  "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
5
5
  "keywords": [
6
6
  "pi-package",