pi-cache-optimizer 2.4.4 → 2.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +135 -2
- package/README.zh-CN.md +115 -3
- package/index.ts +528 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -46,6 +46,16 @@ This release keeps the original DeepSeek behavior and adds read-only stats adapt
|
|
|
46
46
|
| NVIDIA Nemotron | Model id/name contains `nemotron` | `Nemotron cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
47
47
|
| Cohere / Command | Model id/name contains `cohere` or `command-r` | `Cohere cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
48
48
|
| Yi / 零一万物 | Model id/name contains `yi-`, `01-ai`, `zero-one`, or pattern `yi` with safe boundaries | `Yi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
49
|
+
| Doubao / ByteDance / Seed | Model id/name contains `doubao`, `豆包`, `volcengine`, `bytedance`, `byte-dance`, or pattern `seed` with safe boundaries | `Doubao cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
50
|
+
| Baidu ERNIE / Wenxin | Model id/name contains `ernie`, `wenxin`, `文心`, `yiyan`, `一言`, or `baidu` | `ERNIE cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
51
|
+
| Baichuan / 百川 | Model id/name contains `baichuan` or `百川` | `Baichuan cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
52
|
+
| StepFun / 阶跃星辰 | Model id/name contains `stepfun` or `step-` prefix | `StepFun cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
53
|
+
| iFlytek Spark / 讯飞星火 | Model id/name contains `spark`, `xinghuo`, `星火`, `iflytek`, or `讯飞` | `Spark cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
54
|
+
| InternLM / 书生 | Model id/name contains `internlm`, `intern-lm`, or `书生` | `InternLM cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
55
|
+
| Google Gemma | Model id/name contains `gemma` | `Gemma cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
56
|
+
| Microsoft Phi | Model id/name contains `phi-` prefix, or pattern `phi` with safe boundaries | `Phi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
57
|
+
| AI21 Jamba | Model id/name contains `jamba` or `ai21` | `Jamba cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
58
|
+
| Upstage Solar | Model id/name contains `solar` or `upstage` | `Solar cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
49
59
|
| Anthropic / Claude | Model id/name contains `anthropic` or `claude` | `Claude cache` | Pi-normalized usage, or raw `cache_read_input_tokens`, `cache_creation_input_tokens`, `input_tokens` |
|
|
50
60
|
| Gemini / Vertex | Model id/name contains `gemini` or `vertex` | `Gemini cache` | Pi-normalized usage, or raw Gemini/Vertex cached-content token metadata when visible |
|
|
51
61
|
|
|
@@ -62,7 +72,7 @@ This extension is pure Node.js — no shell exec, no native bindings, no platfor
|
|
|
62
72
|
| Windows | Works through the bash shell Pi requires on Windows (Git Bash, Cygwin, MSYS2, or WSL). See Pi's [Windows setup](https://github.com/earendil-works/pi-coding-agent/blob/main/docs/windows.md). |
|
|
63
73
|
| Termux / Android | Works inside Pi's Termux setup. |
|
|
64
74
|
|
|
65
|
-
State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on Windows the path automatically expands to `C:\Users\<you>\.pi\agent\...`. All shell snippets in this README are bash, matching the shell Pi runs in on every supported platform; no PowerShell or `cmd.exe` translation is needed when commands are executed inside (or for) Pi.
|
|
75
|
+
State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on Windows the path automatically expands to `C:\Users\<you>\.pi\agent\...`. The extension's compat warnings, `/cache-optimizer doctor`, and `/cache-optimizer compat` show the platform-appropriate path automatically (`~/.pi/agent/models.json` on Linux/macOS, `%USERPROFILE%\.pi\agent\models.json` on Windows). All shell snippets in this README are bash, matching the shell Pi runs in on every supported platform; no PowerShell or `cmd.exe` translation is needed when commands are executed inside (or for) Pi.
|
|
66
76
|
|
|
67
77
|
## Quickstart
|
|
68
78
|
|
|
@@ -126,6 +136,64 @@ rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
|
126
136
|
|
|
127
137
|
|
|
128
138
|
|
|
139
|
+
## Adding an OpenAI-compatible proxy channel
|
|
140
|
+
|
|
141
|
+
When adding a third-party OpenAI-compatible proxy provider (e.g. `otokapi`, `cafecode`,
|
|
142
|
+
OpenRouter, etc.) to `~/.pi/agent/models.json`, the `compat` flags for cache optimization
|
|
143
|
+
are NOT required for the model to work — but they dramatically improve cache durability.
|
|
144
|
+
|
|
145
|
+
### Minimal provider config template
|
|
146
|
+
|
|
147
|
+
```jsonc
|
|
148
|
+
{
|
|
149
|
+
"providers": {
|
|
150
|
+
"your-provider-id": {
|
|
151
|
+
"api": "openai-completions", // or "openai-responses"
|
|
152
|
+
"baseUrl": "https://your-proxy.example.com/v1",
|
|
153
|
+
"apiKey": "your-api-key",
|
|
154
|
+
"models": {
|
|
155
|
+
"gpt-5.5": {
|
|
156
|
+
"id": "gpt-5.5",
|
|
157
|
+
"name": "GPT 5.5",
|
|
158
|
+
"contextWindowTokens": 128000,
|
|
159
|
+
"maxOutputTokens": 8192,
|
|
160
|
+
"thinking": {
|
|
161
|
+
// Use the thinking modes your proxy actually supports.
|
|
162
|
+
// Pi maps --thinking <level> to tokens via thinkingLevelMap.
|
|
163
|
+
// The template below keeps each level distinct — DO NOT
|
|
164
|
+
// map everything to "xhigh". Your proxy may not support
|
|
165
|
+
// all levels; remove unsupported ones or test each.
|
|
166
|
+
"thinkingLevelMap": {
|
|
167
|
+
"off": null,
|
|
168
|
+
"minimal": "minimal",
|
|
169
|
+
"low": "low",
|
|
170
|
+
"medium": "medium",
|
|
171
|
+
"high": "high",
|
|
172
|
+
"xhigh": "xhigh"
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
"compat": {
|
|
176
|
+
"supportsLongCacheRetention": true,
|
|
177
|
+
"sendSessionAffinityHeaders": true
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Key points:
|
|
187
|
+
|
|
188
|
+
- `thinkingLevelMap` keeps distinct levels. If your proxy does not support a particular
|
|
189
|
+
level (e.g. `minimal`), remove that entry or set it to `null`. Do **not** collapse all
|
|
190
|
+
levels to `"xhigh"` — that defeats user control over reasoning effort.
|
|
191
|
+
- `compat` flags help Pi request longer cache retention and send session-affinity
|
|
192
|
+
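headers for proxy-side cache locality. Only enable them if your proxy supports them. The template above sets `compat` on the model entry; it can also be set at provider level (same level as `baseUrl`/`api`/`apiKey`/`models`), as in the provider-level example later in this README.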
|
|
193
|
+
- The extension detects model families by `id`/`name` strings, not by provider id,
|
|
194
|
+
base URL, or API type. Use recognizable model ids (e.g. `gpt-5.5`, `kimi-k2.5`) for
|
|
195
|
+
correct stats adapter selection.
|
|
196
|
+
|
|
129
197
|
## Footer cache stats
|
|
130
198
|
|
|
131
199
|
The Pi footer displays stats for the **active model family** only, for example:
|
|
@@ -166,10 +234,18 @@ Reset behavior:
|
|
|
166
234
|
|
|
167
235
|
For direct DeepSeek or DeepSeek-like OpenAI-compatible proxies, configure the provider or model `compat` like this:
|
|
168
236
|
|
|
169
|
-
|
|
237
|
+
The `compat` block goes inside your provider object in `~/.pi/agent/models.json`, at
|
|
238
|
+
the same level as `baseUrl`, `api`, `apiKey`, and `models`:
|
|
239
|
+
|
|
240
|
+
```jsonc
|
|
170
241
|
{
|
|
171
242
|
"providers": {
|
|
172
243
|
"deepseek": {
|
|
244
|
+
"api": "openai-completions",
|
|
245
|
+
"baseUrl": "https://api.deepseek.com/v1",
|
|
246
|
+
"apiKey": "sk-...",
|
|
247
|
+
"models": { /* ... */ },
|
|
248
|
+
// 👇 compat goes here, NOT inside models
|
|
173
249
|
"compat": {
|
|
174
250
|
"thinkingFormat": "deepseek",
|
|
175
251
|
"supportsLongCacheRetention": true,
|
|
@@ -191,6 +267,63 @@ For Claude/Anthropic models behind an OpenAI-compatible endpoint, the extension
|
|
|
191
267
|
|
|
192
268
|
> Reminder: only enable session-affinity headers or cache-control compat when your endpoint or proxy supports them.
|
|
193
269
|
|
|
270
|
+
## Diagnostic command
|
|
271
|
+
|
|
272
|
+
The extension registers a Pi command `/cache-optimizer` for interactive diagnosis.
|
|
273
|
+
|
|
274
|
+
```
|
|
275
|
+
/cache-optimizer — interactive menu (or text help when no UI)
|
|
276
|
+
/cache-optimizer doctor — show provider, model, API, base URL, compat status
|
|
277
|
+
/cache-optimizer compat — show compat suggestion with edit instructions
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
When run without arguments, `/cache-optimizer` shows an interactive selection menu
|
|
281
|
+
(Doctor / Compat / Cancel) when the Pi UI supports it (`ctx.ui.select`). In
|
|
282
|
+
non-interactive terminals, it falls back to text help with current model compat
|
|
283
|
+
status.
|
|
284
|
+
|
|
285
|
+
### `/cache-optimizer doctor`
|
|
286
|
+
|
|
287
|
+
Displays the active model's provider, model id, name, API type, base URL, current
|
|
288
|
+
`compat` flags, and any missing cache/session-affinity flags. If flags are missing,
|
|
289
|
+
it also shows a copyable JSON snippet and the exact edit location.
|
|
290
|
+
|
|
291
|
+
When all compat flags are present and applicable (third-party `openai-completions`
|
|
292
|
+
proxy), the output shows `✅ Compat fully configured.` For models where the
|
|
293
|
+
compat check does not apply (official OpenAI, non-`openai-completions` APIs,
|
|
294
|
+
custom transports), it shows `ℹ️ Compat check not applicable for this model.` Example output when flags are missing:
|
|
295
|
+
|
|
296
|
+
```text
|
|
297
|
+
Provider: otokapi
|
|
298
|
+
Model: gpt-5.5
|
|
299
|
+
API: openai-completions
|
|
300
|
+
Base URL: https://otokapi.example.com/v1
|
|
301
|
+
Compat: {}
|
|
302
|
+
⚠️ Missing compat flags: supportsLongCacheRetention, sendSessionAffinityHeaders
|
|
303
|
+
Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as baseUrl/api/apiKey/models):
|
|
304
|
+
{
|
|
305
|
+
"supportsLongCacheRetention": true,
|
|
306
|
+
"sendSessionAffinityHeaders": true
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### `/cache-optimizer compat`
|
|
311
|
+
|
|
312
|
+
Shows only the compat suggestion for the active model, including file path,
|
|
313
|
+
provider path, and copyable JSON snippet. When no flags are missing, it shows
|
|
314
|
+
`✅ Compat fully configured.` if the model is an applicable third-party proxy,
|
|
315
|
+
or `ℹ️ Compat check not applicable for this model.` otherwise.
|
|
316
|
+
|
|
317
|
+
### Security
|
|
318
|
+
|
|
319
|
+
The command reads only metadata exposed by Pi through `ctx.model`:
|
|
320
|
+
provider, id, name, api, baseUrl, compat. It does NOT read or expose:
|
|
321
|
+
- API keys or environment secrets
|
|
322
|
+
- Request/response payloads
|
|
323
|
+
- Prompts or model outputs
|
|
324
|
+
- HTTP headers
|
|
325
|
+
- Raw `~/.pi/agent/models.json` content
|
|
326
|
+
|
|
194
327
|
## How it works
|
|
195
328
|
|
|
196
329
|
Provider caches are usually based on exact or near-exact prefix matching. Pi's system prompt contains stable content that is likely shared across sessions (tools, skills, guidelines) and dynamic content that changes frequently (git status, task context).
|
package/README.zh-CN.md
CHANGED
|
@@ -49,6 +49,16 @@
|
|
|
49
49
|
| NVIDIA Nemotron | model id/name 包含 `nemotron` | `Nemotron cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
50
50
|
| Cohere / Command | model id/name 包含 `cohere` 或 `command-r` | `Cohere cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
51
51
|
| Yi / 零一万物 | model id/name 包含 `yi-`、`01-ai`、`zero-one`,或安全边界内 `yi` 模式 | `Yi cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
52
|
+
| Doubao / ByteDance / Seed | model id/name 包含 `doubao`、`豆包`、`volcengine`、`bytedance`、`byte-dance`,或安全边界内 `seed` 模式 | `Doubao cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
53
|
+
| Baidu ERNIE / 文心一言 | model id/name 包含 `ernie`、`wenxin`、`文心`、`yiyan`、`一言` 或 `baidu` | `ERNIE cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
54
|
+
| Baichuan / 百川 | model id/name 包含 `baichuan` 或 `百川` | `Baichuan cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
55
|
+
| StepFun / 阶跃星辰 | model id/name 包含 `stepfun` 或 `step-` 前缀 | `StepFun cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
56
|
+
| iFlytek Spark / 讯飞星火 | model id/name 包含 `spark`、`xinghuo`、`星火`、`iflytek` 或 `讯飞` | `Spark cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
57
|
+
| InternLM / 书生 | model id/name 包含 `internlm`、`intern-lm` 或 `书生` | `InternLM cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
58
|
+
| Google Gemma | model id/name 包含 `gemma` | `Gemma cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
59
|
+
| Microsoft Phi | model id/name 包含 `phi-` 前缀,或安全边界内 `phi` 模式 | `Phi cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
60
|
+
| AI21 Jamba | model id/name 包含 `jamba` 或 `ai21` | `Jamba cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
61
|
+
| Upstage Solar | model id/name 包含 `solar` 或 `upstage` | `Solar cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
52
62
|
| Anthropic / Claude | model id/name 包含 `anthropic` 或 `claude` | `Claude cache` | Pi 归一化 usage,或可见 raw 字段 `cache_read_input_tokens`、`cache_creation_input_tokens`、`input_tokens` |
|
|
53
63
|
| Gemini / Vertex | model id/name 包含 `gemini` 或 `vertex` | `Gemini cache` | Pi 归一化 usage,或可见 Gemini/Vertex cached-content token metadata |
|
|
54
64
|
|
|
@@ -65,7 +75,7 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
|
|
|
65
75
|
| Windows | 通过 Pi 在 Windows 下要求的 bash shell 运行(Git Bash、Cygwin、MSYS2 或 WSL)。详见 Pi 的 [Windows setup](https://github.com/earendil-works/pi-coding-agent/blob/main/docs/windows.md)。 |
|
|
66
76
|
| Termux / Android | 在 Pi 的 Termux 环境中可用。 |
|
|
67
77
|
|
|
68
|
-
状态文件 `~/.pi/agent/` 通过 Node 的 `os.homedir()` 解析,所以在 Windows 上会自动展开为 `C:\Users\<你>\.pi\agent
|
|
78
|
+
状态文件 `~/.pi/agent/` 通过 Node 的 `os.homedir()` 解析,所以在 Windows 上会自动展开为 `C:\Users\<你>\.pi\agent\...`。扩展的 compat 提醒、`/cache-optimizer doctor` 和 `/cache-optimizer compat` 会自动显示适合当前平台的路径(Linux/macOS 上显示 `~/.pi/agent/models.json`,Windows 上显示 `%USERPROFILE%\.pi\agent\models.json`)。本文档中所有 shell 命令均使用 bash 语法,与 Pi 在每个受支持平台下运行的 shell 一致;只要在 Pi 内(或为 Pi 而执行)运行,就**不需要**改写为 PowerShell 或 `cmd.exe` 形式。
|
|
69
79
|
|
|
70
80
|
## 快速开始
|
|
71
81
|
|
|
@@ -129,6 +139,56 @@ rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
|
129
139
|
|
|
130
140
|
|
|
131
141
|
|
|
142
|
+
## 添加 OpenAI-compatible 代理渠道
|
|
143
|
+
|
|
144
|
+
当在 `~/.pi/agent/models.json` 中添加第三方 OpenAI-compatible 代理 provider(例如 `otokapi`、`cafecode`、OpenRouter 等)时,缓存优化的 `compat` 标志对模型正常使用不是必需的,但它们能显著提高缓存持久性。
|
|
145
|
+
|
|
146
|
+
### 最小 provider 配置模板
|
|
147
|
+
|
|
148
|
+
```jsonc
|
|
149
|
+
{
|
|
150
|
+
"providers": {
|
|
151
|
+
"your-provider-id": {
|
|
152
|
+
"api": "openai-completions", // 或 "openai-responses"
|
|
153
|
+
"baseUrl": "https://your-proxy.example.com/v1",
|
|
154
|
+
"apiKey": "your-api-key",
|
|
155
|
+
"models": {
|
|
156
|
+
"gpt-5.5": {
|
|
157
|
+
"id": "gpt-5.5",
|
|
158
|
+
"name": "GPT 5.5",
|
|
159
|
+
"contextWindowTokens": 128000,
|
|
160
|
+
"maxOutputTokens": 8192,
|
|
161
|
+
"thinking": {
|
|
162
|
+
// 使用你的代理实际支持的 thinking 级别。
|
|
163
|
+
// Pi 通过 thinkingLevelMap 将 --thinking <level> 映射为 token。
|
|
164
|
+
// 下面模板保持各级别独立 —— 不要全部映射为 "xhigh"。
|
|
165
|
+
// 你的代理可能不支持所有级别;移除不支持的或逐个测试。
|
|
166
|
+
"thinkingLevelMap": {
|
|
167
|
+
"off": null,
|
|
168
|
+
"minimal": "minimal",
|
|
169
|
+
"low": "low",
|
|
170
|
+
"medium": "medium",
|
|
171
|
+
"high": "high",
|
|
172
|
+
"xhigh": "xhigh"
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
"compat": {
|
|
176
|
+
"supportsLongCacheRetention": true,
|
|
177
|
+
"sendSessionAffinityHeaders": true
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
关键点:
|
|
187
|
+
|
|
188
|
+
- `thinkingLevelMap` 保持不同的 level 独立。如果你的代理不支持某个级别(例如 `minimal`),请移除该条目或设为 `null`。**不要**将所有级别都映射为 `"xhigh"` —— 那会破坏用户对推理努力度的控制。
|
|
189
|
+
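- `compat` 标志帮助 Pi 请求更长的缓存保留时间,并通过发送 session-affinity headers 实现代理侧缓存本地性。仅在代理支持时才启用。上面的模板把 `compat` 放在模型条目内;也可以按下文「建议的 compat 配置」所述放在 provider 级(与 `baseUrl`、`api`、`apiKey`、`models` 同级)。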
|
|
190
|
+
- 扩展通过模型 `id`/`name` 字符串来检测模型家族,而不是通过 provider id、base URL 或 API 类型。请使用易识别的模型 id(例如 `gpt-5.5`、`kimi-k2.5`),以便正确匹配统计 adapter。
|
|
191
|
+
|
|
132
192
|
## 底部缓存统计
|
|
133
193
|
|
|
134
194
|
Pi footer 只显示**当前活跃模型 family** 的统计,例如:
|
|
@@ -167,12 +227,19 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
|
|
|
167
227
|
|
|
168
228
|
## 建议的 compat 配置
|
|
169
229
|
|
|
170
|
-
对直连 DeepSeek 或 DeepSeek-like OpenAI-compatible 代理,建议在对应 provider 或 model 的 `compat`
|
|
230
|
+
对直连 DeepSeek 或 DeepSeek-like OpenAI-compatible 代理,建议在对应 provider 或 model 的 `compat` 中配置。
|
|
231
|
+
|
|
232
|
+
`compat` 块应该放在 `~/.pi/agent/models.json` 中 provider 对象内部,与 `baseUrl`、`api`、`apiKey`、`models` 同级:
|
|
171
233
|
|
|
172
|
-
```
|
|
234
|
+
```jsonc
|
|
173
235
|
{
|
|
174
236
|
"providers": {
|
|
175
237
|
"deepseek": {
|
|
238
|
+
"api": "openai-completions",
|
|
239
|
+
"baseUrl": "https://api.deepseek.com/v1",
|
|
240
|
+
"apiKey": "sk-...",
|
|
241
|
+
"models": { /* ... */ },
|
|
242
|
+
// 👇 compat 在此位置,而不是在 models 内部
|
|
176
243
|
"compat": {
|
|
177
244
|
"thinkingFormat": "deepseek",
|
|
178
245
|
"supportsLongCacheRetention": true,
|
|
@@ -194,6 +261,51 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
|
|
|
194
261
|
|
|
195
262
|
> 提醒:只有在 endpoint 或代理明确支持时,才建议启用 session-affinity headers 或 cache-control compat。
|
|
196
263
|
|
|
264
|
+
## 诊断命令
|
|
265
|
+
|
|
266
|
+
扩展注册了 Pi 命令 `/cache-optimizer` 用于交互式诊断。
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
/cache-optimizer — 交互菜单(无 UI 时显示文字帮助)
|
|
270
|
+
/cache-optimizer doctor — 显示 provider、model、API、base URL、compat 状态
|
|
271
|
+
/cache-optimizer compat — 显示 compat 建议和编辑说明
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Compat / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
|
|
275
|
+
|
|
276
|
+
### `/cache-optimizer doctor`
|
|
277
|
+
|
|
278
|
+
显示当前模型的 provider、model id、名称、API 类型、base URL、当前 `compat` 标志以及缺少的缓存/session-affinity 标志。如果缺少标志,还会显示可复制的 JSON 片段和精确编辑位置。
|
|
279
|
+
|
|
280
|
+
如果所有 compat 标志都已配置且适用(第三方 `openai-completions` 代理),输出显示 `✅ Compat fully configured.`。对于不适用 compat 检查的模型(官方 OpenAI、非 `openai-completions` API、custom transport),显示 `ℹ️ Compat check not applicable for this model.`。以下是缺少标志时的输出示例:
|
|
281
|
+
|
|
282
|
+
```text
|
|
283
|
+
Provider: otokapi
|
|
284
|
+
Model: gpt-5.5
|
|
285
|
+
API: openai-completions
|
|
286
|
+
Base URL: https://otokapi.example.com/v1
|
|
287
|
+
Compat: {}
|
|
288
|
+
⚠️ Missing compat flags: supportsLongCacheRetention, sendSessionAffinityHeaders
|
|
289
|
+
Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as baseUrl/api/apiKey/models):
|
|
290
|
+
{
|
|
291
|
+
"supportsLongCacheRetention": true,
|
|
292
|
+
"sendSessionAffinityHeaders": true
|
|
293
|
+
}
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### `/cache-optimizer compat`
|
|
297
|
+
|
|
298
|
+
仅显示当前模型的 compat 建议,包括文件路径、provider 路径和可复制 JSON 片段。当没有缺失标志时,如果模型是适用的第三方代理则显示 `✅ Compat fully configured.`,否则显示 `ℹ️ Compat check not applicable for this model.`。
|
|
299
|
+
|
|
300
|
+
### 安全说明
|
|
301
|
+
|
|
302
|
+
命令只读取 Pi 通过 `ctx.model` 暴露的元数据:provider、id、name、api、baseUrl、compat。它**不会**读取或暴露:
|
|
303
|
+
- API key 或环境密钥
|
|
304
|
+
- 请求/响应 payload
|
|
305
|
+
- Prompt 或模型输出
|
|
306
|
+
- HTTP headers
|
|
307
|
+
- `~/.pi/agent/models.json` 的原始内容
|
|
308
|
+
|
|
197
309
|
## 原理
|
|
198
310
|
|
|
199
311
|
Provider 缓存通常依赖精确或近似精确的前缀匹配。Pi 的 system prompt 包含跨会话稳定的内容(工具定义、技能、规范),也包含每次变化的动态内容(git status、当前任务)。
|
package/index.ts
CHANGED
|
@@ -48,6 +48,16 @@ const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
|
|
|
48
48
|
// persisted metrics.
|
|
49
49
|
let promptTruncationDetected = false;
|
|
50
50
|
|
|
51
|
+
// Timestamp (ms) of the most recent integrity truncation event.
|
|
52
|
+
// Used by /cache-optimizer doctor to surface recovery guidance.
|
|
53
|
+
// Starts at 0 and is reset to 0 on reload.
// NOTE(review): 0 presumably means "no event recorded yet" — confirm at the write site.
|
|
54
|
+
let lastPromptIntegrityWarningAt = 0;
|
|
55
|
+
|
|
56
|
+
/**
 * Returns the current value of the module-level `lastPromptIntegrityWarningAt`
 * timestamp (ms). Exists so the value can be read from outside the module
 * (exported for tests via __internals_for_tests).
 */
|
|
57
|
+
function getLastPromptIntegrityWarningAt(): number {
|
|
58
|
+
return lastPromptIntegrityWarningAt;
|
|
59
|
+
}
|
|
60
|
+
|
|
51
61
|
// Minimum count of skills before compression is worth applying.
|
|
52
62
|
// Below this, pi's verbose XML block is small enough that the overhead of
|
|
53
63
|
// an additional one-line index isn't worth the loss of per-skill
|
|
@@ -548,6 +558,26 @@ function getCompat(model: PiModel | undefined): CacheCompat {
|
|
|
548
558
|
return (model?.compat ?? {}) as CacheCompat;
|
|
549
559
|
}
|
|
550
560
|
|
|
561
|
+
/**
|
|
562
|
+
* Return a platform-friendly display path for `~/.pi/agent/models.json`.
|
|
563
|
+
*
|
|
564
|
+
* On Windows (platform starts with "win") the path is shown as
|
|
565
|
+
* `%USERPROFILE%\.pi\agent\models.json` to match Windows conventions.
|
|
566
|
+
* On all other platforms (Linux, macOS, etc.) it is shown as
|
|
567
|
+
* `~/.pi/agent/models.json` (the Unix-style tilde shorthand).
|
|
568
|
+
*
|
|
569
|
+
* This is a DISPLAY helper only. Actual path resolution is done by Pi
|
|
570
|
+
* (via Node `os.homedir()` + path.join), and this string is never used
|
|
571
|
+
* for I/O — only for warning/doctor/README text so that users on any
|
|
572
|
+
* platform see a copyable path they recognize.
|
|
573
|
+
*
 * @param platform Platform identifier to format for; defaults to `process.platform`.
 * @returns The `%USERPROFILE%`-style path when `platform` starts with "win",
 *          otherwise the `~`-style path. Neither string is expanded or resolved.
*/
|
|
574
|
+
function getModelsJsonDisplayPath(platform: string = process.platform): string {
|
|
575
|
+
// Prefix test: "win32" matches; "darwin" and "cygwin" merely contain "win" and fall through to the tilde form.
if (platform.startsWith("win")) {
|
|
576
|
+
return `%USERPROFILE%\\.pi\\agent\\models.json`;
|
|
577
|
+
}
|
|
578
|
+
return "~/.pi/agent/models.json";
|
|
579
|
+
}
|
|
580
|
+
|
|
551
581
|
function isEnabledEnv(value: string | undefined): boolean {
|
|
552
582
|
if (!value) return false;
|
|
553
583
|
const normalized = value.trim().toLowerCase();
|
|
@@ -729,6 +759,94 @@ function isYiLikeAssistantMessage(message: unknown, model: PiModel | undefined):
|
|
|
729
759
|
return hasAnyTokenContaining(allTokens, ["yi-", "01-ai", "zero-one"]) || allTokens.some((t) => YI_MODEL_PATTERN.test(t));
|
|
730
760
|
}
|
|
731
761
|
|
|
762
|
+
// ── More OpenAI-compatible model detection (batch 2) ───────────────
|
|
763
|
+
|
|
764
|
+
// Matches `seed` as a standalone segment, case-insensitively.
// Before it: start of string or one of `/`, whitespace, `:`, `_`, `-`.
// After it: end of string or one of `-`, `_`, `.`, `:`, `/`, whitespace.
// No `g` flag, so repeated `.test()` calls carry no `lastIndex` state.
const DOUBAO_SEED_PATTERN = /(^|[\/\s:_-])seed($|[\-_.:\/\s])/i;
|
|
765
|
+
|
|
766
|
+
/**
 * Reports whether the configured model looks like a Doubao / ByteDance / Seed model.
 *
 * True when the id/name token values from `getModelIdNameTokenValues` hit one of the
 * brand keywords (as decided by `hasAnyTokenContaining`), or when any single token
 * matches `DOUBAO_SEED_PATTERN`.
 */
function isDoubaoLikeModel(model: PiModel | undefined): boolean {
|
|
767
|
+
const tokens = getModelIdNameTokenValues(model);
|
|
768
|
+
return hasAnyTokenContaining(tokens, ["doubao", "豆包", "volcengine", "bytedance", "byte-dance"]) ||
|
|
769
|
+
tokens.some((t) => DOUBAO_SEED_PATTERN.test(t));
|
|
770
|
+
}
|
|
771
|
+
/**
 * Doubao / ByteDance / Seed check for an assistant message.
 *
 * Applies the same keyword list and `DOUBAO_SEED_PATTERN` test as the model-only
 * check, but over the model's id/name tokens combined with the tokens that
 * `getAssistantMessageModelTokenValues` extracts from `message`.
 */
function isDoubaoLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
772
|
+
const allTokens = [
|
|
773
|
+
...getModelIdNameTokenValues(model),
|
|
774
|
+
...getAssistantMessageModelTokenValues(message),
|
|
775
|
+
];
|
|
776
|
+
return hasAnyTokenContaining(allTokens, ["doubao", "豆包", "volcengine", "bytedance", "byte-dance"]) ||
|
|
777
|
+
allTokens.some((t) => DOUBAO_SEED_PATTERN.test(t));
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * Baidu ERNIE / Wenxin keywords, as decided by `hasAnyTokenContaining`.
 */
function isErnieLikeModel(model: PiModel | undefined): boolean {
|
|
781
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["ernie", "wenxin", "文心", "yiyan", "一言", "baidu"]);
|
|
782
|
+
}
|
|
783
|
+
/**
 * Baidu ERNIE / Wenxin check for an assistant message: passes `message`, `model`
 * and the ERNIE keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isErnieLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
784
|
+
return modelOrAssistantMessageHas(message, model, ["ernie", "wenxin", "文心", "yiyan", "一言", "baidu"]);
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * Baichuan keywords, as decided by `hasAnyTokenContaining`.
 */
function isBaichuanLikeModel(model: PiModel | undefined): boolean {
|
|
788
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["baichuan", "百川"]);
|
|
789
|
+
}
|
|
790
|
+
/**
 * Baichuan check for an assistant message: passes `message`, `model`
 * and the Baichuan keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isBaichuanLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
791
|
+
return modelOrAssistantMessageHas(message, model, ["baichuan", "百川"]);
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * StepFun keywords, as decided by `hasAnyTokenContaining`.
 * NOTE(review): the README describes `step-` as a prefix, but here it is passed as an
 * ordinary keyword; if the helper is a substring test, ids that merely contain
 * "step-" would also match — confirm the intended behavior.
 */
function isStepFunLikeModel(model: PiModel | undefined): boolean {
|
|
795
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["stepfun", "step-"]);
|
|
796
|
+
}
|
|
797
|
+
/**
 * StepFun check for an assistant message: passes `message`, `model`
 * and the StepFun keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isStepFunLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
798
|
+
return modelOrAssistantMessageHas(message, model, ["stepfun", "step-"]);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * iFlytek Spark keywords, as decided by `hasAnyTokenContaining`.
 */
function isSparkLikeModel(model: PiModel | undefined): boolean {
|
|
802
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["spark", "xinghuo", "星火", "iflytek", "讯飞"]);
|
|
803
|
+
}
|
|
804
|
+
/**
 * iFlytek Spark check for an assistant message: passes `message`, `model`
 * and the Spark keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isSparkLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
805
|
+
return modelOrAssistantMessageHas(message, model, ["spark", "xinghuo", "星火", "iflytek", "讯飞"]);
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * InternLM keywords, as decided by `hasAnyTokenContaining`.
 */
function isInternLMLikeModel(model: PiModel | undefined): boolean {
|
|
809
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["internlm", "intern-lm", "书生"]);
|
|
810
|
+
}
|
|
811
|
+
/**
 * InternLM check for an assistant message: passes `message`, `model`
 * and the InternLM keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isInternLMLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
812
|
+
return modelOrAssistantMessageHas(message, model, ["internlm", "intern-lm", "书生"]);
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
/**
 * Reports whether the model's id/name token values hit the `gemma`
 * keyword, as decided by `hasAnyTokenContaining`.
 */
function isGemmaLikeModel(model: PiModel | undefined): boolean {
|
|
816
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["gemma"]);
|
|
817
|
+
}
|
|
818
|
+
/**
 * Gemma check for an assistant message: passes `message`, `model`
 * and the `gemma` keyword to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isGemmaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
819
|
+
return modelOrAssistantMessageHas(message, model, ["gemma"]);
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
// Matches `phi` as a standalone segment, case-insensitively.
// Before it: start of string or one of `/`, whitespace, `:`, `_`, `-`.
// After it: end of string or one of `-`, `_`, `.`, `:`, `/`, whitespace.
// No `g` flag, so repeated `.test()` calls carry no `lastIndex` state.
const PHI_MODEL_PATTERN = /(^|[\/\s:_-])phi($|[\-_.:\/\s])/i;
|
|
823
|
+
|
|
824
|
+
/**
 * Reports whether the configured model looks like a Microsoft Phi model.
 *
 * True when the id/name token values hit the `phi-` keyword (as decided by
 * `hasAnyTokenContaining`), or when any single token matches `PHI_MODEL_PATTERN`.
 * NOTE(review): the keyword check has no leading-boundary guard; if the helper is a
 * substring test, ids such as "delphi-…" would match even though the pattern
 * would reject them — confirm this is intended.
 */
function isPhiLikeModel(model: PiModel | undefined): boolean {
|
|
825
|
+
const tokens = getModelIdNameTokenValues(model);
|
|
826
|
+
return hasAnyTokenContaining(tokens, ["phi-"]) || tokens.some((t) => PHI_MODEL_PATTERN.test(t));
|
|
827
|
+
}
|
|
828
|
+
/**
 * Microsoft Phi check for an assistant message.
 *
 * Applies the same `phi-` keyword and `PHI_MODEL_PATTERN` test as the model-only
 * check, but over the model's id/name tokens combined with the tokens that
 * `getAssistantMessageModelTokenValues` extracts from `message`.
 */
function isPhiLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
829
|
+
const allTokens = [
|
|
830
|
+
...getModelIdNameTokenValues(model),
|
|
831
|
+
...getAssistantMessageModelTokenValues(message),
|
|
832
|
+
];
|
|
833
|
+
return hasAnyTokenContaining(allTokens, ["phi-"]) || allTokens.some((t) => PHI_MODEL_PATTERN.test(t));
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * AI21 Jamba keywords, as decided by `hasAnyTokenContaining`.
 */
function isJambaLikeModel(model: PiModel | undefined): boolean {
|
|
837
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["jamba", "ai21"]);
|
|
838
|
+
}
|
|
839
|
+
/**
 * AI21 Jamba check for an assistant message: passes `message`, `model`
 * and the Jamba keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isJambaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
840
|
+
return modelOrAssistantMessageHas(message, model, ["jamba", "ai21"]);
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
/**
 * Reports whether the model's id/name token values hit one of the
 * Upstage Solar keywords, as decided by `hasAnyTokenContaining`.
 */
function isSolarLikeModel(model: PiModel | undefined): boolean {
|
|
844
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["solar", "upstage"]);
|
|
845
|
+
}
|
|
846
|
+
/**
 * Upstage Solar check for an assistant message: passes `message`, `model`
 * and the Solar keyword list to `modelOrAssistantMessageHas`.
 * NOTE(review): presumably that helper also inspects model identifiers carried on the message — confirm.
 */
function isSolarLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
847
|
+
return modelOrAssistantMessageHas(message, model, ["solar", "upstage"]);
|
|
848
|
+
}
|
|
849
|
+
|
|
732
850
|
// ── Model key ──────────────────────────────────────────────────────
|
|
733
851
|
|
|
734
852
|
function modelKey(model: PiModel): string {
|
|
@@ -994,9 +1112,15 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
994
1112
|
suggestion[flag] = true;
|
|
995
1113
|
}
|
|
996
1114
|
|
|
1115
|
+
// Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
|
|
1116
|
+
// If no slash is found, fall back to the key itself.
|
|
1117
|
+
const slashIdx = key.indexOf("/");
|
|
1118
|
+
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1119
|
+
|
|
1120
|
+
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
997
1121
|
const lines: string[] = [
|
|
998
1122
|
`💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
|
|
999
|
-
`
|
|
1123
|
+
`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models):`,
|
|
1000
1124
|
``,
|
|
1001
1125
|
JSON.stringify(suggestion, null, 2),
|
|
1002
1126
|
``,
|
|
@@ -1050,9 +1174,12 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
1050
1174
|
if (missing.length === 0) return undefined;
|
|
1051
1175
|
|
|
1052
1176
|
const key = modelKey(model);
|
|
1177
|
+
const slashIdx = key.indexOf("/");
|
|
1178
|
+
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
1179
|
+
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1053
1180
|
return (
|
|
1054
1181
|
`💡 pi-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}. ` +
|
|
1055
|
-
|
|
1182
|
+
`Proxies may reduce or hide cache hits. Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`
|
|
1056
1183
|
);
|
|
1057
1184
|
},
|
|
1058
1185
|
},
|
|
@@ -1296,6 +1423,177 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
1296
1423
|
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1297
1424
|
},
|
|
1298
1425
|
},
|
|
1426
|
+
// ── More OpenAI-compatible adapters (batch 2) ───────────────────
|
|
1427
|
+
{
|
|
1428
|
+
id: "openai" as CacheProviderId,
|
|
1429
|
+
label: "Doubao cache",
|
|
1430
|
+
matchesModel: isDoubaoLikeModel,
|
|
1431
|
+
matchesAssistantMessage(message, model) {
|
|
1432
|
+
if (!isAssistantMessage(message)) return false;
|
|
1433
|
+
return isDoubaoLikeAssistantMessage(message, model);
|
|
1434
|
+
},
|
|
1435
|
+
normalizeUsage(message) {
|
|
1436
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1437
|
+
},
|
|
1438
|
+
warningText(model) {
|
|
1439
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1440
|
+
if (missing.length === 0) return undefined;
|
|
1441
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1442
|
+
},
|
|
1443
|
+
},
|
|
1444
|
+
{
|
|
1445
|
+
id: "openai" as CacheProviderId,
|
|
1446
|
+
label: "ERNIE cache",
|
|
1447
|
+
matchesModel: isErnieLikeModel,
|
|
1448
|
+
matchesAssistantMessage(message, model) {
|
|
1449
|
+
if (!isAssistantMessage(message)) return false;
|
|
1450
|
+
return isErnieLikeAssistantMessage(message, model);
|
|
1451
|
+
},
|
|
1452
|
+
normalizeUsage(message) {
|
|
1453
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1454
|
+
},
|
|
1455
|
+
warningText(model) {
|
|
1456
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1457
|
+
if (missing.length === 0) return undefined;
|
|
1458
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1459
|
+
},
|
|
1460
|
+
},
|
|
1461
|
+
{
|
|
1462
|
+
id: "openai" as CacheProviderId,
|
|
1463
|
+
label: "Baichuan cache",
|
|
1464
|
+
matchesModel: isBaichuanLikeModel,
|
|
1465
|
+
matchesAssistantMessage(message, model) {
|
|
1466
|
+
if (!isAssistantMessage(message)) return false;
|
|
1467
|
+
return isBaichuanLikeAssistantMessage(message, model);
|
|
1468
|
+
},
|
|
1469
|
+
normalizeUsage(message) {
|
|
1470
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1471
|
+
},
|
|
1472
|
+
warningText(model) {
|
|
1473
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1474
|
+
if (missing.length === 0) return undefined;
|
|
1475
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1476
|
+
},
|
|
1477
|
+
},
|
|
1478
|
+
{
|
|
1479
|
+
id: "openai" as CacheProviderId,
|
|
1480
|
+
label: "StepFun cache",
|
|
1481
|
+
matchesModel: isStepFunLikeModel,
|
|
1482
|
+
matchesAssistantMessage(message, model) {
|
|
1483
|
+
if (!isAssistantMessage(message)) return false;
|
|
1484
|
+
return isStepFunLikeAssistantMessage(message, model);
|
|
1485
|
+
},
|
|
1486
|
+
normalizeUsage(message) {
|
|
1487
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1488
|
+
},
|
|
1489
|
+
warningText(model) {
|
|
1490
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1491
|
+
if (missing.length === 0) return undefined;
|
|
1492
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1493
|
+
},
|
|
1494
|
+
},
|
|
1495
|
+
{
|
|
1496
|
+
id: "openai" as CacheProviderId,
|
|
1497
|
+
label: "Spark cache",
|
|
1498
|
+
matchesModel: isSparkLikeModel,
|
|
1499
|
+
matchesAssistantMessage(message, model) {
|
|
1500
|
+
if (!isAssistantMessage(message)) return false;
|
|
1501
|
+
return isSparkLikeAssistantMessage(message, model);
|
|
1502
|
+
},
|
|
1503
|
+
normalizeUsage(message) {
|
|
1504
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1505
|
+
},
|
|
1506
|
+
warningText(model) {
|
|
1507
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1508
|
+
if (missing.length === 0) return undefined;
|
|
1509
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1510
|
+
},
|
|
1511
|
+
},
|
|
1512
|
+
{
|
|
1513
|
+
id: "openai" as CacheProviderId,
|
|
1514
|
+
label: "InternLM cache",
|
|
1515
|
+
matchesModel: isInternLMLikeModel,
|
|
1516
|
+
matchesAssistantMessage(message, model) {
|
|
1517
|
+
if (!isAssistantMessage(message)) return false;
|
|
1518
|
+
return isInternLMLikeAssistantMessage(message, model);
|
|
1519
|
+
},
|
|
1520
|
+
normalizeUsage(message) {
|
|
1521
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1522
|
+
},
|
|
1523
|
+
warningText(model) {
|
|
1524
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1525
|
+
if (missing.length === 0) return undefined;
|
|
1526
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1527
|
+
},
|
|
1528
|
+
},
|
|
1529
|
+
{
|
|
1530
|
+
id: "openai" as CacheProviderId,
|
|
1531
|
+
label: "Gemma cache",
|
|
1532
|
+
matchesModel: isGemmaLikeModel,
|
|
1533
|
+
matchesAssistantMessage(message, model) {
|
|
1534
|
+
if (!isAssistantMessage(message)) return false;
|
|
1535
|
+
return isGemmaLikeAssistantMessage(message, model);
|
|
1536
|
+
},
|
|
1537
|
+
normalizeUsage(message) {
|
|
1538
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1539
|
+
},
|
|
1540
|
+
warningText(model) {
|
|
1541
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1542
|
+
if (missing.length === 0) return undefined;
|
|
1543
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1544
|
+
},
|
|
1545
|
+
},
|
|
1546
|
+
{
|
|
1547
|
+
id: "openai" as CacheProviderId,
|
|
1548
|
+
label: "Phi cache",
|
|
1549
|
+
matchesModel: isPhiLikeModel,
|
|
1550
|
+
matchesAssistantMessage(message, model) {
|
|
1551
|
+
if (!isAssistantMessage(message)) return false;
|
|
1552
|
+
return isPhiLikeAssistantMessage(message, model);
|
|
1553
|
+
},
|
|
1554
|
+
normalizeUsage(message) {
|
|
1555
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1556
|
+
},
|
|
1557
|
+
warningText(model) {
|
|
1558
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1559
|
+
if (missing.length === 0) return undefined;
|
|
1560
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1561
|
+
},
|
|
1562
|
+
},
|
|
1563
|
+
{
|
|
1564
|
+
id: "openai" as CacheProviderId,
|
|
1565
|
+
label: "Jamba cache",
|
|
1566
|
+
matchesModel: isJambaLikeModel,
|
|
1567
|
+
matchesAssistantMessage(message, model) {
|
|
1568
|
+
if (!isAssistantMessage(message)) return false;
|
|
1569
|
+
return isJambaLikeAssistantMessage(message, model);
|
|
1570
|
+
},
|
|
1571
|
+
normalizeUsage(message) {
|
|
1572
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1573
|
+
},
|
|
1574
|
+
warningText(model) {
|
|
1575
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1576
|
+
if (missing.length === 0) return undefined;
|
|
1577
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1578
|
+
},
|
|
1579
|
+
},
|
|
1580
|
+
{
|
|
1581
|
+
id: "openai" as CacheProviderId,
|
|
1582
|
+
label: "Solar cache",
|
|
1583
|
+
matchesModel: isSolarLikeModel,
|
|
1584
|
+
matchesAssistantMessage(message, model) {
|
|
1585
|
+
if (!isAssistantMessage(message)) return false;
|
|
1586
|
+
return isSolarLikeAssistantMessage(message, model);
|
|
1587
|
+
},
|
|
1588
|
+
normalizeUsage(message) {
|
|
1589
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1590
|
+
},
|
|
1591
|
+
warningText(model) {
|
|
1592
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1593
|
+
if (missing.length === 0) return undefined;
|
|
1594
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1595
|
+
},
|
|
1596
|
+
},
|
|
1299
1597
|
];
|
|
1300
1598
|
|
|
1301
1599
|
function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
|
|
@@ -1519,6 +1817,76 @@ async function writePersistedCacheStats(state: CacheStatsState): Promise<void> {
|
|
|
1519
1817
|
|
|
1520
1818
|
|
|
1521
1819
|
|
|
1820
|
+
/**
 * Decides whether the OpenAI-compatible proxy compat check is meaningful for
 * the given model.
 *
 * @param model - The model whose `api` and base URL are inspected.
 * @returns `true` only when `lower(model.api)` equals `"openai-completions"`
 *   and `isOfficialOpenAIBaseUrl(model)` reports `false`.
 */
function isCompatCheckApplicable(model: PiModel): boolean {
  // Any API other than "openai-completions" is never subject to the check;
  // the base-URL helper is not consulted in that case.
  if (lower(model.api) !== "openai-completions") {
    return false;
  }
  return !isOfficialOpenAIBaseUrl(model);
}
|
|
1823
|
+
|
|
1824
|
+
/**
 * Assembles the multi-line "doctor" report for a model.
 *
 * The report lists provider, model id, optional display name, API, base URL
 * and the compat object, followed by one of three compat verdicts (missing
 * flags with an edit suggestion, fully configured, or not applicable). When
 * a prompt integrity warning was recorded less than five minutes ago, a
 * recovery section is appended.
 *
 * @param model - The active model to describe.
 * @returns The report lines joined with "\n".
 */
function buildDoctorDiagnosis(model: PiModel): string {
  const report: string[] = [`Provider: ${model.provider}`, `Model: ${model.id}`];

  // The display name is only worth showing when it differs from the id.
  if (model.name && model.name !== model.id) {
    report.push(`Name: ${model.name}`);
  }

  report.push(
    `API: ${model.api}`,
    `Base URL: ${model.baseUrl || "(default)"}`,
    `Compat: ${JSON.stringify(getCompat(model))}`,
  );

  const missingFlags = describeMissingOpenAICompatibleProxyCompat(model);
  if (missingFlags.length === 0) {
    report.push(
      isCompatCheckApplicable(model)
        ? "✅ Compat fully configured."
        : "ℹ️ Compat check not applicable for this model.",
    );
  } else {
    report.push(`⚠️ Missing compat flags: ${missingFlags.join(", ")}`);

    // The provider label is the part of the model key before the first "/";
    // a key with no "/" (or a leading "/") is used whole.
    const fullKey = modelKey(model);
    const separatorAt = fullKey.indexOf("/");
    const providerLabel = separatorAt > 0 ? fullKey.slice(0, separatorAt) : fullKey;

    // Suggested compat object: every missing flag switched on.
    const suggestedCompat = Object.fromEntries(missingFlags.map((flag) => [flag, true]));
    const modelsJsonPath = getModelsJsonDisplayPath();

    report.push(
      `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models):`,
      JSON.stringify(suggestedCompat, null, 2),
    );
  }

  // ── Integrity diagnostics ──
  // A timestamp of 0 (or less) means no warning has been recorded.
  if (lastPromptIntegrityWarningAt > 0) {
    const elapsedMs = Date.now() - lastPromptIntegrityWarningAt;
    const elapsedMin = Math.floor(elapsedMs / 60000);
    if (elapsedMin < 5) {
      // Under one minute the age is reported in whole seconds instead.
      const age = elapsedMin > 0 ? `${elapsedMin} min` : `${Math.floor(elapsedMs / 1000)}s`;
      report.push(
        "",
        "⚠️ Recent prompt integrity issue detected:",
        `  Last detected ${age} ago. The prompt reorder was`,
        `  skipped on that turn to preserve structural markers.`,
        `  Common causes: extension system prompt format change, substring collision.`,
        `  Steps:`,
        `    1. Run /reload to reset (may clear transient issues).`,
        `    2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 & /reload to disable reorder.`,
        `    3. If persistent, file an issue with this doctor output.`,
      );
    }
  }

  return report.join("\n");
}
|
|
1870
|
+
|
|
1871
|
+
/**
 * Produces the compat suggestion text for a model, or nothing when no compat
 * flags are reported missing.
 *
 * @param model - The active model to check.
 * @returns `undefined` when `describeMissingOpenAICompatibleProxyCompat`
 *   returns an empty list; otherwise a multi-line message naming the model
 *   key, the missing flags, where to edit, and a JSON object with each
 *   missing flag set to `true`.
 */
function buildCompatDiagnosis(model: PiModel): string | undefined {
  const missingFlags = describeMissingOpenAICompatibleProxyCompat(model);
  if (missingFlags.length === 0) {
    return undefined;
  }

  // The provider label is the part of the model key before the first "/";
  // a key with no "/" (or a leading "/") is used whole.
  const fullKey = modelKey(model);
  const separatorAt = fullKey.indexOf("/");
  const providerLabel = separatorAt > 0 ? fullKey.slice(0, separatorAt) : fullKey;

  const suggestedCompat = Object.fromEntries(missingFlags.map((flag) => [flag, true]));
  const modelsJsonPath = getModelsJsonDisplayPath();

  // Empty entries yield the blank separator lines of the final message.
  const messageLines = [
    `Active model: ${fullKey}`,
    `Missing: ${missingFlags.join(", ")}`,
    "",
    `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models) and add:`,
    JSON.stringify(suggestedCompat, null, 2),
    "",
    `Only enable if your endpoint supports them.`,
  ];
  return messageLines.join("\n");
}
|
|
1889
|
+
|
|
1522
1890
|
// Internal helpers exported only so the task verification script
|
|
1523
1891
|
// (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
|
|
1524
1892
|
// extension's public API; pi only invokes the default export below.
|
|
@@ -1571,11 +1939,40 @@ export const __internals_for_tests = {
|
|
|
1571
1939
|
isCohereLikeAssistantMessage,
|
|
1572
1940
|
isYiLikeModel,
|
|
1573
1941
|
isYiLikeAssistantMessage,
|
|
1942
|
+
// More OpenAI-compatible model detection (batch 2)
|
|
1943
|
+
isDoubaoLikeModel,
|
|
1944
|
+
isDoubaoLikeAssistantMessage,
|
|
1945
|
+
isErnieLikeModel,
|
|
1946
|
+
isErnieLikeAssistantMessage,
|
|
1947
|
+
isBaichuanLikeModel,
|
|
1948
|
+
isBaichuanLikeAssistantMessage,
|
|
1949
|
+
isStepFunLikeModel,
|
|
1950
|
+
isStepFunLikeAssistantMessage,
|
|
1951
|
+
isSparkLikeModel,
|
|
1952
|
+
isSparkLikeAssistantMessage,
|
|
1953
|
+
isInternLMLikeModel,
|
|
1954
|
+
isInternLMLikeAssistantMessage,
|
|
1955
|
+
isGemmaLikeModel,
|
|
1956
|
+
isGemmaLikeAssistantMessage,
|
|
1957
|
+
isPhiLikeModel,
|
|
1958
|
+
isPhiLikeAssistantMessage,
|
|
1959
|
+
isJambaLikeModel,
|
|
1960
|
+
isJambaLikeAssistantMessage,
|
|
1961
|
+
isSolarLikeModel,
|
|
1962
|
+
isSolarLikeAssistantMessage,
|
|
1574
1963
|
buildOpenAIProxyCompatWarningText,
|
|
1575
1964
|
getModelIdNameTokenValues,
|
|
1576
1965
|
getAssistantMessageModelTokenValues,
|
|
1577
1966
|
getCompat,
|
|
1578
1967
|
modelKey,
|
|
1968
|
+
// Platform-friendly path helper
|
|
1969
|
+
getModelsJsonDisplayPath,
|
|
1970
|
+
// Integrity diagnostics
|
|
1971
|
+
getLastPromptIntegrityWarningAt,
|
|
1972
|
+
// Diagnostic command helpers
|
|
1973
|
+
isCompatCheckApplicable,
|
|
1974
|
+
buildDoctorDiagnosis,
|
|
1975
|
+
buildCompatDiagnosis,
|
|
1579
1976
|
// Cache stats helpers (module-level, usable from verify script)
|
|
1580
1977
|
addUsageToCacheStats,
|
|
1581
1978
|
formatCacheStats,
|
|
@@ -1592,8 +1989,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
1592
1989
|
let lastStatusText: string | undefined;
|
|
1593
1990
|
let persistenceWarningShown = false;
|
|
1594
1991
|
let persistTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1992
|
+
let integrityNotificationShown = false;
|
|
1595
1993
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
1596
1994
|
|
|
1995
|
+
|
|
1597
1996
|
function getCacheStatsState(): CacheStatsState {
|
|
1598
1997
|
return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
|
|
1599
1998
|
}
|
|
@@ -1698,6 +2097,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
1698
2097
|
cacheStatsByModel = {};
|
|
1699
2098
|
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
1700
2099
|
lastStatusText = undefined;
|
|
2100
|
+
// Reset integrity diagnostics on reload
|
|
2101
|
+
lastPromptIntegrityWarningAt = 0;
|
|
2102
|
+
integrityNotificationShown = false;
|
|
1701
2103
|
await flushPersistCacheStats(ctx);
|
|
1702
2104
|
return;
|
|
1703
2105
|
}
|
|
@@ -1737,6 +2139,35 @@ export default function (pi: ExtensionAPI) {
|
|
|
1737
2139
|
if (promptTruncationDetected && statusText !== undefined) {
|
|
1738
2140
|
statusText = statusText + " ⚠️ integrity";
|
|
1739
2141
|
promptTruncationDetected = false;
|
|
2142
|
+
lastPromptIntegrityWarningAt = Date.now();
|
|
2143
|
+
|
|
2144
|
+
// One-time notification with recovery steps (per session).
|
|
2145
|
+
if (!integrityNotificationShown) {
|
|
2146
|
+
integrityNotificationShown = true;
|
|
2147
|
+
ctx.ui.notify(
|
|
2148
|
+
`⚠️ ${LOG_PREFIX}: A prompt structural marker was lost during reorder on this turn. ` +
|
|
2149
|
+
`The original prompt was used instead to preserve integrity.\n\n` +
|
|
2150
|
+
`Recovery steps:\n` +
|
|
2151
|
+
`1. Run /reload to reset (may clear transient issues).\n` +
|
|
2152
|
+
`2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 and /reload to disable reorder.\n` +
|
|
2153
|
+
`3. If persistent, run /cache-optimizer doctor and file an issue (no API keys/prompts).`,
|
|
2154
|
+
"warning",
|
|
2155
|
+
);
|
|
2156
|
+
}
|
|
2157
|
+
}
|
|
2158
|
+
|
|
2159
|
+
// ⚠️ compat footer marker: if the active model is a non-official
|
|
2160
|
+
// openai-completions model with missing supportsLongCacheRetention
|
|
2161
|
+
// or sendSessionAffinityHeaders, append the marker to indicate that
|
|
2162
|
+
// compat configuration is incomplete. Re-evaluated on every status
|
|
2163
|
+
// update so the marker persists through stats changes and day
|
|
2164
|
+
// rollovers. Redundant setStatus calls are blocked by the
|
|
2165
|
+
// `lastStatusText` early return below.
|
|
2166
|
+
if (statusText !== undefined && model) {
|
|
2167
|
+
const compatMissing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2168
|
+
if (compatMissing.length > 0) {
|
|
2169
|
+
statusText = statusText + " ⚠️ compat";
|
|
2170
|
+
}
|
|
1740
2171
|
}
|
|
1741
2172
|
|
|
1742
2173
|
if (statusText === lastStatusText) return;
|
|
@@ -1868,4 +2299,99 @@ export default function (pi: ExtensionAPI) {
|
|
|
1868
2299
|
schedulePersistCacheStats(ctx);
|
|
1869
2300
|
await publishStatus(ctx);
|
|
1870
2301
|
});
|
|
2302
|
+
|
|
2303
|
+
// ────────────────────────────────────────────────────────────────
|
|
2304
|
+
// Register /cache-optimizer command
|
|
2305
|
+
// Subcommands:
|
|
2306
|
+
// doctor — show current model/provider/api/baseUrl/compat status
|
|
2307
|
+
// compat — show compat suggestion with file path
|
|
2308
|
+
// (no args) — show help summary + current diagnosis
|
|
2309
|
+
// ────────────────────────────────────────────────────────────────
|
|
2310
|
+
pi.registerCommand("cache-optimizer", {
|
|
2311
|
+
description: "Diagnose Pi cache configuration",
|
|
2312
|
+
handler: async (args: string, cmdCtx) => {
|
|
2313
|
+
const model = cmdCtx.model;
|
|
2314
|
+
const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
|
|
2315
|
+
|
|
2316
|
+
if (subcommand === "doctor") {
|
|
2317
|
+
if (!model) {
|
|
2318
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
2319
|
+
return;
|
|
2320
|
+
}
|
|
2321
|
+
cmdCtx.ui.notify(buildDoctorDiagnosis(model), "info");
|
|
2322
|
+
} else if (subcommand === "compat") {
|
|
2323
|
+
if (!model) {
|
|
2324
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
2325
|
+
return;
|
|
2326
|
+
}
|
|
2327
|
+
const compatResult = buildCompatDiagnosis(model);
|
|
2328
|
+
if (compatResult) {
|
|
2329
|
+
cmdCtx.ui.notify(compatResult, "warning");
|
|
2330
|
+
} else {
|
|
2331
|
+
cmdCtx.ui.notify(
|
|
2332
|
+
isCompatCheckApplicable(model)
|
|
2333
|
+
? "✅ Compat fully configured."
|
|
2334
|
+
: "ℹ️ Compat check not applicable for this model.",
|
|
2335
|
+
"info",
|
|
2336
|
+
);
|
|
2337
|
+
}
|
|
2338
|
+
} else {
|
|
2339
|
+
// Try interactive selection menu when UI supports it
|
|
2340
|
+
if (cmdCtx.hasUI) {
|
|
2341
|
+
const menuOptions = [
|
|
2342
|
+
"🩺 Doctor — Show current model cache configuration",
|
|
2343
|
+
"⚙️ Compat — Show compat suggestion with edit instructions",
|
|
2344
|
+
"❌ Cancel",
|
|
2345
|
+
];
|
|
2346
|
+
const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
|
|
2347
|
+
if (choice === menuOptions[0]) {
|
|
2348
|
+
if (!model) {
|
|
2349
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
2350
|
+
} else {
|
|
2351
|
+
cmdCtx.ui.notify(buildDoctorDiagnosis(model), "info");
|
|
2352
|
+
}
|
|
2353
|
+
} else if (choice === menuOptions[1]) {
|
|
2354
|
+
if (!model) {
|
|
2355
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
2356
|
+
} else {
|
|
2357
|
+
const compatResult = buildCompatDiagnosis(model);
|
|
2358
|
+
if (compatResult) {
|
|
2359
|
+
cmdCtx.ui.notify(compatResult, "warning");
|
|
2360
|
+
} else {
|
|
2361
|
+
cmdCtx.ui.notify(
|
|
2362
|
+
isCompatCheckApplicable(model)
|
|
2363
|
+
? "✅ Compat fully configured."
|
|
2364
|
+
: "ℹ️ Compat check not applicable for this model.",
|
|
2365
|
+
"info",
|
|
2366
|
+
);
|
|
2367
|
+
}
|
|
2368
|
+
}
|
|
2369
|
+
}
|
|
2370
|
+
// choice === "cancel" or undefined → no action
|
|
2371
|
+
return;
|
|
2372
|
+
}
|
|
2373
|
+
|
|
2374
|
+
// Fallback: text help when no interactive UI
|
|
2375
|
+
const diagnosis: string[] = [];
|
|
2376
|
+
diagnosis.push("📋 /cache-optimizer commands:");
|
|
2377
|
+
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat status");
|
|
2378
|
+
diagnosis.push(" compat — Show compat suggestion with edit location");
|
|
2379
|
+
diagnosis.push("");
|
|
2380
|
+
if (model) {
|
|
2381
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2382
|
+
if (missing.length > 0) {
|
|
2383
|
+
diagnosis.push(`⚠️ Active model "${modelKey(model)}" missing compat: ${missing.join(", ")}`);
|
|
2384
|
+
diagnosis.push('Run "/cache-optimizer compat" for edit instructions.');
|
|
2385
|
+
} else if (isCompatCheckApplicable(model)) {
|
|
2386
|
+
diagnosis.push(`✅ Active model "${modelKey(model)}": compat fully configured.`);
|
|
2387
|
+
} else {
|
|
2388
|
+
diagnosis.push(`ℹ️ Active model "${modelKey(model)}": compat check not applicable.`);
|
|
2389
|
+
}
|
|
2390
|
+
} else {
|
|
2391
|
+
diagnosis.push("No active model selected.");
|
|
2392
|
+
}
|
|
2393
|
+
cmdCtx.ui.notify(diagnosis.join("\n"), "info");
|
|
2394
|
+
}
|
|
2395
|
+
},
|
|
2396
|
+
});
|
|
1871
2397
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.4.4",
|
|
3
|
+
"version": "2.4.6",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|