pi-cache-optimizer 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -1
- package/README.zh-CN.md +114 -1
- package/index.ts +392 -30
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -20,6 +20,7 @@ Pi extension for improving provider-side KV / prompt cache hit rates. It keeps s
|
|
|
20
20
|
- [Anthropic adaptive thinking models](#anthropic-adaptive-thinking-models)
|
|
21
21
|
- [Auto-repair with `/cache-optimizer fix`](#auto-repair-with-cache-optimizer-fix)
|
|
22
22
|
- [Footer stats](#footer-stats)
|
|
23
|
+
- [For router / virtual-channel extension authors](#for-router--virtual-channel-extension-authors)
|
|
23
24
|
- [Uninstall](#uninstall)
|
|
24
25
|
- [Verify effect](#verify-effect)
|
|
25
26
|
- [License](#license)
|
|
@@ -33,8 +34,9 @@ Pi extension for improving provider-side KV / prompt cache hit rates. It keeps s
|
|
|
33
34
|
- Warns once for third-party OpenAI-compatible proxies missing cache/session-affinity compat flags.
|
|
34
35
|
- Detects Anthropic adaptive thinking models (opus-4.6+, sonnet-4.6+, fable-5+) missing `forceAdaptiveThinking: true` compat.
|
|
35
36
|
- Shows session-scoped footer stats for supported model families.
|
|
37
|
+
- Supports optional router-extension integration through versioned global protocols (`Symbol.for("pi.routing.registry.v1")` and `Symbol.for("pi.cache.hints.v1")`) without importing router packages.
|
|
36
38
|
|
|
37
|
-
Caching is provider-side and best-effort. Third-party proxies can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
|
|
39
|
+
Caching is provider-side and best-effort. Third-party proxies and router extensions can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
|
|
38
40
|
|
|
39
41
|
## Install
|
|
40
42
|
|
|
@@ -50,6 +52,8 @@ pi remove npm:pi-deepseek-cache-optimizer && pi install npm:pi-cache-optimizer
|
|
|
50
52
|
|
|
51
53
|
Run `/reload` in Pi after install/update/remove so extension hooks refresh.
|
|
52
54
|
|
|
55
|
+
On Pi 0.79.7 and newer, `pi update` updates Pi itself only. To update installed Pi packages such as this extension, run `pi update --extensions` (packages only) or `pi update --all` (Pi + packages).
|
|
56
|
+
|
|
53
57
|
## Commands
|
|
54
58
|
|
|
55
59
|
| Command | Effect |
|
|
@@ -212,6 +216,8 @@ If only one model should change, use `modelOverrides`:
|
|
|
212
216
|
|
|
213
217
|
Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
|
|
214
218
|
|
|
219
|
+
Pi 0.79+ also includes a built-in footer `CH` marker for the latest prompt cache hit rate. This extension complements that marker with persisted, provider/model/session-scoped counters plus proxy compat diagnostics.
|
|
220
|
+
|
|
215
221
|
Example footer:
|
|
216
222
|
|
|
217
223
|
```text
|
|
@@ -224,6 +230,113 @@ Supported footer labels include: DS, Claude, OpenAI, Gemini, Kimi, Qwen, GLM, Mi
|
|
|
224
230
|
|
|
225
231
|
Adapter selection uses only model id/name (plus assistant message model/name on message end). Generic OpenAI-shaped APIs are not treated as OpenAI-family unless the model id/name matches a supported family.
|
|
226
232
|
|
|
233
|
+
## For router / virtual-channel extension authors
|
|
234
|
+
|
|
235
|
+
If your Pi extension provides a virtual routing provider (for example `router/auto`, `router/smart`, or a profile/channel that forwards to a real upstream), this extension can show cache stats for the real upstream provider/model instead of the virtual shell. Integration is optional, versioned, and does **not** require importing this package.
|
|
236
|
+
|
|
237
|
+
### Minimum integration: final assistant message metadata
|
|
238
|
+
|
|
239
|
+
For seamless final cache-stat attribution, relay the real upstream identity on completed assistant messages:
|
|
240
|
+
|
|
241
|
+
```ts
|
|
242
|
+
{
|
|
243
|
+
role: "assistant",
|
|
244
|
+
provider: "anthropic", // real upstream provider
|
|
245
|
+
responseModel: "claude-opus-4-8", // or model: "..."
|
|
246
|
+
api: "anthropic-messages", // upstream Pi API id when known
|
|
247
|
+
usage: {
|
|
248
|
+
input: 1200, // Pi-normalized uncached input tokens, if available
|
|
249
|
+
cacheRead: 8000, // tokens read from provider prompt cache
|
|
250
|
+
cacheWrite: 500, // tokens newly written to provider prompt cache
|
|
251
|
+
},
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
`message_end` treats these assistant-message fields as authoritative. If `provider` + `model`/`responseModel` + cache usage are present, stats update the upstream bucket even when the active model is still `router/auto`. If upstream usage does not expose cache fields, leave them absent/zero; this extension will not fake cache hits.
|
|
256
|
+
|
|
257
|
+
### Optional: live route registry for pre-response UX
|
|
258
|
+
|
|
259
|
+
Final message metadata is enough for post-response stats. For pre-response flows — footer display before the first response, `/cache-optimizer doctor`, `/cache-optimizer compat`, `/cache-optimizer reset`, and OpenAI-compatible `prompt_cache_key` fallback — register a live route adapter under `Symbol.for("pi.routing.registry.v1")`.
|
|
260
|
+
|
|
261
|
+
Protocol shape:
|
|
262
|
+
|
|
263
|
+
```ts
|
|
264
|
+
type PiRouteSnapshot = {
|
|
265
|
+
virtualProvider: string;
|
|
266
|
+
virtualModelId: string;
|
|
267
|
+
provider: string;
|
|
268
|
+
modelId: string;
|
|
269
|
+
api?: string;
|
|
270
|
+
canonicalModelId?: string;
|
|
271
|
+
routeLabel?: string;
|
|
272
|
+
status?: "planned" | "trying" | "selected" | "success" | "failed";
|
|
273
|
+
sessionIdHash?: string;
|
|
274
|
+
requestId?: string;
|
|
275
|
+
timestamp: number;
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
type PiRouterAdapterV1 = {
|
|
279
|
+
virtualProvider: string;
|
|
280
|
+
resolveActiveRoute(
|
|
281
|
+
virtualModelId: string,
|
|
282
|
+
hint?: { sessionIdHash?: string; requestId?: string },
|
|
283
|
+
): PiRouteSnapshot | undefined;
|
|
284
|
+
resolveCandidateRoutes?(virtualModelId: string): PiRouteSnapshot[];
|
|
285
|
+
subscribe?(listener: (event: PiRouteSnapshot) => void): () => void;
|
|
286
|
+
};
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
Registration pattern:
|
|
290
|
+
|
|
291
|
+
```ts
|
|
292
|
+
const ROUTING = Symbol.for("pi.routing.registry.v1");
|
|
293
|
+
const registry = (globalThis as Record<symbol, unknown>)[ROUTING] as
|
|
294
|
+
| { version: 1; registerRouter(adapter: PiRouterAdapterV1): () => void }
|
|
295
|
+
| undefined;
|
|
296
|
+
|
|
297
|
+
registry?.registerRouter({
|
|
298
|
+
virtualProvider: "router",
|
|
299
|
+
resolveActiveRoute(virtualModelId, hint) {
|
|
300
|
+
return {
|
|
301
|
+
virtualProvider: "router",
|
|
302
|
+
virtualModelId,
|
|
303
|
+
provider: "deepseek",
|
|
304
|
+
modelId: "deepseek-v4",
|
|
305
|
+
api: "openai-completions",
|
|
306
|
+
sessionIdHash: hint?.sessionIdHash,
|
|
307
|
+
timestamp: Date.now(),
|
|
308
|
+
};
|
|
309
|
+
},
|
|
310
|
+
});
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
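Do not overwrite an existing registry. If your extension loads before this optimizer, retry registration on `session_start`, or create the same V1 registry shape only if no registry exists. A registry you create yourself must expose `version: 1`, `registerRouter(adapter)`, and `getRouter(virtualProvider)`; this optimizer ignores any object that lacks one of these members and installs its own registry in its place.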
|
|
314
|
+
|
|
315
|
+
### Optional: query-scoped cache hints
|
|
316
|
+
|
|
317
|
+
Routers that forward to an inner Pi request path can read query-scoped hints from `Symbol.for("pi.cache.hints.v1")`:
|
|
318
|
+
|
|
319
|
+
```ts
|
|
320
|
+
const CACHE_HINTS = Symbol.for("pi.cache.hints.v1");
|
|
321
|
+
const hints = (globalThis as Record<symbol, any>)[CACHE_HINTS]?.getHints?.({
|
|
322
|
+
sessionIdHash,
|
|
323
|
+
virtualProvider: "router",
|
|
324
|
+
virtualModelId: "auto",
|
|
325
|
+
upstreamProvider: "deepseek",
|
|
326
|
+
upstreamModelId: "deepseek-v4",
|
|
327
|
+
api: "openai-completions",
|
|
328
|
+
});
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
When the query matches the current session/route, `hints` may contain `systemPrompt`, `promptCacheKey`, and `cacheRetention: "long"`. Treat these as advisory and sensitive: do not log them, do not expose prompt text, and do not overwrite an existing request-level `prompt_cache_key` / `promptCacheKey`.
|
|
332
|
+
|
|
333
|
+
### Security and correctness rules
|
|
334
|
+
|
|
335
|
+
- Do not import `pi-cache-optimizer`; use `Symbol.for(...)` discovery only.
|
|
336
|
+
- Do not expose API keys, prompts, payloads, headers, response bodies, or model output in route snapshots or logs.
|
|
337
|
+
- Use assistant-message metadata for final attribution; live registry data is advisory and may be stale by response time.
|
|
338
|
+
- Preserve truthful usage. Missing cache usage should show as 0/under-reported, not as synthetic hits.
|
|
339
|
+
|
|
227
340
|
## Uninstall
|
|
228
341
|
|
|
229
342
|
```bash
|
package/README.zh-CN.md
CHANGED
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
- [Anthropic adaptive thinking 模型](#anthropic-adaptive-thinking-模型)
|
|
21
21
|
- [使用 `/cache-optimizer fix` 自动修复](#使用-cache-optimizer-fix-自动修复)
|
|
22
22
|
- [Footer 统计](#footer-统计)
|
|
23
|
+
- [Router / Virtual-channel 扩展作者指南](#router--virtual-channel-扩展作者指南)
|
|
23
24
|
- [卸载](#卸载)
|
|
24
25
|
- [验证效果](#验证效果)
|
|
25
26
|
- [License](#license)
|
|
@@ -33,8 +34,9 @@
|
|
|
33
34
|
- 对缺少缓存 / session-affinity compat 的第三方 OpenAI-compatible 代理给出一次性提醒。
|
|
34
35
|
- 检测 Anthropic adaptive thinking 模型(opus-4.6+、sonnet-4.6+、fable-5+)是否缺少 `forceAdaptiveThinking: true` compat。
|
|
35
36
|
- 为支持的模型家族显示按 session 隔离的底部缓存统计。
|
|
37
|
+
- 通过版本化全局协议(`Symbol.for("pi.routing.registry.v1")` 与 `Symbol.for("pi.cache.hints.v1")`)支持可选的 router extension 集成,而不导入任何 router 包。
|
|
36
38
|
|
|
37
|
-
缓存是 provider 侧的 best-effort 行为。第三方代理仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
|
|
39
|
+
缓存是 provider 侧的 best-effort 行为。第三方代理和 router extension 仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
|
|
38
40
|
|
|
39
41
|
## 安装
|
|
40
42
|
|
|
@@ -50,6 +52,8 @@ pi remove npm:pi-deepseek-cache-optimizer && pi install npm:pi-cache-optimizer
|
|
|
50
52
|
|
|
51
53
|
安装、更新或移除后,在 Pi 中运行 `/reload`,让 extension hooks 刷新。
|
|
52
54
|
|
|
55
|
+
Pi 0.79.7 及之后,`pi update` 默认只更新 Pi 本体。若要更新已安装的 Pi package(包括本扩展),请运行 `pi update --extensions`(只更新 packages)或 `pi update --all`(Pi 与 packages 一起更新)。
|
|
56
|
+
|
|
53
57
|
## 命令
|
|
54
58
|
|
|
55
59
|
| 命令 | 作用 |
|
|
@@ -212,6 +216,8 @@ Provider 级最小 override:
|
|
|
212
216
|
|
|
213
217
|
统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
|
|
214
218
|
|
|
219
|
+
Pi 0.79+ 已内置 footer `CH` 标记,用于显示最近一次 prompt cache hit rate。本扩展在此基础上补充持久化的 provider/model/session-scoped 计数,以及代理 compat 诊断。
|
|
220
|
+
|
|
215
221
|
示例 footer:
|
|
216
222
|
|
|
217
223
|
```text
|
|
@@ -224,6 +230,113 @@ OpenAI cache 3/10 · 0.002M/0.005M tok (40%) ⚠️ compat
|
|
|
224
230
|
|
|
225
231
|
Adapter 选择只看模型 id/name(以及 message_end 时 assistant message 的 model/name)。仅使用 OpenAI-shaped API 不会被当作 OpenAI-family,除非模型 id/name 匹配受支持的家族。
|
|
226
232
|
|
|
233
|
+
## Router / Virtual-channel 扩展作者指南
|
|
234
|
+
|
|
235
|
+
如果你的 Pi 扩展提供虚拟 routing provider(例如 `router/auto`、`router/smart`,或会转发到真实上游的 profile/channel),本扩展可以为真实上游 provider/model 显示缓存统计,而不是把统计记到虚拟外壳上。集成是可选、版本化的,并且**不需要导入本包**。
|
|
236
|
+
|
|
237
|
+
### 最小集成:最终 assistant message metadata
|
|
238
|
+
|
|
239
|
+
要无缝获得最终缓存统计归因,请在完成的 assistant message 上透传真实上游身份:
|
|
240
|
+
|
|
241
|
+
```ts
|
|
242
|
+
{
|
|
243
|
+
role: "assistant",
|
|
244
|
+
provider: "anthropic", // 真实上游 provider
|
|
245
|
+
responseModel: "claude-opus-4-8", // 或 model: "..."
|
|
246
|
+
api: "anthropic-messages", // 已知时填写上游 Pi API id
|
|
247
|
+
usage: {
|
|
248
|
+
input: 1200, // Pi-normalized 未缓存 input tokens,如可用
|
|
249
|
+
cacheRead: 8000, // 从 provider prompt cache 读取的 tokens
|
|
250
|
+
cacheWrite: 500, // 本次新写入 provider prompt cache 的 tokens
|
|
251
|
+
},
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
`message_end` 会把这些 assistant-message 字段视为权威来源。只要存在 `provider` + `model`/`responseModel` + cache usage,即使 active model 仍是 `router/auto`,统计也会更新真实上游桶。如果上游 usage 没有 cache 字段,请保持缺失或为 0;本扩展不会伪造 cache hit。
|
|
256
|
+
|
|
257
|
+
### 可选:用于预响应 UX 的实时路由注册表
|
|
258
|
+
|
|
259
|
+
最终 message metadata 足以支持响应后的统计。若要支持响应前流程——首次响应前的 footer 显示、`/cache-optimizer doctor`、`/cache-optimizer compat`、`/cache-optimizer reset` 和 OpenAI-compatible `prompt_cache_key` fallback——请在 `Symbol.for("pi.routing.registry.v1")` 下注册 live route adapter。
|
|
260
|
+
|
|
261
|
+
协议形状:
|
|
262
|
+
|
|
263
|
+
```ts
|
|
264
|
+
type PiRouteSnapshot = {
|
|
265
|
+
virtualProvider: string;
|
|
266
|
+
virtualModelId: string;
|
|
267
|
+
provider: string;
|
|
268
|
+
modelId: string;
|
|
269
|
+
api?: string;
|
|
270
|
+
canonicalModelId?: string;
|
|
271
|
+
routeLabel?: string;
|
|
272
|
+
status?: "planned" | "trying" | "selected" | "success" | "failed";
|
|
273
|
+
sessionIdHash?: string;
|
|
274
|
+
requestId?: string;
|
|
275
|
+
timestamp: number;
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
type PiRouterAdapterV1 = {
|
|
279
|
+
virtualProvider: string;
|
|
280
|
+
resolveActiveRoute(
|
|
281
|
+
virtualModelId: string,
|
|
282
|
+
hint?: { sessionIdHash?: string; requestId?: string },
|
|
283
|
+
): PiRouteSnapshot | undefined;
|
|
284
|
+
resolveCandidateRoutes?(virtualModelId: string): PiRouteSnapshot[];
|
|
285
|
+
subscribe?(listener: (event: PiRouteSnapshot) => void): () => void;
|
|
286
|
+
};
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
注册模式:
|
|
290
|
+
|
|
291
|
+
```ts
|
|
292
|
+
const ROUTING = Symbol.for("pi.routing.registry.v1");
|
|
293
|
+
const registry = (globalThis as Record<symbol, unknown>)[ROUTING] as
|
|
294
|
+
| { version: 1; registerRouter(adapter: PiRouterAdapterV1): () => void }
|
|
295
|
+
| undefined;
|
|
296
|
+
|
|
297
|
+
registry?.registerRouter({
|
|
298
|
+
virtualProvider: "router",
|
|
299
|
+
resolveActiveRoute(virtualModelId, hint) {
|
|
300
|
+
return {
|
|
301
|
+
virtualProvider: "router",
|
|
302
|
+
virtualModelId,
|
|
303
|
+
provider: "deepseek",
|
|
304
|
+
modelId: "deepseek-v4",
|
|
305
|
+
api: "openai-completions",
|
|
306
|
+
sessionIdHash: hint?.sessionIdHash,
|
|
307
|
+
timestamp: Date.now(),
|
|
308
|
+
};
|
|
309
|
+
},
|
|
310
|
+
});
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
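不要覆盖已有 registry。如果你的扩展比本优化器更早加载,请在 `session_start` 时重试注册,或仅在 registry 不存在时创建同样的 V1 registry 形状。自行创建的 registry 必须同时提供 `version: 1`、`registerRouter(adapter)` 和 `getRouter(virtualProvider)`;缺少其中任何一项的对象会被本优化器忽略,并被它自己创建的 registry 取代。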
|
|
314
|
+
|
|
315
|
+
### 可选:按查询过滤的缓存提示
|
|
316
|
+
|
|
317
|
+
会转发到内部 Pi 请求路径的 router,可以从 `Symbol.for("pi.cache.hints.v1")` 读取按查询过滤的提示:
|
|
318
|
+
|
|
319
|
+
```ts
|
|
320
|
+
const CACHE_HINTS = Symbol.for("pi.cache.hints.v1");
|
|
321
|
+
const hints = (globalThis as Record<symbol, any>)[CACHE_HINTS]?.getHints?.({
|
|
322
|
+
sessionIdHash,
|
|
323
|
+
virtualProvider: "router",
|
|
324
|
+
virtualModelId: "auto",
|
|
325
|
+
upstreamProvider: "deepseek",
|
|
326
|
+
upstreamModelId: "deepseek-v4",
|
|
327
|
+
api: "openai-completions",
|
|
328
|
+
});
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
当查询匹配当前 session/route 时,`hints` 可能包含 `systemPrompt`、`promptCacheKey` 和 `cacheRetention: "long"`。这些提示是参考信息且可能敏感:不要记录日志,不要暴露 prompt 文本,也不要覆盖已有 request-level `prompt_cache_key` / `promptCacheKey`。
|
|
332
|
+
|
|
333
|
+
### 安全与正确性规则
|
|
334
|
+
|
|
335
|
+
- 不要导入 `pi-cache-optimizer`;只使用 `Symbol.for(...)` 发现协议。
|
|
336
|
+
- 不要在 route snapshot 或日志中暴露 API key、prompt、payload、headers、response body 或模型输出。
|
|
337
|
+
- 最终归因使用 assistant-message metadata;live registry 只是参考信息,到响应完成时可能已经过期。
|
|
338
|
+
- 保持 usage 真实。缺失 cache usage 时应该显示 0 或低报,而不是合成命中。
|
|
339
|
+
|
|
227
340
|
## 卸载
|
|
228
341
|
|
|
229
342
|
```bash
|
package/index.ts
CHANGED
|
@@ -71,6 +71,8 @@ const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
|
|
|
71
71
|
const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
72
72
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
73
73
|
const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
|
|
74
|
+
const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("pi.routing.registry.v1");
|
|
75
|
+
const PI_CACHE_HINTS_SYMBOL = Symbol.for("pi.cache.hints.v1");
|
|
74
76
|
|
|
75
77
|
let runtimeOptimizerEnabled = true;
|
|
76
78
|
|
|
@@ -168,6 +170,80 @@ type PersistedRoutedModelRef = {
|
|
|
168
170
|
name?: string;
|
|
169
171
|
};
|
|
170
172
|
|
|
173
|
+
type PiRouteSnapshot = {
|
|
174
|
+
virtualProvider: string;
|
|
175
|
+
virtualModelId: string;
|
|
176
|
+
provider: string;
|
|
177
|
+
modelId: string;
|
|
178
|
+
api?: string;
|
|
179
|
+
canonicalModelId?: string;
|
|
180
|
+
routeLabel?: string;
|
|
181
|
+
status?: "planned" | "trying" | "selected" | "success" | "failed";
|
|
182
|
+
sessionIdHash?: string;
|
|
183
|
+
requestId?: string;
|
|
184
|
+
timestamp: number;
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
type PiRouteResolveHint = {
|
|
188
|
+
sessionIdHash?: string;
|
|
189
|
+
requestId?: string;
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
type PiRouterAdapterV1 = {
|
|
193
|
+
virtualProvider: string;
|
|
194
|
+
resolveActiveRoute(
|
|
195
|
+
virtualModelId: string,
|
|
196
|
+
hint?: PiRouteResolveHint,
|
|
197
|
+
): PiRouteSnapshot | undefined;
|
|
198
|
+
resolveCandidateRoutes?(virtualModelId: string): PiRouteSnapshot[];
|
|
199
|
+
subscribe?(listener: (event: PiRouteSnapshot) => void): () => void;
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
type PiRoutingRegistryV1 = {
|
|
203
|
+
version: 1;
|
|
204
|
+
registerRouter(adapter: PiRouterAdapterV1): () => void;
|
|
205
|
+
getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined;
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
type PiCacheHintsInput = {
|
|
209
|
+
sessionIdHash?: string;
|
|
210
|
+
virtualProvider?: string;
|
|
211
|
+
virtualModelId?: string;
|
|
212
|
+
upstreamProvider?: string;
|
|
213
|
+
upstreamModelId?: string;
|
|
214
|
+
api?: string;
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
type PiCacheHintsOutput = {
|
|
218
|
+
systemPrompt?: string;
|
|
219
|
+
promptCacheKey?: string;
|
|
220
|
+
cacheRetention?: "long";
|
|
221
|
+
};
|
|
222
|
+
|
|
223
|
+
type PiCacheHintSnapshot = PiCacheHintsInput & PiCacheHintsOutput & {
|
|
224
|
+
timestamp: number;
|
|
225
|
+
};
|
|
226
|
+
|
|
227
|
+
type PiCacheHintsV1 = {
|
|
228
|
+
version: 1;
|
|
229
|
+
getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined;
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
type ProtocolGlobal = typeof globalThis & Record<symbol, unknown> & {
|
|
233
|
+
__piCacheOptimizerRouter?: unknown;
|
|
234
|
+
__piCacheOptimizerCacheKey__?: unknown;
|
|
235
|
+
};
|
|
236
|
+
|
|
237
|
+
type ModelRegistryLike = {
|
|
238
|
+
find?(provider: string, modelId: string): PiModel | undefined;
|
|
239
|
+
getAvailable?(): PiModel[];
|
|
240
|
+
getAll?(): PiModel[];
|
|
241
|
+
};
|
|
242
|
+
|
|
243
|
+
type ContextWithOptionalModelRegistry = Pick<ExtensionContext, "sessionManager"> & {
|
|
244
|
+
modelRegistry?: ModelRegistryLike;
|
|
245
|
+
};
|
|
246
|
+
|
|
171
247
|
type CacheStatsState = {
|
|
172
248
|
statsByModel: Record<string, CacheStats>;
|
|
173
249
|
legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
|
|
@@ -636,6 +712,210 @@ function hashSessionId(sessionId: string): string {
|
|
|
636
712
|
return createHash("sha256").update(sessionId).digest("hex").slice(0, 16);
|
|
637
713
|
}
|
|
638
714
|
|
|
715
|
+
function getProtocolGlobal(): ProtocolGlobal {
|
|
716
|
+
return globalThis as ProtocolGlobal;
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
function firstNonEmptyString(...values: unknown[]): string | undefined {
|
|
720
|
+
for (const value of values) {
|
|
721
|
+
if (isNonEmptyString(value)) return value.trim();
|
|
722
|
+
}
|
|
723
|
+
return undefined;
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
function sessionHashFromContext(ctx: Pick<ExtensionContext, "sessionManager">): string | undefined {
|
|
727
|
+
const sessionId = ctx.sessionManager.getSessionId();
|
|
728
|
+
return sessionId ? hashSessionId(sessionId) : undefined;
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
function isPiRouterAdapterV1(value: unknown): value is PiRouterAdapterV1 {
|
|
732
|
+
const record = asRecord(value);
|
|
733
|
+
return !!record && isNonEmptyString(record.virtualProvider) && typeof record.resolveActiveRoute === "function";
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
function isRoutingRegistryV1(value: unknown): value is PiRoutingRegistryV1 {
|
|
737
|
+
const record = asRecord(value);
|
|
738
|
+
return !!record && record.version === 1 && typeof record.registerRouter === "function" && typeof record.getRouter === "function";
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
function createRoutingRegistry(): PiRoutingRegistryV1 {
|
|
742
|
+
const routers = new Map<string, PiRouterAdapterV1>();
|
|
743
|
+
return {
|
|
744
|
+
version: 1,
|
|
745
|
+
registerRouter(adapter: PiRouterAdapterV1): () => void {
|
|
746
|
+
if (!isPiRouterAdapterV1(adapter)) return () => undefined;
|
|
747
|
+
const key = adapter.virtualProvider.trim();
|
|
748
|
+
routers.set(key, adapter);
|
|
749
|
+
return () => {
|
|
750
|
+
if (routers.get(key) === adapter) routers.delete(key);
|
|
751
|
+
};
|
|
752
|
+
},
|
|
753
|
+
getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined {
|
|
754
|
+
return routers.get(virtualProvider);
|
|
755
|
+
},
|
|
756
|
+
};
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
function getRoutingRegistry(): PiRoutingRegistryV1 | undefined {
|
|
760
|
+
const candidate = getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL];
|
|
761
|
+
return isRoutingRegistryV1(candidate) ? candidate : undefined;
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
function ensureRoutingRegistry(): PiRoutingRegistryV1 {
|
|
765
|
+
const existing = getRoutingRegistry();
|
|
766
|
+
if (existing) return existing;
|
|
767
|
+
|
|
768
|
+
const created = createRoutingRegistry();
|
|
769
|
+
getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL] = created;
|
|
770
|
+
return created;
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
function parseRouteStatus(value: unknown): PiRouteSnapshot["status"] | undefined {
|
|
774
|
+
return value === "planned" || value === "trying" || value === "selected" || value === "success" || value === "failed"
|
|
775
|
+
? value
|
|
776
|
+
: undefined;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
function parseRouteSnapshot(
|
|
780
|
+
value: unknown,
|
|
781
|
+
fallbackVirtualProvider?: string,
|
|
782
|
+
fallbackVirtualModelId?: string,
|
|
783
|
+
): PiRouteSnapshot | undefined {
|
|
784
|
+
const record = asRecord(value);
|
|
785
|
+
if (!record) return undefined;
|
|
786
|
+
|
|
787
|
+
const virtualProvider = firstNonEmptyString(record.virtualProvider, fallbackVirtualProvider);
|
|
788
|
+
const virtualModelId = firstNonEmptyString(record.virtualModelId, record.virtualModel, fallbackVirtualModelId);
|
|
789
|
+
const provider = firstNonEmptyString(record.provider, record.upstreamProvider, record.targetProvider);
|
|
790
|
+
const modelId = firstNonEmptyString(record.modelId, record.upstreamModelId, record.targetModelId, record.responseModel);
|
|
791
|
+
if (!virtualProvider || !virtualModelId || !provider || !modelId) return undefined;
|
|
792
|
+
|
|
793
|
+
const timestamp = getNumber(record.timestamp) ?? Date.now();
|
|
794
|
+
return {
|
|
795
|
+
virtualProvider,
|
|
796
|
+
virtualModelId,
|
|
797
|
+
provider,
|
|
798
|
+
modelId,
|
|
799
|
+
api: firstNonEmptyString(record.api),
|
|
800
|
+
canonicalModelId: firstNonEmptyString(record.canonicalModelId),
|
|
801
|
+
routeLabel: firstNonEmptyString(record.routeLabel, record.label),
|
|
802
|
+
status: parseRouteStatus(record.status),
|
|
803
|
+
sessionIdHash: firstNonEmptyString(record.sessionIdHash),
|
|
804
|
+
requestId: firstNonEmptyString(record.requestId),
|
|
805
|
+
timestamp,
|
|
806
|
+
};
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
function resolveActiveRouteSnapshot(
|
|
810
|
+
model: PiModel | undefined,
|
|
811
|
+
ctx?: Pick<ExtensionContext, "sessionManager">,
|
|
812
|
+
): PiRouteSnapshot | undefined {
|
|
813
|
+
if (!model) return undefined;
|
|
814
|
+
const hint: PiRouteResolveHint | undefined = ctx ? { sessionIdHash: sessionHashFromContext(ctx) } : undefined;
|
|
815
|
+
|
|
816
|
+
const adapter = getRoutingRegistry()?.getRouter(model.provider);
|
|
817
|
+
if (adapter) {
|
|
818
|
+
try {
|
|
819
|
+
const snapshot = parseRouteSnapshot(
|
|
820
|
+
adapter.resolveActiveRoute(model.id, hint),
|
|
821
|
+
model.provider,
|
|
822
|
+
model.id,
|
|
823
|
+
);
|
|
824
|
+
if (snapshot) return snapshot;
|
|
825
|
+
} catch (error) {
|
|
826
|
+
console.warn(`${LOG_PREFIX}: routing registry adapter failed`, error);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
// Temporary migration shim for the prototype global used by early router PRs.
|
|
831
|
+
// New integrations should use Symbol.for("pi.routing.registry.v1") instead.
|
|
832
|
+
const legacy = getProtocolGlobal().__piCacheOptimizerRouter;
|
|
833
|
+
if (!legacy || !lower(model.provider).includes("router")) return undefined;
|
|
834
|
+
try {
|
|
835
|
+
if (typeof legacy === "function") {
|
|
836
|
+
return parseRouteSnapshot(legacy(model.provider, model.id, hint), model.provider, model.id);
|
|
837
|
+
}
|
|
838
|
+
const legacyRecord = asRecord(legacy);
|
|
839
|
+
const resolver = legacyRecord?.resolveActiveRoute;
|
|
840
|
+
if (typeof resolver === "function") {
|
|
841
|
+
return parseRouteSnapshot(resolver.call(legacy, model.id, hint), model.provider, model.id);
|
|
842
|
+
}
|
|
843
|
+
return parseRouteSnapshot(legacy, model.provider, model.id);
|
|
844
|
+
} catch (error) {
|
|
845
|
+
console.warn(`${LOG_PREFIX}: legacy routing global failed`, error);
|
|
846
|
+
return undefined;
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
function routeSnapshotToPiModel(snapshot: PiRouteSnapshot, fallback?: PiModel): PiModel {
|
|
851
|
+
return {
|
|
852
|
+
...(fallback ?? {}),
|
|
853
|
+
id: snapshot.modelId,
|
|
854
|
+
name: snapshot.canonicalModelId ?? snapshot.modelId,
|
|
855
|
+
provider: snapshot.provider,
|
|
856
|
+
api: snapshot.api ?? fallback?.api ?? "",
|
|
857
|
+
baseUrl: fallback?.baseUrl ?? "",
|
|
858
|
+
reasoning: fallback?.reasoning ?? false,
|
|
859
|
+
input: fallback?.input ?? ["text"],
|
|
860
|
+
cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
861
|
+
contextWindow: fallback?.contextWindow ?? 0,
|
|
862
|
+
maxTokens: fallback?.maxTokens ?? 0,
|
|
863
|
+
compat: fallback?.compat,
|
|
864
|
+
} as PiModel;
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
function findModelInRegistry(registry: ModelRegistryLike | undefined, provider: string, id: string): PiModel | undefined {
|
|
868
|
+
const found = registry?.find?.(provider, id);
|
|
869
|
+
if (found) return found;
|
|
870
|
+
|
|
871
|
+
const available = registry?.getAvailable?.() ?? [];
|
|
872
|
+
const availableMatch = available.find((candidate) => candidate.provider === provider && candidate.id === id);
|
|
873
|
+
if (availableMatch) return availableMatch;
|
|
874
|
+
|
|
875
|
+
const all = registry?.getAll?.() ?? [];
|
|
876
|
+
return all.find((candidate) => candidate.provider === provider && candidate.id === id);
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
function resolveRouteModel(
|
|
880
|
+
model: PiModel | undefined,
|
|
881
|
+
ctx?: ContextWithOptionalModelRegistry,
|
|
882
|
+
): PiModel | undefined {
|
|
883
|
+
const snapshot = resolveActiveRouteSnapshot(model, ctx);
|
|
884
|
+
if (!snapshot) return undefined;
|
|
885
|
+
|
|
886
|
+
return findModelInRegistry(ctx?.modelRegistry, snapshot.provider, snapshot.modelId)
|
|
887
|
+
?? routeSnapshotToPiModel(snapshot, model);
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
function isVirtualRoutingModel(model: PiModel | undefined, ctx?: Pick<ExtensionContext, "sessionManager">): boolean {
|
|
891
|
+
if (!model) return false;
|
|
892
|
+
return isRouterModel(model) || !!getRoutingRegistry()?.getRouter(model.provider) || !!resolveActiveRouteSnapshot(model, ctx);
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
function isCacheHintsServiceV1(value: unknown): value is PiCacheHintsV1 {
|
|
896
|
+
const record = asRecord(value);
|
|
897
|
+
return !!record && record.version === 1 && typeof record.getHints === "function";
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
function getCacheHintsService(): PiCacheHintsV1 | undefined {
|
|
901
|
+
const candidate = getProtocolGlobal()[PI_CACHE_HINTS_SYMBOL];
|
|
902
|
+
return isCacheHintsServiceV1(candidate) ? candidate : undefined;
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
function installCacheHintsService(service: PiCacheHintsV1): () => void {
|
|
906
|
+
const globals = getProtocolGlobal();
|
|
907
|
+
const previous = globals[PI_CACHE_HINTS_SYMBOL];
|
|
908
|
+
globals[PI_CACHE_HINTS_SYMBOL] = service;
|
|
909
|
+
return () => {
|
|
910
|
+
if (globals[PI_CACHE_HINTS_SYMBOL] !== service) return;
|
|
911
|
+
if (previous === undefined) {
|
|
912
|
+
delete globals[PI_CACHE_HINTS_SYMBOL];
|
|
913
|
+
} else {
|
|
914
|
+
globals[PI_CACHE_HINTS_SYMBOL] = previous;
|
|
915
|
+
}
|
|
916
|
+
};
|
|
917
|
+
}
|
|
918
|
+
|
|
639
919
|
/**
|
|
640
920
|
* Build a session-scoped stats key from a session hash + provider/id.
|
|
641
921
|
* Pure function (no closure dependency) for use by tests and internals.
|
|
@@ -1403,10 +1683,10 @@ function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefin
|
|
|
1403
1683
|
const record = getAssistantRecord(message);
|
|
1404
1684
|
if (!record) return fallback;
|
|
1405
1685
|
|
|
1406
|
-
const id =
|
|
1407
|
-
const provider =
|
|
1408
|
-
const api =
|
|
1409
|
-
if (!id || !provider
|
|
1686
|
+
const id = firstNonEmptyString(record.responseModel, record.model, fallback?.id);
|
|
1687
|
+
const provider = firstNonEmptyString(record.provider, fallback?.provider);
|
|
1688
|
+
const api = firstNonEmptyString(record.api, fallback?.api) ?? "";
|
|
1689
|
+
if (!id || !provider) return fallback;
|
|
1410
1690
|
|
|
1411
1691
|
return {
|
|
1412
1692
|
...(fallback ?? {}),
|
|
@@ -2886,7 +3166,10 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
|
|
|
2886
3166
|
}
|
|
2887
3167
|
|
|
2888
3168
|
function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
|
|
2889
|
-
|
|
3169
|
+
// Assistant message metadata is request-local and authoritative for virtual
|
|
3170
|
+
// routing providers. Use it first for every model; direct providers normally
|
|
3171
|
+
// echo the same provider/model and therefore remain unchanged.
|
|
3172
|
+
const responseModel = modelFromAssistantMessage(message, model);
|
|
2890
3173
|
return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
|
|
2891
3174
|
}
|
|
2892
3175
|
|
|
@@ -3161,7 +3444,7 @@ function buildExactRouterStatusEntry(
|
|
|
3161
3444
|
sessionHash: string | undefined,
|
|
3162
3445
|
statsByModel: Record<string, CacheStats>,
|
|
3163
3446
|
lastRoutedModel: PersistedRoutedModelRef | undefined,
|
|
3164
|
-
): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
|
|
3447
|
+
): { model: PiModel; adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
|
|
3165
3448
|
if (!sessionHash || !lastRoutedModel) return undefined;
|
|
3166
3449
|
|
|
3167
3450
|
const model = routedModelRefToPiModel(lastRoutedModel);
|
|
@@ -3169,7 +3452,7 @@ function buildExactRouterStatusEntry(
|
|
|
3169
3452
|
if (!adapter) return undefined;
|
|
3170
3453
|
|
|
3171
3454
|
const key = makeSessionModelKey(sessionHash, lastRoutedModel.provider, lastRoutedModel.id);
|
|
3172
|
-
return { adapter, stats: statsByModel[key] ?? emptyCacheStats() };
|
|
3455
|
+
return { model, adapter, stats: statsByModel[key] ?? emptyCacheStats() };
|
|
3173
3456
|
}
|
|
3174
3457
|
|
|
3175
3458
|
function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
|
|
@@ -5062,6 +5345,18 @@ export const __internals_for_tests = {
|
|
|
5062
5345
|
parsePersistedRoutedModelRef,
|
|
5063
5346
|
routedModelRefToPiModel,
|
|
5064
5347
|
buildExactRouterStatusEntry,
|
|
5348
|
+
// Routing-provider protocol helpers
|
|
5349
|
+
PI_ROUTING_REGISTRY_SYMBOL,
|
|
5350
|
+
PI_CACHE_HINTS_SYMBOL,
|
|
5351
|
+
ensureRoutingRegistry,
|
|
5352
|
+
getRoutingRegistry,
|
|
5353
|
+
parseRouteSnapshot,
|
|
5354
|
+
resolveActiveRouteSnapshot,
|
|
5355
|
+
routeSnapshotToPiModel,
|
|
5356
|
+
resolveRouteModel,
|
|
5357
|
+
isVirtualRoutingModel,
|
|
5358
|
+
installCacheHintsService,
|
|
5359
|
+
getCacheHintsService,
|
|
5065
5360
|
// Persistence helpers (for reload/reset tests)
|
|
5066
5361
|
mergeCacheSessions,
|
|
5067
5362
|
mergeLastRoutedModels,
|
|
@@ -5109,6 +5404,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5109
5404
|
let currentSessionHash = "";
|
|
5110
5405
|
let currentSessionHashSet = false;
|
|
5111
5406
|
let lastActualRoutedModel: PersistedRoutedModelRef | undefined;
|
|
5407
|
+
let latestCacheHint: PiCacheHintSnapshot | undefined;
|
|
5112
5408
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
5113
5409
|
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
5114
5410
|
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
@@ -5123,6 +5419,28 @@ export default function (pi: ExtensionAPI) {
|
|
|
5123
5419
|
}
|
|
5124
5420
|
}
|
|
5125
5421
|
|
|
5422
|
+
const uninstallCacheHintsService = installCacheHintsService({
|
|
5423
|
+
version: 1,
|
|
5424
|
+
getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined {
|
|
5425
|
+
if (!runtimeOptimizerEnabled || isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV])) return undefined;
|
|
5426
|
+
const hint = latestCacheHint;
|
|
5427
|
+
if (!hint) return undefined;
|
|
5428
|
+
if (input.sessionIdHash && hint.sessionIdHash && input.sessionIdHash !== hint.sessionIdHash) return undefined;
|
|
5429
|
+
if (input.virtualProvider && hint.virtualProvider && input.virtualProvider !== hint.virtualProvider) return undefined;
|
|
5430
|
+
if (input.virtualModelId && hint.virtualModelId && input.virtualModelId !== hint.virtualModelId) return undefined;
|
|
5431
|
+
if (input.upstreamProvider && hint.upstreamProvider && input.upstreamProvider !== hint.upstreamProvider) return undefined;
|
|
5432
|
+
if (input.upstreamModelId && hint.upstreamModelId && input.upstreamModelId !== hint.upstreamModelId) return undefined;
|
|
5433
|
+
if (input.api && hint.api && input.api !== hint.api) return undefined;
|
|
5434
|
+
|
|
5435
|
+
return {
|
|
5436
|
+
systemPrompt: hint.systemPrompt,
|
|
5437
|
+
promptCacheKey: hint.promptCacheKey,
|
|
5438
|
+
cacheRetention: hint.cacheRetention,
|
|
5439
|
+
};
|
|
5440
|
+
},
|
|
5441
|
+
});
|
|
5442
|
+
void uninstallCacheHintsService;
|
|
5443
|
+
|
|
5126
5444
|
/**
|
|
5127
5445
|
* Build a session-scoped stats key from the current session hash + model key.
|
|
5128
5446
|
* Returns `${sessionHash}:${provider}/${id}`.
|
|
@@ -5206,6 +5524,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
5206
5524
|
return created;
|
|
5207
5525
|
}
|
|
5208
5526
|
|
|
5527
|
+
function resetStatsForModel(model: PiModel): void {
|
|
5528
|
+
const sk = sessionModelKey(model);
|
|
5529
|
+
delete cacheStatsByModel[sk];
|
|
5530
|
+
recentSamplesByModelKey.delete(sk);
|
|
5531
|
+
lastStatusText = undefined;
|
|
5532
|
+
}
|
|
5533
|
+
|
|
5209
5534
|
function resetCurrentSessionStats(): void {
|
|
5210
5535
|
const prefix = `${currentSessionHash || "_nosession"}:`;
|
|
5211
5536
|
for (const key of Object.keys(cacheStatsByModel)) {
|
|
@@ -5214,6 +5539,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5214
5539
|
for (const key of Array.from(recentSamplesByModelKey.keys())) {
|
|
5215
5540
|
if (key.startsWith(prefix)) recentSamplesByModelKey.delete(key);
|
|
5216
5541
|
}
|
|
5542
|
+
lastActualRoutedModel = undefined;
|
|
5217
5543
|
lastStatusText = undefined;
|
|
5218
5544
|
}
|
|
5219
5545
|
|
|
@@ -5380,9 +5706,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
5380
5706
|
syncSessionHash(ctx);
|
|
5381
5707
|
await rollOverStatsIfNeeded(ctx);
|
|
5382
5708
|
|
|
5383
|
-
const
|
|
5709
|
+
const routedModel = resolveRouteModel(model, ctx);
|
|
5710
|
+
const displayModel = routedModel ?? model;
|
|
5711
|
+
const adapter = selectAdapterForModel(displayModel);
|
|
5712
|
+
const activeIsVirtualRoute = !!routedModel || isVirtualRoutingModel(model, ctx);
|
|
5384
5713
|
let statusText: string | undefined;
|
|
5385
|
-
|
|
5714
|
+
|
|
5715
|
+
if (!adapter && !routedModel && activeIsVirtualRoute) {
|
|
5386
5716
|
// On model_select (existing footer), keep the existing cache footer
|
|
5387
5717
|
// visible instead of clearing it. On session_start (no footer yet
|
|
5388
5718
|
// after reload/fresh start), restore the exact last actual routed model
|
|
@@ -5405,8 +5735,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
5405
5735
|
if (adapter) {
|
|
5406
5736
|
// Display session-scoped stats. A model that has never been used
|
|
5407
5737
|
// in this session shows 0/0. The message_end hook populates
|
|
5408
|
-
// cacheStatsByModel[sessionModelKey(
|
|
5409
|
-
const sk =
|
|
5738
|
+
// cacheStatsByModel[sessionModelKey(displayModel)] on first use.
|
|
5739
|
+
const sk = displayModel ? sessionModelKey(displayModel) : undefined;
|
|
5410
5740
|
const stats = sk ? cacheStatsByModel[sk] : undefined;
|
|
5411
5741
|
const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
5412
5742
|
statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
|
|
@@ -5443,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
5443
5773
|
// Re-evaluated on every status update so the marker persists through stats
|
|
5444
5774
|
// changes and day rollovers. Redundant setStatus calls are blocked by the
|
|
5445
5775
|
// `lastStatusText` early return above.
|
|
5446
|
-
if (runtimeOptimizerEnabled && statusText !== undefined &&
|
|
5776
|
+
if (runtimeOptimizerEnabled && statusText !== undefined && displayModel) {
|
|
5447
5777
|
// Only show ⚠️ compat when there are safe-fixable missing compat keys.
|
|
5448
5778
|
// Optional/advisory-only flags (e.g. supportsLongCacheRetention on generic
|
|
5449
5779
|
// OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
|
|
5450
5780
|
// commands still mention them as optional guidance.
|
|
5451
|
-
if (buildFixSuggestion(
|
|
5781
|
+
if (buildFixSuggestion(displayModel) !== undefined) {
|
|
5452
5782
|
statusText = statusText + " ⚠️ compat";
|
|
5453
5783
|
}
|
|
5454
5784
|
}
|
|
@@ -5459,18 +5789,26 @@ export default function (pi: ExtensionAPI) {
|
|
|
5459
5789
|
ctx.ui.setStatus(STATUS_KEY, statusText);
|
|
5460
5790
|
}
|
|
5461
5791
|
|
|
5792
|
+
ensureRoutingRegistry();
|
|
5793
|
+
|
|
5462
5794
|
pi.on("session_start", async (event, ctx) => {
|
|
5463
5795
|
await restoreCacheStats(event.reason, ctx);
|
|
5464
|
-
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
|
|
5796
|
+
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(ctx.model, ctx) ?? ctx.model, ctx, warnedModels);
|
|
5465
5797
|
await publishStatus(ctx);
|
|
5466
5798
|
});
|
|
5467
5799
|
|
|
5468
5800
|
pi.on("model_select", async (event, ctx) => {
|
|
5469
|
-
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
|
|
5801
|
+
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(event.model, ctx) ?? event.model, ctx, warnedModels);
|
|
5470
5802
|
await publishStatus(ctx, event.model);
|
|
5471
5803
|
});
|
|
5472
5804
|
|
|
5473
5805
|
pi.on("before_agent_start", async (event, _ctx) => {
|
|
5806
|
+
latestCacheHint = undefined;
|
|
5807
|
+
const routeSnapshot = resolveActiveRouteSnapshot(_ctx.model, _ctx);
|
|
5808
|
+
const routedModel = routeSnapshot
|
|
5809
|
+
? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
|
|
5810
|
+
: undefined;
|
|
5811
|
+
|
|
5474
5812
|
// ────────────────────────────────────────────────────────────────
|
|
5475
5813
|
// OpenAI Responses-family bypass (codex-responses + responses + azure responses)
|
|
5476
5814
|
//
|
|
@@ -5497,7 +5835,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5497
5835
|
// compression, reorder) for these APIs. Third-party providers
|
|
5498
5836
|
// that use openai-completions are unaffected.
|
|
5499
5837
|
// ────────────────────────────────────────────────────────────────
|
|
5500
|
-
const model = _ctx.model;
|
|
5838
|
+
const model = routedModel ?? _ctx.model;
|
|
5501
5839
|
if (model && isResponsesPromptRewriteBypassApi(model.api)) {
|
|
5502
5840
|
return {};
|
|
5503
5841
|
}
|
|
@@ -5535,7 +5873,27 @@ export default function (pi: ExtensionAPI) {
|
|
|
5535
5873
|
// ships to the provider.
|
|
5536
5874
|
const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
|
|
5537
5875
|
|
|
5876
|
+
const promptCacheKey = getSessionPromptCacheKey(_ctx);
|
|
5877
|
+
const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
|
|
5878
|
+
const publishHint = (systemPrompt: string): void => {
|
|
5879
|
+
latestCacheHint = {
|
|
5880
|
+
sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
|
|
5881
|
+
virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
|
|
5882
|
+
virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
|
|
5883
|
+
upstreamProvider: routeSnapshot?.provider ?? model?.provider,
|
|
5884
|
+
upstreamModelId: routeSnapshot?.modelId ?? model?.id,
|
|
5885
|
+
api: model?.api,
|
|
5886
|
+
systemPrompt,
|
|
5887
|
+
promptCacheKey,
|
|
5888
|
+
cacheRetention,
|
|
5889
|
+
timestamp: Date.now(),
|
|
5890
|
+
};
|
|
5891
|
+
const globals = getProtocolGlobal();
|
|
5892
|
+
globals.__piCacheOptimizerCacheKey__ = promptCacheKey;
|
|
5893
|
+
};
|
|
5894
|
+
|
|
5538
5895
|
if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
|
|
5896
|
+
publishHint(optimized.systemPrompt);
|
|
5539
5897
|
return { systemPrompt: optimized.systemPrompt };
|
|
5540
5898
|
}
|
|
5541
5899
|
|
|
@@ -5544,24 +5902,28 @@ export default function (pi: ExtensionAPI) {
|
|
|
5544
5902
|
// the volume cut even when reorder is a no-op (e.g., short sessions
|
|
5545
5903
|
// where no stable candidate is long enough).
|
|
5546
5904
|
if (compressedPrompt !== strippedPrompt && compressedPrompt.trim().length > 0) {
|
|
5905
|
+
publishHint(compressedPrompt);
|
|
5547
5906
|
return { systemPrompt: compressedPrompt };
|
|
5548
5907
|
}
|
|
5549
5908
|
if (strippedPrompt !== event.systemPrompt && strippedPrompt.trim().length > 0) {
|
|
5909
|
+
publishHint(strippedPrompt);
|
|
5550
5910
|
return { systemPrompt: strippedPrompt };
|
|
5551
5911
|
}
|
|
5552
5912
|
|
|
5913
|
+
publishHint(event.systemPrompt);
|
|
5553
5914
|
return {};
|
|
5554
5915
|
});
|
|
5555
5916
|
|
|
5556
5917
|
pi.on("before_provider_request", (event, ctx) => {
|
|
5557
5918
|
if (!shouldInjectOpenAIPromptCacheKey()) return undefined;
|
|
5558
|
-
|
|
5919
|
+
const requestModel = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
|
|
5920
|
+
if (!isOpenAICompatibleApi(requestModel?.api)) return undefined;
|
|
5559
5921
|
|
|
5560
5922
|
return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
|
|
5561
5923
|
});
|
|
5562
5924
|
|
|
5563
5925
|
pi.on("after_provider_response", (event, ctx) => {
|
|
5564
|
-
const model = ctx.model;
|
|
5926
|
+
const model = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
|
|
5565
5927
|
if (!runtimeOptimizerEnabled || !model) return;
|
|
5566
5928
|
if (event.status !== 400) return;
|
|
5567
5929
|
if (!isPromptCacheRetention400Applicable(model)) return;
|
|
@@ -5586,9 +5948,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
5586
5948
|
|
|
5587
5949
|
const usage = adapter.normalizeUsage(event.message);
|
|
5588
5950
|
|
|
5589
|
-
|
|
5951
|
+
// Completed message metadata is request-local and authoritative for virtual
|
|
5952
|
+
// routing providers. Use it whenever it supplies provider/model identity;
|
|
5953
|
+
// fall back to the active context model for direct providers.
|
|
5954
|
+
const statsModel = modelFromAssistantMessage(event.message, ctx.model) ?? ctx.model;
|
|
5590
5955
|
let routedModelChanged = false;
|
|
5591
|
-
if (
|
|
5956
|
+
if (isVirtualRoutingModel(ctx.model, ctx) && statsModel && !isVirtualRoutingModel(statsModel, ctx)) {
|
|
5592
5957
|
const nextRoutedModel: PersistedRoutedModelRef = {
|
|
5593
5958
|
provider: statsModel.provider,
|
|
5594
5959
|
id: statsModel.id,
|
|
@@ -5651,7 +6016,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
5651
6016
|
description: "Diagnose Pi cache configuration",
|
|
5652
6017
|
handler: async (args: string, cmdCtx) => {
|
|
5653
6018
|
syncSessionHash(cmdCtx);
|
|
5654
|
-
const
|
|
6019
|
+
const selectedModel = cmdCtx.model;
|
|
6020
|
+
const model = resolveRouteModel(selectedModel, cmdCtx as unknown as ExtensionContext) ?? selectedModel;
|
|
5655
6021
|
const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
|
|
5656
6022
|
|
|
5657
6023
|
if (subcommand === "enable") {
|
|
@@ -5719,14 +6085,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
5719
6085
|
return;
|
|
5720
6086
|
}
|
|
5721
6087
|
|
|
5722
|
-
const sk = sessionModelKey(model);
|
|
5723
6088
|
const displayKey = modelKey(model);
|
|
5724
6089
|
|
|
5725
|
-
// Reset session-scoped stats for the active model.
|
|
5726
|
-
|
|
5727
|
-
|
|
5728
|
-
|
|
5729
|
-
recentSamplesByModelKey.delete(sk);
|
|
6090
|
+
// Reset session-scoped stats for the effective active model. If the
|
|
6091
|
+
// selected model is a virtual router and the protocol exposes a live
|
|
6092
|
+
// route, this clears the real upstream bucket, not the router shell.
|
|
6093
|
+
resetStatsForModel(model);
|
|
5730
6094
|
|
|
5731
6095
|
// Persist immediately.
|
|
5732
6096
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
@@ -6069,10 +6433,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
6069
6433
|
if (!adapter) {
|
|
6070
6434
|
cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
|
|
6071
6435
|
} else {
|
|
6072
|
-
const sk = sessionModelKey(model);
|
|
6073
6436
|
const displayKey = modelKey(model);
|
|
6074
|
-
|
|
6075
|
-
recentSamplesByModelKey.delete(sk);
|
|
6437
|
+
resetStatsForModel(model);
|
|
6076
6438
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6077
6439
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6078
6440
|
cmdCtx.ui.notify(
|
package/package.json
CHANGED