pi-cache-optimizer 2.4.8 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -8
- package/README.zh-CN.md +41 -66
- package/index.ts +694 -29
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -254,11 +254,30 @@ Stats rules:
|
|
|
254
254
|
- Stats are persisted in a small local JSON state file at `~/.pi/agent/pi-cache-optimizer-stats.json`. Earlier 1.x releases used `~/.pi/agent/deepseek-cache-optimizer-stats.json`; on first run after upgrade the old file is read once, copied into the new path, and best-effort deleted. The file stores only counters and the local day; it does not store API keys, prompts, messages, headers, or model output.
|
|
255
255
|
- Existing v1 state files from DeepSeek-only releases are migrated into the DeepSeek adapter counters automatically.
|
|
256
256
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
-
|
|
260
|
-
-
|
|
261
|
-
|
|
257
|
+
Session scope:
|
|
258
|
+
|
|
259
|
+
- Stats are now scoped per Pi session + provider/model, not global.
|
|
260
|
+
- Each Pi process (session) starts with fresh counters. Different sessions using the
|
|
261
|
+
same provider/model do not share footer statistics or reset effects.
|
|
262
|
+
- Within the same Pi session, stats accumulate normally for each provider/model.
|
|
263
|
+
- Pi restarts start fresh stats for the new session.
|
|
264
|
+
- `/reload` does **not** clear accumulated session-scoped stats; it only clears transient
|
|
265
|
+
in-memory data (recent samples, integrity notification state).
|
|
266
|
+
- Crossing the local natural-day boundary resets counters on the next status update or
|
|
267
|
+
supported-provider response.
|
|
268
|
+
- Persisted stats are stored under an opaque session hash key (truncated SHA-256 hash of the session id)
|
|
269
|
+
so that different sessions' data is isolated on disk. Raw session ids are never logged,
|
|
270
|
+
displayed, or written to the stats file.
|
|
271
|
+
|
|
272
|
+
> **Concurrent-write caveat**: Stats are persisted atomically (write-temp then rename),
|
|
273
|
+
> but multiple Pi processes reading and writing simultaneously can still experience
|
|
274
|
+
> a lost-update window (the classic read-modify-write race). The implementation
|
|
275
|
+
> preserves sequential operation semantics (each write replaces only the current
|
|
276
|
+
> session's data and re-appends other sessions from the previous read), but does
|
|
277
|
+
> **not** guarantee concurrent-safety across processes. If you run multiple Pi
|
|
278
|
+
> instances using the same `models.json` with different provider/model IDs, their
|
|
279
|
+
> writes to the shared stats file may occasionally overwrite each other's session data. This affects
|
|
280
|
+
> only the on-disk persistence; in-memory counters per process remain correct.
|
|
262
281
|
|
|
263
282
|
## Suggested compat config
|
|
264
283
|
|
|
@@ -304,14 +323,38 @@ The extension registers a Pi command `/cache-optimizer` for interactive diagnosi
|
|
|
304
323
|
```
|
|
305
324
|
/cache-optimizer — interactive menu (or text help when no UI)
|
|
306
325
|
/cache-optimizer doctor — show provider, model, API, base URL, compat status
|
|
326
|
+
and low-hit cause diagnosis
|
|
327
|
+
/cache-optimizer stats — show active model stats bucket and recent trend
|
|
307
328
|
/cache-optimizer compat — show compat suggestion with edit instructions
|
|
329
|
+
/cache-optimizer reset — reset local session stats for the current model
|
|
330
|
+
(does not affect upstream provider prompt cache)
|
|
308
331
|
```
|
|
309
332
|
|
|
310
333
|
When run without arguments, `/cache-optimizer` shows an interactive selection menu
|
|
311
|
-
(Doctor / Compat / Cancel) when the Pi UI supports it (`ctx.ui.select`).
|
|
312
|
-
non-interactive terminals, it falls back to text help with current model compat
|
|
334
|
+
(Doctor / Stats / Compat / Reset / Cancel) when the Pi UI supports it (`ctx.ui.select`).
|
|
335
|
+
In non-interactive terminals, it falls back to text help with current model compat
|
|
313
336
|
status.
|
|
314
337
|
|
|
338
|
+
### `/cache-optimizer reset`
|
|
339
|
+
|
|
340
|
+
Resets only the current Pi session's stats bucket for the active provider/model.
|
|
341
|
+
Clears today's request counters (hit/total), cached token counts, and recent trend
|
|
342
|
+
samples for that model. Other provider/model buckets within the same session are
|
|
343
|
+
unaffected, and other sessions' data is preserved.
|
|
344
|
+
|
|
345
|
+
```text
|
|
346
|
+
Provider: otokapi
|
|
347
|
+
Model: gpt-5.5
|
|
348
|
+
|
|
349
|
+
✅ Reset local session cache stats for "otokapi/gpt-5.5".
|
|
350
|
+
Upstream provider prompt cache was not modified.
|
|
351
|
+
New requests will start a fresh stats bucket for this Pi session.
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
If no active model is selected, a warning is shown. If the active model does not
|
|
355
|
+
match a cache adapter (footer stats are not shown for it), a friendly no-op message
|
|
356
|
+
is displayed instead.
|
|
357
|
+
|
|
315
358
|
### `/cache-optimizer doctor`
|
|
316
359
|
|
|
317
360
|
Displays the active model's provider, model id, name, API type, base URL, current
|
|
@@ -345,7 +388,52 @@ it shows `✅ Compat fully configured.` if the model is an applicable
|
|
|
345
388
|
third-party proxy, or `ℹ️ Compat check not applicable for this model.`
|
|
346
389
|
otherwise.
|
|
347
390
|
|
|
348
|
-
|
|
391
|
+
### `/cache-optimizer stats`
|
|
392
|
+
|
|
393
|
+
Displays the active model's stats bucket (`provider/modelId`), today's request
|
|
394
|
+
count (hit/total), cached input tokens vs total input tokens, and the hit rate
|
|
395
|
+
percentage. Also shows recent trend summaries (last 10 and last 30 samples):
|
|
396
|
+
|
|
397
|
+
```text
|
|
398
|
+
Model key: otokapi/gpt-5.5
|
|
399
|
+
Adapter: OpenAI cache
|
|
400
|
+
|
|
401
|
+
── Today ──
|
|
402
|
+
Requests: 3 hit / 10 total · 30%
|
|
403
|
+
Cached tokens: 0.0015M / 0.0050M input · 30%
|
|
404
|
+
|
|
405
|
+
── Recent trend ──
|
|
406
|
+
Recent 10/10: 3/10 hits · 30% tok cached
|
|
407
|
+
Recent 10/30: 3/10 hits · 30% tok cached
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
If the active model has no adapter match, a friendly message is shown. If
|
|
411
|
+
no samples have been recorded yet in this session, trend shows "no samples".
|
|
412
|
+
|
|
413
|
+
### Low-hit cause diagnosis
|
|
414
|
+
|
|
415
|
+
The `/cache-optimizer doctor` output includes a "Cache diagnosis" section
|
|
416
|
+
with prioritized low-hit cause analysis:
|
|
417
|
+
|
|
418
|
+
1. **Missing compat flags** — flags that enable prompt caching and session-affinity
|
|
419
|
+
routing are absent.
|
|
420
|
+
2. **Router/channel risk** — multi-backend routing may split the cache across
|
|
421
|
+
different upstream instances.
|
|
422
|
+
3. **Missing usage fields** — the proxy may not return prompt-level usage
|
|
423
|
+
fields, causing the footer to under-report hits.
|
|
424
|
+
4. **Recent low trend** — when today's cache hit rate is below 30%,
|
|
425
|
+
the diagnosis suggests proxy route instability or prompt prefix churn.
|
|
426
|
+
|
|
427
|
+
For fully configured models that still have low cache hit rates, the diagnosis
|
|
428
|
+
emphasizes sticky routing and upstream cache usage verification rather than
|
|
429
|
+
pointing to compat flags.
|
|
430
|
+
|
|
431
|
+
### Router/channel diagnostics
|
|
432
|
+
|
|
433
|
+
For models using OpenAI-compatible APIs (`openai-completions` or
|
|
434
|
+
`openai-responses`) through a non-official base URL, the extension detects
|
|
435
|
+
common router/channel proxy patterns from `provider`, `baseUrl`, and `compat`
|
|
436
|
+
metadata:
|
|
349
437
|
Vercel AI Gateway, LiteLLM, OneAPI/NewAPI/VoAPI, or a generic third-party
|
|
350
438
|
OpenAI-compatible proxy), both `doctor` and `compat` subcommands append
|
|
351
439
|
router/channel diagnostics with targeted recommendations.
|
package/README.zh-CN.md
CHANGED
|
@@ -249,11 +249,48 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
|
|
|
249
249
|
- 统计会持久化到本地小 JSON 文件:`~/.pi/agent/pi-cache-optimizer-stats.json`。早期 1.x 版本使用 `~/.pi/agent/deepseek-cache-optimizer-stats.json`;首次运行新版时会从旧路径读一次、复制到新路径、然后 best-effort 删除旧文件。该文件只保存计数器和本地日期,不保存 API key、prompt、消息内容、headers 或模型输出。
|
|
250
250
|
- DeepSeek-only 旧版本的 v1 状态文件会自动迁移到 DeepSeek adapter 计数器。
|
|
251
251
|
|
|
252
|
-
|
|
252
|
+
## 统计桶隔离(Session-scoped)
|
|
253
253
|
|
|
254
|
-
- Pi
|
|
255
|
-
-
|
|
256
|
-
-
|
|
254
|
+
- 统计现在按 Pi session + provider/model 隔离,不再全局聚合。
|
|
255
|
+
- 每个 Pi 进程(session)从零开始计数。不同 session 对同一 provider/model 的统计不共享。
|
|
256
|
+
- 同一 Pi session 中,同一 provider/model 的统计正常累积。
|
|
257
|
+
- Pi 进程重新启动时,新的 session 从头开始统计。
|
|
258
|
+
- `/reload` **不会**清空累计的 session-scoped 统计;只清除临时内存状态(recent samples、integrity notification)。
|
|
259
|
+
- 跨过本地自然日时,下一次状态更新或受支持 provider 响应时会自动按本地日期清零。
|
|
260
|
+
- 持久化统计文件使用不透明的 session hash key(SHA-256 哈希后的 session id)来隔离不同 session 的数据。原始 session id 不会写入文件。
|
|
261
|
+
|
|
262
|
+
> **并发写入说明**:统计以原子方式持久化(写 temp 文件再 rename),但多个 Pi 进程同时读写仍然存在 lost-update 窗口(经典的 read-modify-write 竞态)。实现中尽可能保留顺序语义(每次写入只替换当前 session 的数据,其他 session 的数据从前次读取追加),但**不保证**跨进程并发安全。如果同时运行多个 Pi 实例使用不同 provider/model,统计文件偶尔可能覆盖彼此 session 的数据。这只影响磁盘持久化;每个进程的内存统计始终正确。
|
|
263
|
+
|
|
264
|
+
## 诊断命令
|
|
265
|
+
|
|
266
|
+
扩展注册了 Pi 命令 `/cache-optimizer` 用于交互式诊断。
|
|
267
|
+
|
|
268
|
+
```
|
|
269
|
+
/cache-optimizer — 交互菜单(无 UI 时显示文字帮助)
|
|
270
|
+
/cache-optimizer doctor — 显示 provider、model、API、base URL、compat 状态
|
|
271
|
+
及低命中原因诊断
|
|
272
|
+
/cache-optimizer stats — 显示当前模型的 stats 桶和近期趋势
|
|
273
|
+
/cache-optimizer compat — 显示 compat 建议和编辑说明
|
|
274
|
+
/cache-optimizer reset — 重置当前 Pi session 中当前模型的本地统计
|
|
275
|
+
(不影响上游 provider prompt cache)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Stats / Compat / Reset / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
|
|
279
|
+
|
|
280
|
+
### `/cache-optimizer reset`
|
|
281
|
+
|
|
282
|
+
仅重置当前 Pi session 中活跃 provider/model 的统计桶。清除今日请求计数(命中/总数)、缓存 token 计数和近期趋势样本。同一 session 中其他 provider/model 的桶不受影响,其他 session 的数据也不受影响。
|
|
283
|
+
|
|
284
|
+
```text
|
|
285
|
+
Provider: otokapi
|
|
286
|
+
Model: gpt-5.5
|
|
287
|
+
|
|
288
|
+
✅ 已重置 "otokapi/gpt-5.5" 的本地 session 缓存统计。
|
|
289
|
+
上游 provider prompt cache 未被修改。
|
|
290
|
+
新的请求将为这个 Pi session 重新开始统计。
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
如果没有选中的模型,显示警告。如果当前模型不匹配缓存的 adapter(不显示底部统计),则显示友好的无操作提示。
|
|
257
294
|
|
|
258
295
|
## 建议的 compat 配置
|
|
259
296
|
|
|
@@ -291,68 +328,6 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
|
|
|
291
328
|
|
|
292
329
|
> 提醒:只有在 endpoint 或代理明确支持时,才建议启用 session-affinity headers 或 cache-control compat。
|
|
293
330
|
|
|
294
|
-
## 诊断命令
|
|
295
|
-
|
|
296
|
-
扩展注册了 Pi 命令 `/cache-optimizer` 用于交互式诊断。
|
|
297
|
-
|
|
298
|
-
```
|
|
299
|
-
/cache-optimizer — 交互菜单(无 UI 时显示文字帮助)
|
|
300
|
-
/cache-optimizer doctor — 显示 provider、model、API、base URL、compat 状态
|
|
301
|
-
/cache-optimizer compat — 显示 compat 建议和编辑说明
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Compat / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
|
|
305
|
-
|
|
306
|
-
### `/cache-optimizer doctor`
|
|
307
|
-
|
|
308
|
-
显示当前模型的 provider、model id、名称、API 类型、base URL、当前 `compat` 标志以及缺少的缓存/session-affinity 标志。如果缺少标志,还会显示可复制的 JSON 片段和精确编辑位置。
|
|
309
|
-
|
|
310
|
-
如果所有 compat 标志都已配置且适用(第三方 `openai-completions` 代理),输出显示 `✅ Compat fully configured.`。对于不适用 compat 检查的模型(官方 OpenAI、非 `openai-completions` API、custom transport),显示 `ℹ️ Compat check not applicable for this model.`:
|
|
311
|
-
|
|
312
|
-
```text
|
|
313
|
-
Provider: otokapi
|
|
314
|
-
Model: gpt-5.5
|
|
315
|
-
API: openai-completions
|
|
316
|
-
Base URL: https://otokapi.example.com/v1
|
|
317
|
-
Compat: {}
|
|
318
|
-
⚠️ Missing compat flags: supportsLongCacheRetention, sendSessionAffinityHeaders
|
|
319
|
-
Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as baseUrl/api/apiKey/models):
|
|
320
|
-
{
|
|
321
|
-
"supportsLongCacheRetention": true,
|
|
322
|
-
"sendSessionAffinityHeaders": true
|
|
323
|
-
}
|
|
324
|
-
```
|
|
325
|
-
|
|
326
|
-
### `/cache-optimizer compat`
|
|
327
|
-
|
|
328
|
-
显示当前模型的 compat 建议,包括文件路径、provider 路径和可复制 JSON 片段。当没有缺失的 compat 标志时,如果模型是适用的第三方代理则显示 `✅ Compat fully configured.`,否则显示 `ℹ️ Compat check not applicable for this model.`。
|
|
329
|
-
|
|
330
|
-
当模型通过已知的路由器/通道代理(OpenRouter、Vercel AI Gateway、LiteLLM、OneAPI/NewAPI/VoAPI 或通用第三方 OpenAI-compatible 代理)时,`doctor` 和 `compat` 子命令都会附加路由/通道诊断信息和建议。
|
|
331
|
-
|
|
332
|
-
### 路由/通道诊断
|
|
333
|
-
|
|
334
|
-
对于通过非官方 base URL 使用 OpenAI-compatible API(`openai-completions` 或 `openai-responses`)的模型,扩展会从 `provider`、`baseUrl` 和 `compat` 元数据中检测常见的路由/通道代理模式:
|
|
335
|
-
|
|
336
|
-
| 类型 | 检测方式 | 建议 |
|
|
337
|
-
|------|----------|------|
|
|
338
|
-
| **OpenRouter** | baseUrl 或 provider 包含 `openrouter`/`openrouter.ai` | 在 compat 中用 `openRouterRouting.only` 或 `.order` 固定上游 provider |
|
|
339
|
-
| **Vercel AI Gateway** | baseUrl 包含 `ai-gateway.vercel.sh` 或 provider 包含 `vercel` | 在 compat 中用 `vercelGatewayRouting.only` 或 `.order` 固定上游 |
|
|
340
|
-
| **LiteLLM / OneAPI / NewAPI / VoAPI** | baseUrl 或 provider 包含 `litellm`、`oneapi`/`one-api`、`newapi`/`new-api`、`voapi`/`vo-api` | 确保每 session 固定路由,转发 `prompt_cache_key` + session-affinity headers,返回缓存用量字段 |
|
|
341
|
-
| **通用第三方代理** | 任何非官方 base URL 的 `openai-completions` 模型,且不匹配以上类型 | 通用建议:验证单上游路由、转发 `prompt_cache_key` + session-affinity headers、返回缓存用量 |
|
|
342
|
-
|
|
343
|
-
这些诊断**仅用于建议**。它们不参与 adapter selection(仍基于 id/name)、不参与 `prompt_cache_key` 注入、不参与 footer 统计、也不做任何自动化配置修改。检测仅使用 Pi 暴露的元数据(`provider`、`api`、`baseUrl`、`compat`),不会读取或暴露 API key、prompt、payload、headers 或模型输出。
|
|
344
|
-
|
|
345
|
-
官方 OpenAI(`api.openai.com`)和 custom transport(`kiro-api`、`anthropic-messages`、`bedrock-converse-stream`)不会触发路由/通道诊断。
|
|
346
|
-
|
|
347
|
-
### 安全说明
|
|
348
|
-
|
|
349
|
-
命令只读取 Pi 通过 `ctx.model` 暴露的元数据:provider、id、name、api、baseUrl、compat。它**不会**读取或暴露:
|
|
350
|
-
- API key 或环境密钥
|
|
351
|
-
- 请求/响应 payload
|
|
352
|
-
- Prompt 或模型输出
|
|
353
|
-
- HTTP headers
|
|
354
|
-
- `~/.pi/agent/models.json` 的原始内容
|
|
355
|
-
|
|
356
331
|
## 原理
|
|
357
332
|
|
|
358
333
|
Provider 缓存通常依赖精确或近似精确的前缀匹配。Pi 的 system prompt 包含跨会话稳定的内容(工具定义、技能、规范),也包含每次变化的动态内容(git status、当前任务)。
|
package/index.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
1
2
|
import { mkdir, readFile, rename, unlink, writeFile } from "node:fs/promises";
|
|
2
3
|
import { homedir } from "node:os";
|
|
3
4
|
import { dirname, join } from "node:path";
|
|
@@ -135,6 +136,19 @@ type PersistedCacheStatsV3 = {
|
|
|
135
136
|
legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
|
|
136
137
|
};
|
|
137
138
|
|
|
139
|
+
/**
|
|
140
|
+
* V4 format: session-scoped stats buckets.
|
|
141
|
+
* Each Pi process/session gets its own stats isolated by a hashed session id.
|
|
142
|
+
*
|
|
143
|
+
* sessions: sessionHash → modelKey (provider/id) → CacheStats
|
|
144
|
+
* legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
|
|
145
|
+
*/
|
|
146
|
+
type PersistedCacheStatsV4 = {
|
|
147
|
+
version: 4;
|
|
148
|
+
sessions: Record<string, Record<string, CacheStats>>;
|
|
149
|
+
legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
|
|
150
|
+
};
|
|
151
|
+
|
|
138
152
|
type UsageSnapshot = {
|
|
139
153
|
cacheRead: number;
|
|
140
154
|
cacheWrite: number;
|
|
@@ -147,6 +161,23 @@ type OptimizedSystemPrompt = {
|
|
|
147
161
|
changed: boolean;
|
|
148
162
|
};
|
|
149
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Per-request sample stored for trend analysis and usage-field-missing detection.
|
|
166
|
+
* Contains only numeric counters and booleans — never message content, prompts,
|
|
167
|
+
* payloads, headers, API keys, or model outputs.
|
|
168
|
+
*/
|
|
169
|
+
type CacheUsageSample = {
|
|
170
|
+
timestamp: number;
|
|
171
|
+
hit: boolean;
|
|
172
|
+
cachedInputTokens: number;
|
|
173
|
+
cacheWriteInputTokens: number;
|
|
174
|
+
totalInputTokens: number;
|
|
175
|
+
missingUsageFields: boolean;
|
|
176
|
+
};
|
|
177
|
+
|
|
178
|
+
/** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
|
|
179
|
+
const MAX_RECENT_SAMPLES = 50;
|
|
180
|
+
|
|
150
181
|
type CacheProviderAdapter = {
|
|
151
182
|
id: CacheProviderId;
|
|
152
183
|
label: string;
|
|
@@ -545,6 +576,32 @@ function getSessionPromptCacheKey(ctx: ExtensionContext): string | undefined {
|
|
|
545
576
|
return clampPromptCacheKey(ctx.sessionManager.getSessionId());
|
|
546
577
|
}
|
|
547
578
|
|
|
579
|
+
/**
|
|
580
|
+
* Hash a session id for use as a non-reversible opaque scope key.
|
|
581
|
+
* Returns a 16-character hex string (64 bits of SHA-256 digest prefix)
|
|
582
|
+
* suitable for scoping stats buckets without exposing the raw session id.
|
|
583
|
+
*/
|
|
584
|
+
function hashSessionId(sessionId: string): string {
|
|
585
|
+
return createHash("sha256").update(sessionId).digest("hex").slice(0, 16);
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
/**
|
|
589
|
+
* Build a session-scoped stats key from a session hash + provider/id.
|
|
590
|
+
* Pure function (no closure dependency) for use by tests and internals.
|
|
591
|
+
*/
|
|
592
|
+
function makeSessionModelKey(sessionHash: string, provider: string, id: string): string {
|
|
593
|
+
return `${sessionHash}:${provider}/${id}`;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Extract the user-facing model key from a session-scoped key.
|
|
598
|
+
* "abc123:otokapi/gpt-5.5" → "otokapi/gpt-5.5"
|
|
599
|
+
*/
|
|
600
|
+
function modelKeyFromSessionKey(sessionModelKey: string): string {
|
|
601
|
+
const idx = sessionModelKey.indexOf(":");
|
|
602
|
+
return idx >= 0 ? sessionModelKey.slice(idx + 1) : sessionModelKey;
|
|
603
|
+
}
|
|
604
|
+
|
|
548
605
|
function asRecord(value: unknown): UnknownRecord | undefined {
|
|
549
606
|
if (typeof value !== "object" || value === null || Array.isArray(value)) return undefined;
|
|
550
607
|
return value as UnknownRecord;
|
|
@@ -1141,6 +1198,10 @@ function modelKey(model: PiModel): string {
|
|
|
1141
1198
|
return `${model.provider}/${model.id}`;
|
|
1142
1199
|
}
|
|
1143
1200
|
|
|
1201
|
+
function keyForModelExt(model: { provider: string; id: string }): string {
|
|
1202
|
+
return `${model.provider}/${model.id}`;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1144
1205
|
function usageRecordFromAssistant(message: unknown): UnknownRecord | undefined {
|
|
1145
1206
|
return asRecord(getAssistantRecord(message)?.usage);
|
|
1146
1207
|
}
|
|
@@ -2473,6 +2534,119 @@ function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): str
|
|
|
2473
2534
|
return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
2474
2535
|
}
|
|
2475
2536
|
|
|
2537
|
+
/**
|
|
2538
|
+
* Compute a hit-ratio percentage string from a hit count and a total count.
|
|
2539
|
+
* Returns e.g. "75%", "0%", "100%", or "N/A" for zero total.
|
|
2540
|
+
*/
|
|
2541
|
+
function formatHitRatio(hits: number, total: number): string {
|
|
2542
|
+
if (total <= 0) return "N/A";
|
|
2543
|
+
return `${Math.round((hits / total) * 100)}%`;
|
|
2544
|
+
}
|
|
2545
|
+
|
|
2546
|
+
/**
|
|
2547
|
+
* Format a token-to-M abbreviation for stats output.
|
|
2548
|
+
* Example: 1500000 → "1.50" (the number only; callers append the "M" suffix)
|
|
2549
|
+
*/
|
|
2550
|
+
function formatTokenM(value: number): string {
|
|
2551
|
+
const millions = Math.max(0, Math.round(value)) / 1_000_000;
|
|
2552
|
+
if (millions === 0) return "0";
|
|
2553
|
+
if (millions < 0.01) return millions.toFixed(4);
|
|
2554
|
+
if (millions >= 10) return millions.toFixed(1);
|
|
2555
|
+
return millions.toFixed(2);
|
|
2556
|
+
}
|
|
2557
|
+
|
|
2558
|
+
/**
|
|
2559
|
+
* Check if an assistant message's usage fields appear to be missing or empty.
|
|
2560
|
+
* Returns true when Pi-normalized fields (input, cacheRead, cacheWrite) are all
|
|
2561
|
+
* absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
|
|
2562
|
+
* for the given adapter.
|
|
2563
|
+
*/
|
|
2564
|
+
function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
|
|
2565
|
+
const usage = usageRecordFromAssistant(message);
|
|
2566
|
+
if (!usage) return true;
|
|
2567
|
+
|
|
2568
|
+
// Check Pi-normalized fields
|
|
2569
|
+
const input = getNonNegativeNumber(usage, "input");
|
|
2570
|
+
const cacheRead = getNonNegativeNumber(usage, "cacheRead");
|
|
2571
|
+
const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
|
|
2572
|
+
|
|
2573
|
+
// If cacheRead or cacheWrite is defined at all (even as zero), or input is positive, usage is present
|
|
2574
|
+
if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
|
|
2575
|
+
return false;
|
|
2576
|
+
}
|
|
2577
|
+
|
|
2578
|
+
// Check raw usage for the adapter's provider family
|
|
2579
|
+
const rawUsage = adapter.normalizeUsage(message);
|
|
2580
|
+
if (!rawUsage || (rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0)) {
|
|
2581
|
+
return true;
|
|
2582
|
+
}
|
|
2583
|
+
|
|
2584
|
+
return false;
|
|
2585
|
+
}
|
|
2586
|
+
|
|
2587
|
+
/**
|
|
2588
|
+
* Build a summary string for the recent trend (last N samples).
|
|
2589
|
+
* Example: "Recent 10/10: 7/10 hits · 65% tok cached" (" · N missing usage" is appended only when N > 0)
|
|
2590
|
+
*/
|
|
2591
|
+
function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
|
|
2592
|
+
const recent = samples.slice(-maxCount);
|
|
2593
|
+
if (recent.length === 0) return `Recent ${maxCount}: no samples yet`;
|
|
2594
|
+
|
|
2595
|
+
const hits = recent.filter((s) => s.hit).length;
|
|
2596
|
+
const totalCached = recent.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
2597
|
+
const totalInput = recent.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
2598
|
+
const missingCount = recent.filter((s) => s.missingUsageFields).length;
|
|
2599
|
+
|
|
2600
|
+
const hitRatio = formatHitRatio(hits, recent.length);
|
|
2601
|
+
const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "N/A";
|
|
2602
|
+
|
|
2603
|
+
let result = `Recent ${recent.length}/${maxCount}: ${hits}/${recent.length} hits · ${tokenRatio} tok cached`;
|
|
2604
|
+
if (missingCount > 0) {
|
|
2605
|
+
result += ` · ${missingCount} missing usage`;
|
|
2606
|
+
}
|
|
2607
|
+
return result;
|
|
2608
|
+
}
|
|
2609
|
+
|
|
2610
|
+
/**
|
|
2611
|
+
* Build the output for `/cache-optimizer stats`.
|
|
2612
|
+
*/
|
|
2613
|
+
function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
|
|
2614
|
+
const lines: string[] = [];
|
|
2615
|
+
|
|
2616
|
+
if (!model || !adapter) {
|
|
2617
|
+
lines.push("ℹ️ No cache-adapter-matched model active. Select a model with a recognized provider family.");
|
|
2618
|
+
return lines.join("\n");
|
|
2619
|
+
}
|
|
2620
|
+
|
|
2621
|
+
const key = modelKey(model);
|
|
2622
|
+
const currentStats = stats ?? emptyCacheStats();
|
|
2623
|
+
|
|
2624
|
+
lines.push(`Model key: ${key}`);
|
|
2625
|
+
lines.push(`Adapter: ${adapter.label}`);
|
|
2626
|
+
lines.push("");
|
|
2627
|
+
lines.push("── Today ──");
|
|
2628
|
+
lines.push(`Requests: ${currentStats.hitRequests} hit / ${currentStats.totalRequests} total · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
|
|
2629
|
+
lines.push(`Cached tokens: ${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M input · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "N/A"}`);
|
|
2630
|
+
if (currentStats.cacheWriteInputTokens > 0) {
|
|
2631
|
+
lines.push(`Cache write: ${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
|
|
2632
|
+
}
|
|
2633
|
+
|
|
2634
|
+
lines.push("");
|
|
2635
|
+
lines.push("── Recent trend ──");
|
|
2636
|
+
lines.push(formatRecentTrendSummary(recentSamples, 10));
|
|
2637
|
+
lines.push(formatRecentTrendSummary(recentSamples, 30));
|
|
2638
|
+
|
|
2639
|
+
// Check if any sample has missingUsageFields flagged
|
|
2640
|
+
const missingAny = recentSamples.some((s) => s.missingUsageFields);
|
|
2641
|
+
if (missingAny) {
|
|
2642
|
+
lines.push("");
|
|
2643
|
+
lines.push("⚠️ Some recent responses had missing or empty cache usage fields. Footer may under-report hits.");
|
|
2644
|
+
lines.push(" The proxy may not return prompt_cache_hit_tokens or usage.input/cacheRead in responses.");
|
|
2645
|
+
}
|
|
2646
|
+
|
|
2647
|
+
return lines.join("\n");
|
|
2648
|
+
}
|
|
2649
|
+
|
|
2476
2650
|
function getErrorCode(error: unknown): string | undefined {
|
|
2477
2651
|
return typeof error === "object" && error !== null && "code" in error
|
|
2478
2652
|
? String((error as { code?: unknown }).code)
|
|
@@ -2517,7 +2691,39 @@ function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
|
|
|
2517
2691
|
const record = asRecord(value);
|
|
2518
2692
|
if (!record) return undefined;
|
|
2519
2693
|
|
|
2520
|
-
// version
|
|
2694
|
+
// version 4: session-scoped stats + legacy family fallback
|
|
2695
|
+
if (record.version === 4) {
|
|
2696
|
+
const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
|
|
2697
|
+
const rawFamily = asRecord(record.legacyFamily);
|
|
2698
|
+
if (rawFamily) {
|
|
2699
|
+
for (const id of CACHE_PROVIDER_IDS) {
|
|
2700
|
+
const stats = parseCacheStats(rawFamily[id]);
|
|
2701
|
+
if (stats) legacyFamily[id] = stats;
|
|
2702
|
+
}
|
|
2703
|
+
}
|
|
2704
|
+
|
|
2705
|
+
// Collect all session entries into statsByModel with session-hash-prefixed keys
|
|
2706
|
+
// (e.g. "abc123:otokapi/gpt-5.5") so that writePersistedCacheStats can later
|
|
2707
|
+
// reconstruct individual sessions from the flat key format and other sessions'
|
|
2708
|
+
// data is not silently lost on round-trip.
|
|
2709
|
+
const statsByModel: Record<string, CacheStats> = {};
|
|
2710
|
+
const rawSessions = asRecord(record.sessions);
|
|
2711
|
+
if (rawSessions) {
|
|
2712
|
+
for (const [sessionHash, modelMap] of Object.entries(rawSessions)) {
|
|
2713
|
+
const parsedMap = asRecord(modelMap);
|
|
2714
|
+
if (parsedMap) {
|
|
2715
|
+
for (const [modelKey, val] of Object.entries(parsedMap)) {
|
|
2716
|
+
const parsed = parseCacheStats(val);
|
|
2717
|
+
if (parsed) statsByModel[`${sessionHash}:${modelKey}`] = parsed;
|
|
2718
|
+
}
|
|
2719
|
+
}
|
|
2720
|
+
}
|
|
2721
|
+
}
|
|
2722
|
+
|
|
2723
|
+
return { statsByModel, legacyFamily };
|
|
2724
|
+
}
|
|
2725
|
+
|
|
2726
|
+
// version 3: migrate to v4 semantics by wrapping statsByModel into sessions
|
|
2521
2727
|
if (record.version === 3) {
|
|
2522
2728
|
const statsByModel: Record<string, CacheStats> = {};
|
|
2523
2729
|
const rawModelMap = asRecord(record.statsByModel);
|
|
@@ -2602,11 +2808,122 @@ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
|
|
|
2602
2808
|
return undefined;
|
|
2603
2809
|
}
|
|
2604
2810
|
|
|
2605
|
-
|
|
2811
|
+
/**
|
|
2812
|
+
* The closure-internal writer. Since the closure has access to currentSessionHash,
|
|
2813
|
+
* it passes the hash and statsByModel here. This function wraps them in the v4
|
|
2814
|
+
* sessions format, combining with any previously-persisted sessions for safety.
|
|
2815
|
+
*
|
|
2816
|
+
* When called from the closure, `state.statsByModel` contains only the current
|
|
2817
|
+
* session's entries (keyed by `${sessionHash}:${provider}/${id}`). We extract
|
|
2818
|
+
* the model-key-only entries and store them under the session hash.
|
|
2819
|
+
*/
|
|
2820
|
+
/**
|
|
2821
|
+
* Merge in-memory stats state into an existing sessions map for persistence.
|
|
2822
|
+
*
|
|
2823
|
+
* When `currentSessionHash` is provided (explicit hash mode):
|
|
2824
|
+
* - Current-session entries are extracted from `state.statsByModel` (keys
|
|
2825
|
+
* prefixed with `currentSessionHash:`) and written under the session hash.
|
|
2826
|
+
* - The transitional legacy `_nosession` bucket is DELETED — its entries
|
|
2827
|
+
* were already consumed and migrated into memory by `restoreCacheStats`.
|
|
2828
|
+
* Keeping `_nosession` on disk would allow resurrection of reset stats
|
|
2829
|
+
* on the next reload (the reset-undo bug).
|
|
2830
|
+
* - Other real session hashes are preserved intact.
|
|
2831
|
+
*
|
|
2832
|
+
* When `currentSessionHash` is undefined (no-hash mode):
|
|
2833
|
+
* - Keys with a hash prefix (`hash:provider/model`) are grouped under their
|
|
2834
|
+
* respective session hashes.
|
|
2835
|
+
* - Keys without a hash prefix (legacy v3) are grouped under `_nosession` so
|
|
2836
|
+
* `restoreCacheStats` can migrate them on the next load before the session
|
|
2837
|
+
* id is known.
|
|
2838
|
+
*
|
|
2839
|
+
* Pure function (no I/O) — suitable for unit tests without touching the real
|
|
2840
|
+
* state file at `~/.pi/agent/pi-cache-optimizer-stats.json`.
|
|
2841
|
+
*/
|
|
2842
|
+
function mergeCacheSessions(
|
|
2843
|
+
existingSessions: Record<string, Record<string, CacheStats>>,
|
|
2844
|
+
state: CacheStatsState,
|
|
2845
|
+
currentSessionHash?: string,
|
|
2846
|
+
): Record<string, Record<string, CacheStats>> {
|
|
2847
|
+
// Deep-copy to avoid mutating the caller's object.
|
|
2848
|
+
const sessions: Record<string, Record<string, CacheStats>> = {};
|
|
2849
|
+
for (const [hash, models] of Object.entries(existingSessions)) {
|
|
2850
|
+
sessions[hash] = { ...models };
|
|
2851
|
+
}
|
|
2852
|
+
|
|
2853
|
+
if (currentSessionHash !== undefined) {
|
|
2854
|
+
// Explicit hash mode: extract this session's data from state.statsByModel.
|
|
2855
|
+
// When the session has no entries (e.g. after reset of sole bucket), this
|
|
2856
|
+
// still sets an empty map, ensuring the deleted bucket does not return.
|
|
2857
|
+
const prefix = `${currentSessionHash}:`;
|
|
2858
|
+
const currentModelStats: Record<string, CacheStats> = {};
|
|
2859
|
+
for (const [fullKey, stats] of Object.entries(state.statsByModel)) {
|
|
2860
|
+
if (fullKey.startsWith(prefix)) {
|
|
2861
|
+
currentModelStats[fullKey.slice(prefix.length)] = stats;
|
|
2862
|
+
}
|
|
2863
|
+
}
|
|
2864
|
+
sessions[currentSessionHash] = currentModelStats;
|
|
2865
|
+
|
|
2866
|
+
// _nosession is a transitional legacy migration bucket — once we write
|
|
2867
|
+
// under an authoritative session hash, those entries have already been
|
|
2868
|
+
// consumed and migrated into memory by restoreCacheStats. Delete to
|
|
2869
|
+
// prevent resurrection of reset stats on the next reload.
|
|
2870
|
+
delete sessions["_nosession"];
|
|
2871
|
+
} else {
|
|
2872
|
+
// No-hash mode: group entries by their existing hash prefix to avoid
|
|
2873
|
+
// collapsing multiple sessions into one bucket. Keys without a hash
|
|
2874
|
+
// prefix (legacy v3) go under "_nosession" so restoreCacheStats can
|
|
2875
|
+
// migrate them to the current session on next load.
|
|
2876
|
+
const nosessionMap: Record<string, CacheStats> = {};
|
|
2877
|
+
for (const [fullKey, stats] of Object.entries(state.statsByModel)) {
|
|
2878
|
+
const idx = fullKey.indexOf(":");
|
|
2879
|
+
if (idx >= 0) {
|
|
2880
|
+
const hash = fullKey.slice(0, idx);
|
|
2881
|
+
const modelKey = fullKey.slice(idx + 1);
|
|
2882
|
+
if (!sessions[hash]) sessions[hash] = {};
|
|
2883
|
+
sessions[hash][modelKey] = stats;
|
|
2884
|
+
} else {
|
|
2885
|
+
// Key without hash prefix (legacy v3) — group under _nosession.
|
|
2886
|
+
nosessionMap[fullKey] = stats;
|
|
2887
|
+
}
|
|
2888
|
+
}
|
|
2889
|
+
if (Object.keys(nosessionMap).length > 0) {
|
|
2890
|
+
sessions["_nosession"] = nosessionMap;
|
|
2891
|
+
}
|
|
2892
|
+
}
|
|
2893
|
+
|
|
2894
|
+
return sessions;
|
|
2895
|
+
}
|
|
2896
|
+
|
|
2897
|
+
async function writePersistedCacheStats(state: CacheStatsState, currentSessionHash?: string): Promise<void> {
|
|
2606
2898
|
await mkdir(STATE_DIR, { recursive: true });
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2899
|
+
|
|
2900
|
+
// Read existing file to preserve other sessions' data.
|
|
2901
|
+
let existingSessions: Record<string, Record<string, CacheStats>> = {};
|
|
2902
|
+
try {
|
|
2903
|
+
const raw = await readFile(STATE_FILE_PATH, "utf8");
|
|
2904
|
+
const parsed = parsePersistedCacheStats(JSON.parse(raw));
|
|
2905
|
+
if (parsed) {
|
|
2906
|
+
// Reconstruct sessions from statsByModel keys.
|
|
2907
|
+
// Each key has form `${hash}:${provider}/${id}`; group by hash.
|
|
2908
|
+
for (const [fullKey, stats] of Object.entries(parsed.statsByModel)) {
|
|
2909
|
+
const idx = fullKey.indexOf(":");
|
|
2910
|
+
if (idx >= 0) {
|
|
2911
|
+
const hash = fullKey.slice(0, idx);
|
|
2912
|
+
const modelKey = fullKey.slice(idx + 1);
|
|
2913
|
+
if (!existingSessions[hash]) existingSessions[hash] = {};
|
|
2914
|
+
existingSessions[hash][modelKey] = stats;
|
|
2915
|
+
}
|
|
2916
|
+
}
|
|
2917
|
+
}
|
|
2918
|
+
} catch {
|
|
2919
|
+
// Ignore read errors (file may not exist yet).
|
|
2920
|
+
}
|
|
2921
|
+
|
|
2922
|
+
const sessions = mergeCacheSessions(existingSessions, state, currentSessionHash);
|
|
2923
|
+
|
|
2924
|
+
const payload: PersistedCacheStatsV4 = {
|
|
2925
|
+
version: 4,
|
|
2926
|
+
sessions,
|
|
2610
2927
|
legacyFamily: state.legacyFamily,
|
|
2611
2928
|
};
|
|
2612
2929
|
const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
|
|
@@ -2842,6 +3159,115 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
2842
3159
|
return lines.join("\n");
|
|
2843
3160
|
}
|
|
2844
3161
|
|
|
3162
|
+
/**
 * Build the "Cache diagnosis" section that the doctor command appends to its output.
 *
 * The section is only produced when at least one concrete problem is detected:
 * missing compat flags, recent responses without usage fields, a low hit ratio
 * in the supplied stats bucket, or a run of recent samples with no cache hit.
 * It is a separate function because it depends on per-session state (recent
 * samples, per-model stats) that is not available at the module level.
 *
 * @param model - Active model; passed to the compat and router/channel checks.
 * @param adapter - Not read by this function; kept so the call signature stays stable.
 * @param stats - Stats bucket for the active model; an empty bucket is used when undefined.
 * @param samples - Recent usage samples; only the last 10 entries feed the trend checks.
 * @returns Lines to append to the doctor output, or an empty array when nothing is worth flagging.
 */
function buildLowHitDiagnosis(
  model: PiModel,
  adapter: CacheProviderAdapter | undefined,
  stats: CacheStats | undefined,
  samples: CacheUsageSample[],
): string[] {
  const lines: string[] = [];

  // 1. Missing compat flags (reuse existing check)
  const missingCompat = describeMissingOpenAICompatibleProxyCompat(model);

  // 2. Router/channel risk (reuse existing check)
  const routerNotes = describeRouterChannelDiagnostics(model);

  // 3. Recent samples missing usage fields
  const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;

  // 4. Recent trend: the last 10 samples and how many of them were cache hits
  const recent10 = samples.slice(-10);
  const recent10Total = recent10.length;
  const recent10Hits = recent10.filter((s) => s.hit).length;

  // 5. Overall trend from the supplied stats bucket
  const todayStats = stats ?? emptyCacheStats();

  // Rounded percentage used by every user-facing low-hit check below. It is
  // computed at function scope because the final "compat is configured" check
  // runs outside the recent-samples branch and must still be able to read it.
  const todayHitRatio = todayStats.totalInputTokens > 0
    ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
    : 0;
  const todayIsLow = todayStats.totalRequests > 3 && todayHitRatio < 30;

  const hasMissingCompat = missingCompat.length > 0;
  const hasRouterRisk = routerNotes.length > 0;
  const hasUsageMissing = missingUsageSamples > 0;

  // Determine if there are actual issues worth flagging. The stats clause
  // deliberately uses the unrounded ratio and requires non-zero input tokens,
  // so a bucket with requests but no token data does not open the section by itself.
  const hasActualIssues = hasMissingCompat || hasUsageMissing ||
    // Low hit trend (today total > 3 and hit ratio < 30%)
    (todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
      (todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
    // Low hit rate in recent samples (recent10Total >= 3 and all misses)
    (recent10Total >= 3 && recent10Hits === 0);

  // Skip section if no issues. Router risk alone never opens the section:
  // it is only reported when it compounds one of the issues above.
  if (!hasActualIssues) {
    return lines;
  }

  lines.push("");
  lines.push("── Cache diagnosis ──");

  // Priority 1: missing compat flags
  if (hasMissingCompat) {
    lines.push(`⚠️ Missing compat flags: ${missingCompat.join(", ")}`);
    lines.push(" These flags enable prompt caching and session-affinity routing.");
    lines.push(" Run /cache-optimizer compat for edit instructions.");
  }

  // Priority 2: router/channel risk. Router notes are already shown in the main
  // doctor output, so this is just a pointer; reaching this line implies an
  // actual issue exists for the router risk to compound.
  if (hasRouterRisk) {
    lines.push("🔀 Router/channel proxy detected — see routing notes above.");
  }

  // Priority 3: usage fields missing
  if (hasUsageMissing) {
    lines.push(`⚠️ ${missingUsageSamples}/${samples.length} recent responses had missing/empty usage fields.`);
    lines.push(" Footer may under-report cache hit rate.");
    lines.push(" Verify the proxy returns prompt-level usage (prompt_tokens, input_tokens_details).");
  }

  // Priority 4: recent trend low (only reported when recent samples exist)
  if (recent10Total > 0) {
    if (todayIsLow && recent10Hits === 0) {
      lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
      lines.push(" Likely causes: proxy routing to different backends per request,");
      lines.push(" or prompt prefix changes across turns.");
      lines.push(" Verify session affinity (sendSessionAffinityHeaders) and long cache retention.");
    } else if (todayIsLow) {
      lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${todayStats.totalRequests} total requests).`);
      lines.push(" Check compat flags and proxy upstream routing.");
    }

    // Show brief trend summary if there are enough samples
    if (recent10Total >= 3) {
      const trend = formatRecentTrendSummary(samples, 10);
      lines.push(`📊 ${trend}`);
    }
  }

  // For fully configured but low hit models, emphasize sticky routing
  if (!hasMissingCompat && !hasRouterRisk && todayIsLow) {
    lines.push("💡 Compat is configured but cache hit rate remains low.");
    lines.push(" Possible causes:");
    lines.push(" • Proxy still routes to multiple backends — check session affinity on the proxy side.");
    lines.push(" • Prompt prefix varies per turn — check dynamic context in system prompt.");
    lines.push(" • Provider does not return cache usage fields — footer can't measure hits.");
  }

  return lines;
}
|
|
3270
|
+
|
|
2845
3271
|
function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
2846
3272
|
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2847
3273
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
@@ -3042,6 +3468,26 @@ export const __internals_for_tests = {
|
|
|
3042
3468
|
emptyAllCacheStats,
|
|
3043
3469
|
parseCacheStats,
|
|
3044
3470
|
parsePersistedCacheStats,
|
|
3471
|
+
// Recent sample / stats output / diagnosis helpers
|
|
3472
|
+
MAX_RECENT_SAMPLES,
|
|
3473
|
+
buildStatsOutput,
|
|
3474
|
+
buildLowHitDiagnosis,
|
|
3475
|
+
formatRecentTrendSummary,
|
|
3476
|
+
formatHitRatio,
|
|
3477
|
+
formatTokenM,
|
|
3478
|
+
hasMissingUsageFields,
|
|
3479
|
+
keyForModelExt,
|
|
3480
|
+
// Session-scoped helpers
|
|
3481
|
+
hashSessionId,
|
|
3482
|
+
makeSessionModelKey,
|
|
3483
|
+
modelKeyFromSessionKey,
|
|
3484
|
+
// Persistence helpers (for reload/reset tests)
|
|
3485
|
+
mergeCacheSessions,
|
|
3486
|
+
writePersistedCacheStats,
|
|
3487
|
+
readPersistedCacheStats,
|
|
3488
|
+
STATE_FILE_PATH,
|
|
3489
|
+
LEGACY_STATE_FILE_PATH,
|
|
3490
|
+
STATE_DIR,
|
|
3045
3491
|
};
|
|
3046
3492
|
|
|
3047
3493
|
export default function (pi: ExtensionAPI) {
|
|
@@ -3052,8 +3498,57 @@ export default function (pi: ExtensionAPI) {
|
|
|
3052
3498
|
let persistenceWarningShown = false;
|
|
3053
3499
|
let persistTimer: ReturnType<typeof setTimeout> | null = null;
|
|
3054
3500
|
let integrityNotificationShown = false;
|
|
3501
|
+
let currentSessionId = "";
|
|
3502
|
+
let currentSessionHash = "";
|
|
3503
|
+
let currentSessionHashSet = false;
|
|
3055
3504
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
3505
|
+
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
3506
|
+
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
3507
|
+
|
|
3508
|
+
/**
 * Compose the session-scoped stats key for a model.
 *
 * The key has the form `${sessionHash}:${provider}/${id}`. While no session
 * hash is available (empty string), the literal "_nosession" is used as the
 * scope instead.
 */
function sessionModelKey(model: { provider: string; id: string }): string {
  const scope = currentSessionHash || "_nosession";
  const modelPart = `${model.provider}/${model.id}`;
  return `${scope}:${modelPart}`;
}
|
|
3056
3516
|
|
|
3517
|
+
/**
 * Strip the session scope from a session-scoped key, leaving the user-facing model key.
 * Everything up to and including the first ":" is dropped; a key without ":" is
 * returned unchanged.
 *
 *   "abc123:otokapi/gpt-5.5" → "otokapi/gpt-5.5"
 */
function modelKeyFromSessionScoped(sKey: string): string {
  const separatorAt = sKey.indexOf(":");
  if (separatorAt < 0) {
    return sKey;
  }
  return sKey.slice(separatorAt + 1);
}
|
|
3525
|
+
|
|
3526
|
+
/**
 * Append one usage sample to the in-memory list kept for `modelKeyStr`,
 * creating the list on first use and trimming it from the front so it never
 * holds more than MAX_RECENT_SAMPLES entries.
 *
 * @param modelKeyStr - Key of the sample list to append to.
 * @param usage - Usage snapshot whose cacheRead / cacheWrite / totalInput are recorded.
 * @param missingUsageFields - Stored on the sample as-is.
 */
function recordRecentSample(modelKeyStr: string, usage: UsageSnapshot, missingUsageFields: boolean): void {
  const existing = recentSamplesByModelKey.get(modelKeyStr);
  const bucket: CacheUsageSample[] = existing ?? [];
  if (existing === undefined) {
    recentSamplesByModelKey.set(modelKeyStr, bucket);
  }

  const sample: CacheUsageSample = {
    timestamp: Date.now(),
    // A sample counts as a hit when any input tokens were read from cache.
    hit: usage.cacheRead > 0,
    cachedInputTokens: usage.cacheRead,
    cacheWriteInputTokens: usage.cacheWrite,
    totalInputTokens: usage.totalInput,
    missingUsageFields,
  };
  bucket.push(sample);

  // Drop the oldest entries once the cap is exceeded.
  const overflow = bucket.length - MAX_RECENT_SAMPLES;
  if (overflow > 0) {
    bucket.splice(0, overflow);
  }
}
|
|
3544
|
+
|
|
3545
|
+
/**
 * Return the recent samples recorded under `modelKeyStr`, or a new empty
 * array when nothing has been recorded for that key.
 */
function getRecentSamples(modelKeyStr: string): CacheUsageSample[] {
  const bucket = recentSamplesByModelKey.get(modelKeyStr);
  return bucket === undefined ? [] : bucket;
}
|
|
3548
|
+
|
|
3549
|
+
/** Discard every recorded recent sample, for all keys. */
function clearRecentSamples(): void {
  recentSamplesByModelKey.clear();
}
|
|
3057
3552
|
|
|
3058
3553
|
function getCacheStatsState(): CacheStatsState {
|
|
3059
3554
|
return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
|
|
@@ -3062,7 +3557,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3062
3557
|
/** Look up active stats for a model, falling back to legacy family. */
|
|
3063
3558
|
function getStatsForModel(model: PiModel | undefined, adapter: CacheProviderAdapter): CacheStats {
|
|
3064
3559
|
if (model) {
|
|
3065
|
-
const key =
|
|
3560
|
+
const key = sessionModelKey(model);
|
|
3066
3561
|
const existing = cacheStatsByModel[key];
|
|
3067
3562
|
if (existing) return existing;
|
|
3068
3563
|
}
|
|
@@ -3089,7 +3584,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3089
3584
|
|
|
3090
3585
|
async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
|
|
3091
3586
|
try {
|
|
3092
|
-
await writePersistedCacheStats(getCacheStatsState());
|
|
3587
|
+
await writePersistedCacheStats(getCacheStatsState(), currentSessionHashSet ? currentSessionHash : undefined);
|
|
3093
3588
|
} catch (error) {
|
|
3094
3589
|
console.warn(`${LOG_PREFIX}: failed to persist cache stats`, error);
|
|
3095
3590
|
if (!persistenceWarningShown) {
|
|
@@ -3155,19 +3650,80 @@ export default function (pi: ExtensionAPI) {
|
|
|
3155
3650
|
}
|
|
3156
3651
|
|
|
3157
3652
|
async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
|
|
3653
|
+
// Set session id on first load and on reload (same session).
|
|
3654
|
+
const sid = ctx.sessionManager.getSessionId();
|
|
3655
|
+
if (sid && (sid !== currentSessionId || !currentSessionHashSet)) {
|
|
3656
|
+
currentSessionId = sid;
|
|
3657
|
+
currentSessionHash = hashSessionId(sid);
|
|
3658
|
+
currentSessionHashSet = true;
|
|
3659
|
+
}
|
|
3660
|
+
|
|
3158
3661
|
if (reason === "reload") {
|
|
3159
|
-
|
|
3160
|
-
|
|
3662
|
+
// /reload: preserve session-scoped stats (same session hash).
|
|
3663
|
+
// Pi extension reload creates a fresh closure, so cacheStatsByModel
|
|
3664
|
+
// starts empty. Read persisted data and filter for current session.
|
|
3161
3665
|
lastStatusText = undefined;
|
|
3162
|
-
// Reset integrity diagnostics on reload
|
|
3163
3666
|
lastPromptIntegrityWarningAt = 0;
|
|
3164
3667
|
integrityNotificationShown = false;
|
|
3165
|
-
|
|
3668
|
+
clearRecentSamples();
|
|
3669
|
+
|
|
3670
|
+
const persisted = await readPersistedCacheStats();
|
|
3671
|
+
if (persisted && currentSessionHash) {
|
|
3672
|
+
const prefix = `${currentSessionHash}:`;
|
|
3673
|
+
const filteredModelStats: Record<string, CacheStats> = {};
|
|
3674
|
+
for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
|
|
3675
|
+
if (fullKey.startsWith(prefix)) {
|
|
3676
|
+
// Current session's data
|
|
3677
|
+
filteredModelStats[fullKey] = stats;
|
|
3678
|
+
} else if (!fullKey.includes(":")) {
|
|
3679
|
+
// Legacy v3-style key without session hash — migrate to current session
|
|
3680
|
+
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3681
|
+
} else if (fullKey.startsWith("_nosession:")) {
|
|
3682
|
+
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3683
|
+
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3684
|
+
}
|
|
3685
|
+
}
|
|
3686
|
+
cacheStatsByModel = filteredModelStats;
|
|
3687
|
+
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3688
|
+
} else if (persisted) {
|
|
3689
|
+
cacheStatsByModel = persisted.statsByModel;
|
|
3690
|
+
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3691
|
+
} else {
|
|
3692
|
+
cacheStatsByModel = {};
|
|
3693
|
+
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
3694
|
+
}
|
|
3695
|
+
|
|
3696
|
+
await rollOverStatsIfNeeded(ctx);
|
|
3166
3697
|
return;
|
|
3167
3698
|
}
|
|
3168
3699
|
|
|
3700
|
+
// First load / process start: read persisted stats and filter for
|
|
3701
|
+
// this session's entries. If the session has no persisted data yet,
|
|
3702
|
+
// start fresh.
|
|
3169
3703
|
const persisted = await readPersistedCacheStats();
|
|
3170
|
-
if (persisted) {
|
|
3704
|
+
if (persisted && currentSessionHash) {
|
|
3705
|
+
const prefix = `${currentSessionHash}:`;
|
|
3706
|
+
const filteredModelStats: Record<string, CacheStats> = {};
|
|
3707
|
+
for (const [fullKey, stats] of Object.entries(persisted.statsByModel)) {
|
|
3708
|
+
if (fullKey.startsWith(prefix)) {
|
|
3709
|
+
// Current session's data — load it.
|
|
3710
|
+
filteredModelStats[fullKey] = stats;
|
|
3711
|
+
} else if (!fullKey.includes(":")) {
|
|
3712
|
+
// Legacy v3-style key without session hash (e.g. "otokapi/gpt-5.5").
|
|
3713
|
+
// Migrate to current session by prefixing with the session hash.
|
|
3714
|
+
filteredModelStats[`${currentSessionHash}:${fullKey}`] = stats;
|
|
3715
|
+
} else if (fullKey.startsWith("_nosession:")) {
|
|
3716
|
+
// _nosession migration remnant from old-path v4 write — migrate to current session
|
|
3717
|
+
filteredModelStats[`${currentSessionHash}:${fullKey.slice("_nosession:".length)}`] = stats;
|
|
3718
|
+
}
|
|
3719
|
+
// Other sessions' entries are preserved in the file but not loaded
|
|
3720
|
+
// into memory; they'll be rewritten on next persist.
|
|
3721
|
+
}
|
|
3722
|
+
cacheStatsByModel = filteredModelStats;
|
|
3723
|
+
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3724
|
+
} else if (persisted) {
|
|
3725
|
+
// Persisted data exists but no session hash set yet.
|
|
3726
|
+
// This shouldn't normally happen — use the data as-is.
|
|
3171
3727
|
cacheStatsByModel = persisted.statsByModel;
|
|
3172
3728
|
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
3173
3729
|
} else {
|
|
@@ -3184,13 +3740,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
3184
3740
|
const adapter = selectAdapterForModel(model);
|
|
3185
3741
|
let statusText: string | undefined;
|
|
3186
3742
|
if (adapter) {
|
|
3187
|
-
// Display
|
|
3188
|
-
//
|
|
3189
|
-
//
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
const key = model ? modelKey(model) : undefined;
|
|
3193
|
-
const stats = key ? cacheStatsByModel[key] : undefined;
|
|
3743
|
+
// Display session-scoped stats. A model that has never been used
|
|
3744
|
+
// in this session shows 0/0. The message_end hook populates
|
|
3745
|
+
// cacheStatsByModel[sessionModelKey(model)] on first use.
|
|
3746
|
+
const sk = model ? sessionModelKey(model) : undefined;
|
|
3747
|
+
const stats = sk ? cacheStatsByModel[sk] : undefined;
|
|
3194
3748
|
statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
3195
3749
|
}
|
|
3196
3750
|
|
|
@@ -3345,15 +3899,25 @@ export default function (pi: ExtensionAPI) {
|
|
|
3345
3899
|
if (!adapter) return;
|
|
3346
3900
|
|
|
3347
3901
|
const usage = adapter.normalizeUsage(event.message);
|
|
3902
|
+
|
|
3903
|
+
// Record recent sample (even when usage is missing, for trend diagnosis)
|
|
3904
|
+
if (ctx.model) {
|
|
3905
|
+
const sk = sessionModelKey(ctx.model);
|
|
3906
|
+
const missingFields = usage === undefined || (usage.cacheRead === 0 && usage.cacheWrite === 0 && usage.totalInput === 0)
|
|
3907
|
+
? true
|
|
3908
|
+
: hasMissingUsageFields(event.message, adapter);
|
|
3909
|
+
recordRecentSample(sk, usage ?? { cacheRead: 0, cacheWrite: 0, totalInput: 0 }, missingFields);
|
|
3910
|
+
}
|
|
3911
|
+
|
|
3348
3912
|
if (!usage) return;
|
|
3349
3913
|
|
|
3350
3914
|
await rollOverStatsIfNeeded(ctx);
|
|
3351
3915
|
|
|
3352
|
-
// Update stats scoped to
|
|
3916
|
+
// Update stats scoped to current session + active model.
|
|
3353
3917
|
// Falls back to legacy family when ctx.model is undefined.
|
|
3354
3918
|
if (ctx.model) {
|
|
3355
|
-
const
|
|
3356
|
-
addUsageToCacheStats(getOrCreateStatsByModelKey(
|
|
3919
|
+
const sk = sessionModelKey(ctx.model);
|
|
3920
|
+
addUsageToCacheStats(getOrCreateStatsByModelKey(sk), usage);
|
|
3357
3921
|
} else {
|
|
3358
3922
|
addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
|
|
3359
3923
|
}
|
|
@@ -3366,8 +3930,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
3366
3930
|
// Register /cache-optimizer command
|
|
3367
3931
|
// Subcommands:
|
|
3368
3932
|
// doctor — show current model/provider/api/baseUrl/compat status
|
|
3933
|
+
// with low-hit diagnosis
|
|
3934
|
+
// stats — show active model stats bucket, recent trend, usage
|
|
3369
3935
|
// compat — show compat suggestion with file path
|
|
3370
|
-
//
|
|
3936
|
+
// reset — reset current session model stats bucket (local only)
|
|
3937
|
+
// (no args) — interactive menu (with UI) or help summary
|
|
3371
3938
|
// ────────────────────────────────────────────────────────────────
|
|
3372
3939
|
pi.registerCommand("cache-optimizer", {
|
|
3373
3940
|
description: "Diagnose Pi cache configuration",
|
|
@@ -3380,7 +3947,27 @@ export default function (pi: ExtensionAPI) {
|
|
|
3380
3947
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3381
3948
|
return;
|
|
3382
3949
|
}
|
|
3383
|
-
|
|
3950
|
+
const diagnosis = buildDoctorDiagnosis(model);
|
|
3951
|
+
const adapter = selectAdapterForModel(model);
|
|
3952
|
+
const sk = model ? sessionModelKey(model) : undefined;
|
|
3953
|
+
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
3954
|
+
const samples = sk ? getRecentSamples(sk) : [];
|
|
3955
|
+
const lowHitLines = buildLowHitDiagnosis(model, adapter, statsState, samples);
|
|
3956
|
+
const fullDiagnosis = lowHitLines.length > 0
|
|
3957
|
+
? diagnosis + "\n" + lowHitLines.join("\n")
|
|
3958
|
+
: diagnosis;
|
|
3959
|
+
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
3960
|
+
} else if (subcommand === "stats") {
|
|
3961
|
+
if (!model) {
|
|
3962
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3963
|
+
return;
|
|
3964
|
+
}
|
|
3965
|
+
const adapter = selectAdapterForModel(model);
|
|
3966
|
+
const sk = model ? sessionModelKey(model) : undefined;
|
|
3967
|
+
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
3968
|
+
const samples = sk ? getRecentSamples(sk) : [];
|
|
3969
|
+
const output = buildStatsOutput(model, adapter, statsState, samples);
|
|
3970
|
+
cmdCtx.ui.notify(output, "info");
|
|
3384
3971
|
} else if (subcommand === "compat") {
|
|
3385
3972
|
if (!model) {
|
|
3386
3973
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
@@ -3397,12 +3984,46 @@ export default function (pi: ExtensionAPI) {
|
|
|
3397
3984
|
"info",
|
|
3398
3985
|
);
|
|
3399
3986
|
}
|
|
3987
|
+
} else if (subcommand === "reset") {
|
|
3988
|
+
if (!model) {
|
|
3989
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3990
|
+
return;
|
|
3991
|
+
}
|
|
3992
|
+
const adapter = selectAdapterForModel(model);
|
|
3993
|
+
if (!adapter) {
|
|
3994
|
+
cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
|
|
3995
|
+
return;
|
|
3996
|
+
}
|
|
3997
|
+
|
|
3998
|
+
const sk = sessionModelKey(model);
|
|
3999
|
+
const displayKey = modelKey(model);
|
|
4000
|
+
|
|
4001
|
+
// Reset session-scoped stats for the active model.
|
|
4002
|
+
delete cacheStatsByModel[sk];
|
|
4003
|
+
|
|
4004
|
+
// Clear recent samples for this session+model key.
|
|
4005
|
+
recentSamplesByModelKey.delete(sk);
|
|
4006
|
+
|
|
4007
|
+
// Persist immediately.
|
|
4008
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4009
|
+
|
|
4010
|
+
// Update footer to show 0/0.
|
|
4011
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4012
|
+
|
|
4013
|
+
cmdCtx.ui.notify(
|
|
4014
|
+
`✅ Reset local session cache stats for "${displayKey}". ` +
|
|
4015
|
+
"Upstream provider prompt cache was not modified. " +
|
|
4016
|
+
"New requests will start a fresh stats bucket for this Pi session.",
|
|
4017
|
+
"info",
|
|
4018
|
+
);
|
|
3400
4019
|
} else {
|
|
3401
4020
|
// Try interactive selection menu when UI supports it
|
|
3402
4021
|
if (cmdCtx.hasUI) {
|
|
3403
4022
|
const menuOptions = [
|
|
3404
4023
|
"🩺 Doctor — Show current model cache configuration",
|
|
4024
|
+
"📊 Stats — Show active model stats bucket and trend",
|
|
3405
4025
|
"⚙️ Compat — Show compat suggestion with edit instructions",
|
|
4026
|
+
"🔄 Reset — Reset local session stats for current model",
|
|
3406
4027
|
"❌ Cancel",
|
|
3407
4028
|
];
|
|
3408
4029
|
const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
|
|
@@ -3410,9 +4031,29 @@ export default function (pi: ExtensionAPI) {
|
|
|
3410
4031
|
if (!model) {
|
|
3411
4032
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3412
4033
|
} else {
|
|
3413
|
-
|
|
4034
|
+
const diagnosis = buildDoctorDiagnosis(model);
|
|
4035
|
+
const adapter = selectAdapterForModel(model);
|
|
4036
|
+
const sk = model ? sessionModelKey(model) : undefined;
|
|
4037
|
+
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
4038
|
+
const samples = sk ? getRecentSamples(sk) : [];
|
|
4039
|
+
const lowHitLines = buildLowHitDiagnosis(model, adapter, statsState, samples);
|
|
4040
|
+
const fullDiagnosis = lowHitLines.length > 0
|
|
4041
|
+
? diagnosis + "\n" + lowHitLines.join("\n")
|
|
4042
|
+
: diagnosis;
|
|
4043
|
+
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
3414
4044
|
}
|
|
3415
4045
|
} else if (choice === menuOptions[1]) {
|
|
4046
|
+
if (!model) {
|
|
4047
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4048
|
+
} else {
|
|
4049
|
+
const adapter = selectAdapterForModel(model);
|
|
4050
|
+
const sk = model ? sessionModelKey(model) : undefined;
|
|
4051
|
+
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
4052
|
+
const samples = sk ? getRecentSamples(sk) : [];
|
|
4053
|
+
const output = buildStatsOutput(model, adapter, statsState, samples);
|
|
4054
|
+
cmdCtx.ui.notify(output, "info");
|
|
4055
|
+
}
|
|
4056
|
+
} else if (choice === menuOptions[2]) {
|
|
3416
4057
|
if (!model) {
|
|
3417
4058
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3418
4059
|
} else {
|
|
@@ -3428,6 +4069,27 @@ export default function (pi: ExtensionAPI) {
|
|
|
3428
4069
|
);
|
|
3429
4070
|
}
|
|
3430
4071
|
}
|
|
4072
|
+
} else if (choice === menuOptions[3]) {
|
|
4073
|
+
if (!model) {
|
|
4074
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4075
|
+
} else {
|
|
4076
|
+
const adapter = selectAdapterForModel(model);
|
|
4077
|
+
if (!adapter) {
|
|
4078
|
+
cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
|
|
4079
|
+
} else {
|
|
4080
|
+
const sk = sessionModelKey(model);
|
|
4081
|
+
const displayKey = modelKey(model);
|
|
4082
|
+
delete cacheStatsByModel[sk];
|
|
4083
|
+
recentSamplesByModelKey.delete(sk);
|
|
4084
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4085
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4086
|
+
cmdCtx.ui.notify(
|
|
4087
|
+
`✅ Reset local session cache stats for "${displayKey}". ` +
|
|
4088
|
+
"Upstream provider prompt cache was not modified.",
|
|
4089
|
+
"info",
|
|
4090
|
+
);
|
|
4091
|
+
}
|
|
4092
|
+
}
|
|
3431
4093
|
}
|
|
3432
4094
|
// choice === "cancel" or undefined → no action
|
|
3433
4095
|
return;
|
|
@@ -3436,18 +4098,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
3436
4098
|
// Fallback: text help when no interactive UI
|
|
3437
4099
|
const diagnosis: string[] = [];
|
|
3438
4100
|
diagnosis.push("📋 /cache-optimizer commands:");
|
|
3439
|
-
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat
|
|
4101
|
+
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
|
|
4102
|
+
diagnosis.push(" stats — Show active model stats bucket and recent trend");
|
|
3440
4103
|
diagnosis.push(" compat — Show compat suggestion with edit location");
|
|
4104
|
+
diagnosis.push(" reset — Reset local session stats for current model (does not affect upstream)");
|
|
3441
4105
|
diagnosis.push("");
|
|
3442
4106
|
if (model) {
|
|
4107
|
+
const displayKey = modelKey(model);
|
|
3443
4108
|
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
3444
4109
|
if (missing.length > 0) {
|
|
3445
|
-
diagnosis.push(`⚠️ Active model "${
|
|
4110
|
+
diagnosis.push(`⚠️ Active model "${displayKey}" missing compat: ${missing.join(", ")}`);
|
|
3446
4111
|
diagnosis.push('Run "/cache-optimizer compat" for edit instructions.');
|
|
3447
4112
|
} else if (isCompatCheckApplicable(model)) {
|
|
3448
|
-
diagnosis.push(`✅ Active model "${
|
|
4113
|
+
diagnosis.push(`✅ Active model "${displayKey}": compat fully configured.`);
|
|
3449
4114
|
} else {
|
|
3450
|
-
diagnosis.push(`ℹ️ Active model "${
|
|
4115
|
+
diagnosis.push(`ℹ️ Active model "${displayKey}": compat check not applicable.`);
|
|
3451
4116
|
}
|
|
3452
4117
|
} else {
|
|
3453
4118
|
diagnosis.push("No active model selected.");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.5.0",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|