pi-cache-optimizer 2.4.7 → 2.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -5
- package/README.zh-CN.md +48 -2
- package/index.ts +550 -17
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -304,11 +304,13 @@ The extension registers a Pi command `/cache-optimizer` for interactive diagnosi
|
|
|
304
304
|
```
|
|
305
305
|
/cache-optimizer — interactive menu (or text help when no UI)
|
|
306
306
|
/cache-optimizer doctor — show provider, model, API, base URL, compat status
|
|
307
|
+
and low-hit cause diagnosis
|
|
308
|
+
/cache-optimizer stats — show active model stats bucket and recent trend
|
|
307
309
|
/cache-optimizer compat — show compat suggestion with edit instructions
|
|
308
310
|
```
|
|
309
311
|
|
|
310
312
|
When run without arguments, `/cache-optimizer` shows an interactive selection menu
|
|
311
|
-
(Doctor / Compat / Cancel) when the Pi UI supports it (`ctx.ui.select`). In
|
|
313
|
+
(Doctor / Stats / Compat / Cancel) when the Pi UI supports it (`ctx.ui.select`). In
|
|
312
314
|
non-interactive terminals, it falls back to text help with current model compat
|
|
313
315
|
status.
|
|
314
316
|
|
|
@@ -339,10 +341,85 @@ Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as ba
|
|
|
339
341
|
|
|
340
342
|
### `/cache-optimizer compat`
|
|
341
343
|
|
|
342
|
-
Shows
|
|
343
|
-
provider path, and copyable JSON snippet. When no flags are missing,
|
|
344
|
-
`✅ Compat fully configured.` if the model is an applicable
|
|
345
|
-
or `ℹ️ Compat check not applicable for this model.`
|
|
344
|
+
Shows the compat suggestion for the active model, including file path,
|
|
345
|
+
provider path, and copyable JSON snippet. When no compat flags are missing,
|
|
346
|
+
it shows `✅ Compat fully configured.` if the model is an applicable
|
|
347
|
+
third-party proxy, or `ℹ️ Compat check not applicable for this model.`
|
|
348
|
+
otherwise.
|
|
349
|
+
|
|
350
|
+
### `/cache-optimizer stats`
|
|
351
|
+
|
|
352
|
+
Displays the active model's stats bucket (`provider/modelId`), today's request
|
|
353
|
+
count (hit/total), cached input tokens vs total input tokens, and the hit rate
|
|
354
|
+
percentage. Also shows recent trend summaries (last 10 and last 30 samples):
|
|
355
|
+
|
|
356
|
+
```text
|
|
357
|
+
Model key: otokapi/gpt-5.5
|
|
358
|
+
Adapter: OpenAI cache
|
|
359
|
+
|
|
360
|
+
── Today ──
|
|
361
|
+
Requests: 3 hit / 10 total · 30%
|
|
362
|
+
Cached tokens: 0.0015M / 0.0050M input · 30%
|
|
363
|
+
|
|
364
|
+
── Recent trend ──
|
|
365
|
+
Recent 10/10: 3/10 hits · 30% tok cached
|
|
366
|
+
Recent 10/30: 3/10 hits · 30% tok cached
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
If the active model has no adapter match, a friendly message is shown. If
|
|
370
|
+
no samples have been recorded yet in this session, trend shows "no samples yet".
|
|
371
|
+
|
|
372
|
+
### Low-hit cause diagnosis
|
|
373
|
+
|
|
374
|
+
The `/cache-optimizer doctor` output includes a "Cache diagnosis" section
|
|
375
|
+
with prioritized low-hit cause analysis:
|
|
376
|
+
|
|
377
|
+
1. **Missing compat flags** — flags that enable prompt caching and session-affinity
|
|
378
|
+
routing are absent.
|
|
379
|
+
2. **Router/channel risk** — multi-backend routing may split the cache across
|
|
380
|
+
different upstream instances.
|
|
381
|
+
3. **Missing usage fields** — the proxy may not return prompt-level usage
|
|
382
|
+
fields, causing the footer to under-report hits.
|
|
383
|
+
4. **Recent low trend** — when today's cache hit rate is below 30%,
|
|
384
|
+
the diagnosis suggests proxy route instability or prompt prefix churn.
|
|
385
|
+
|
|
386
|
+
For fully configured models that still have low cache hit rates, the diagnosis
|
|
387
|
+
emphasizes sticky routing and upstream cache usage verification rather than
|
|
388
|
+
pointing to compat flags.
|
|
389
|
+
|
|
390
|
+
### Router/channel diagnostics
|
|
391
|
+
|
|
392
|
+
For models using OpenAI-compatible APIs (`openai-completions` or
|
|
393
|
+
`openai-responses`) through a non-official base URL, the extension detects
|
|
394
|
+
common router/channel proxy patterns from `provider`, `baseUrl`, and `compat`
|
|
395
|
+
metadata. When the active model goes through a known router/channel proxy (OpenRouter,
|
|
396
|
+
Vercel AI Gateway, LiteLLM, OneAPI/NewAPI/VoAPI, or a generic third-party
|
|
397
|
+
OpenAI-compatible proxy), both `doctor` and `compat` subcommands append
|
|
398
|
+
router/channel diagnostics with targeted recommendations.
|
|
399
|
+
|
|
400
|
+
### Router/channel diagnostics
|
|
401
|
+
|
|
402
|
+
For models using OpenAI-compatible APIs (`openai-completions` or
|
|
403
|
+
`openai-responses`) through a non-official base URL, the extension detects
|
|
404
|
+
common router/channel proxy patterns from `provider`, `baseUrl`, and `compat`
|
|
405
|
+
metadata:
|
|
406
|
+
|
|
407
|
+
| Profile | Detection | Recommendation |
|
|
408
|
+
|---------|-----------|----------------|
|
|
409
|
+
| **OpenRouter** | baseUrl or provider contains `openrouter`/`openrouter.ai` | Fix the upstream provider with `openRouterRouting.only` or `.order` in compat |
|
|
410
|
+
| **Vercel AI Gateway** | baseUrl contains `ai-gateway.vercel.sh` or provider contains `vercel` | Fix the upstream with `vercelGatewayRouting.only` or `.order` in compat |
|
|
411
|
+
| **LiteLLM / OneAPI / NewAPI / VoAPI** | baseUrl or provider contains `litellm`, `oneapi`/`one-api`, `newapi`/`new-api`, `voapi`/`vo-api` | Ensure sticky session routing, forward `prompt_cache_key` + session-affinity headers, return cache usage fields |
|
|
412
|
+
| **Generic third-party proxy** | Any `openai-completions` model with non-official base URL not matching above | General guidance: verify single-upstream routing, forward `prompt_cache_key` + session-affinity headers, return cache usage |
|
|
413
|
+
|
|
414
|
+
These diagnostics are **advisory only**. They do not participate in adapter
|
|
415
|
+
selection (still id/name-only), prompt_cache_key injection, footer stats, or
|
|
416
|
+
any automated configuration changes. Detection uses only metadata exposed by
|
|
417
|
+
Pi (`provider`, `api`, `baseUrl`, `compat`) — no API keys, prompts, payloads,
|
|
418
|
+
headers, or model outputs are read or exposed.
|
|
419
|
+
|
|
420
|
+
Official OpenAI (`api.openai.com`) and custom transports (`kiro-api`,
|
|
421
|
+
`anthropic-messages`, `bedrock-converse-stream`) are excluded from router/
|
|
422
|
+
channel diagnostics.
|
|
346
423
|
|
|
347
424
|
### Security
|
|
348
425
|
|
package/README.zh-CN.md
CHANGED
|
@@ -298,15 +298,44 @@ Gemini cache 1/2 · 0.18M/0.50M tok (36%)
|
|
|
298
298
|
```
|
|
299
299
|
/cache-optimizer — 交互菜单(无 UI 时显示文字帮助)
|
|
300
300
|
/cache-optimizer doctor — 显示 provider、model、API、base URL、compat 状态
|
|
301
|
+
及低命中原因诊断
|
|
302
|
+
/cache-optimizer stats — 显示当前模型的 stats 桶和近期趋势
|
|
301
303
|
/cache-optimizer compat — 显示 compat 建议和编辑说明
|
|
302
304
|
```
|
|
303
305
|
|
|
304
|
-
不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Compat / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
|
|
306
|
+
不带参数时,当 Pi UI 支持时(`ctx.ui.select` 可用),`/cache-optimizer` 会显示交互选择菜单(Doctor / Stats / Compat / Cancel)。在非交互终端中,会回退到文字帮助和当前模型 compat 状态。
|
|
305
307
|
|
|
306
308
|
### `/cache-optimizer doctor`
|
|
307
309
|
|
|
308
310
|
显示当前模型的 provider、model id、名称、API 类型、base URL、当前 `compat` 标志以及缺少的缓存/session-affinity 标志。如果缺少标志,还会显示可复制的 JSON 片段和精确编辑位置。
|
|
309
311
|
|
|
312
|
+
输出中还会包含 "Cache diagnosis"(缓存诊断)章节,按优先级分析低命中原因:
|
|
313
|
+
1. **缺少 compat 标志** — 缺少启用 prompt 缓存和 session-affinity 路由的标志。
|
|
314
|
+
2. **路由/渠道风险** — 多后端路由可能导致缓存分散到不同上游实例。
|
|
315
|
+
3. **缺少 usage 字段** — 代理可能未返回 prompt 层级的使用情况字段,导致 footer 低估命中率。
|
|
316
|
+
4. **近期趋势低** — 当今日缓存命中率低于 30% 时,诊断提示代理路由不稳定或 prompt 前缀变化。
|
|
317
|
+
|
|
318
|
+
对于已完整配置但命中率仍低的模型,诊断会重点提示粘性路由和上游缓存使用验证,而非 compat 标志。
|
|
319
|
+
|
|
320
|
+
### `/cache-optimizer stats`
|
|
321
|
+
|
|
322
|
+
显示当前模型的 stats 桶(`provider/modelId`),今日请求计数(命中/总数)、缓存输入令牌 vs 总输入令牌及命中率百分比。同时显示近期趋势摘要(最近 10 条和最近 30 条样本):
|
|
323
|
+
|
|
324
|
+
```text
|
|
325
|
+
Model key: otokapi/gpt-5.5
|
|
326
|
+
Adapter: OpenAI cache
|
|
327
|
+
|
|
328
|
+
── Today ──
|
|
329
|
+
Requests: 3 hit / 10 total · 30%
|
|
330
|
+
Cached tokens: 0.0015M / 0.0050M input · 30%
|
|
331
|
+
|
|
332
|
+
── Recent trend ──
|
|
333
|
+
Recent 10/10: 3/10 hits · 30% tok cached
|
|
334
|
+
Recent 10/30: 3/10 hits · 30% tok cached
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
如果当前模型没有匹配的 adapter,显示友好提示。如果尚未记录样本,趋势显示 "no samples yet"。
|
|
338
|
+
|
|
310
339
|
如果所有 compat 标志都已配置且适用(第三方 `openai-completions` 代理),输出显示 `✅ Compat fully configured.`。对于不适用 compat 检查的模型(官方 OpenAI、非 `openai-completions` API、custom transport),显示 `ℹ️ Compat check not applicable for this model.`:
|
|
311
340
|
|
|
312
341
|
```text
|
|
@@ -325,7 +354,24 @@ Edit ~/.pi/agent/models.json -> providers["otokapi"] -> compat (same level as ba
|
|
|
325
354
|
|
|
326
355
|
### `/cache-optimizer compat`
|
|
327
356
|
|
|
328
|
-
|
|
357
|
+
显示当前模型的 compat 建议,包括文件路径、provider 路径和可复制 JSON 片段。当没有缺失的 compat 标志时,如果模型是适用的第三方代理则显示 `✅ Compat fully configured.`,否则显示 `ℹ️ Compat check not applicable for this model.`。
|
|
358
|
+
|
|
359
|
+
当模型通过已知的路由器/通道代理(OpenRouter、Vercel AI Gateway、LiteLLM、OneAPI/NewAPI/VoAPI 或通用第三方 OpenAI-compatible 代理)时,`doctor` 和 `compat` 子命令都会附加路由/通道诊断信息和建议。
|
|
360
|
+
|
|
361
|
+
### 路由/通道诊断
|
|
362
|
+
|
|
363
|
+
对于通过非官方 base URL 使用 OpenAI-compatible API(`openai-completions` 或 `openai-responses`)的模型,扩展会从 `provider`、`baseUrl` 和 `compat` 元数据中检测常见的路由/通道代理模式:
|
|
364
|
+
|
|
365
|
+
| 类型 | 检测方式 | 建议 |
|
|
366
|
+
|------|----------|------|
|
|
367
|
+
| **OpenRouter** | baseUrl 或 provider 包含 `openrouter`/`openrouter.ai` | 在 compat 中用 `openRouterRouting.only` 或 `.order` 固定上游 provider |
|
|
368
|
+
| **Vercel AI Gateway** | baseUrl 包含 `ai-gateway.vercel.sh` 或 provider 包含 `vercel` | 在 compat 中用 `vercelGatewayRouting.only` 或 `.order` 固定上游 |
|
|
369
|
+
| **LiteLLM / OneAPI / NewAPI / VoAPI** | baseUrl 或 provider 包含 `litellm`、`oneapi`/`one-api`、`newapi`/`new-api`、`voapi`/`vo-api` | 确保每 session 固定路由,转发 `prompt_cache_key` + session-affinity headers,返回缓存用量字段 |
|
|
370
|
+
| **通用第三方代理** | 任何非官方 base URL 的 `openai-completions` 模型,且不匹配以上类型 | 通用建议:验证单上游路由、转发 `prompt_cache_key` + session-affinity headers、返回缓存用量 |
|
|
371
|
+
|
|
372
|
+
这些诊断**仅用于建议**。它们不参与 adapter selection(仍基于 id/name)、不参与 `prompt_cache_key` 注入、不参与 footer 统计、也不做任何自动化配置修改。检测仅使用 Pi 暴露的元数据(`provider`、`api`、`baseUrl`、`compat`),不会读取或暴露 API key、prompt、payload、headers 或模型输出。
|
|
373
|
+
|
|
374
|
+
官方 OpenAI(`api.openai.com`)和 custom transport(`kiro-api`、`anthropic-messages`、`bedrock-converse-stream`)不会触发路由/通道诊断。
|
|
329
375
|
|
|
330
376
|
### 安全说明
|
|
331
377
|
|
package/index.ts
CHANGED
|
@@ -147,6 +147,23 @@ type OptimizedSystemPrompt = {
|
|
|
147
147
|
changed: boolean;
|
|
148
148
|
};
|
|
149
149
|
|
|
150
|
+
/**
 * Per-request sample stored for trend analysis and usage-field-missing detection.
 * Contains only numeric counters and booleans — never message content, prompts,
 * payloads, headers, API keys, or model outputs.
 */
type CacheUsageSample = {
  // When the sample was recorded. NOTE(review): presumably epoch milliseconds — confirm at the write site.
  timestamp: number;
  // Whether this request is counted as a cache hit in the trend summaries.
  hit: boolean;
  // Cached input tokens for this request; summed as the numerator of the "tok cached" ratio.
  cachedInputTokens: number;
  // Cache-write input tokens for this request. NOTE(review): not read by the trend helpers visible here.
  cacheWriteInputTokens: number;
  // Total input tokens for this request; summed as the denominator of the "tok cached" ratio.
  totalInputTokens: number;
  // True when the response's usage fields looked missing/empty; surfaced as a "missing usage" warning.
  missingUsageFields: boolean;
};

/** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
const MAX_RECENT_SAMPLES = 50;
|
|
166
|
+
|
|
150
167
|
type CacheProviderAdapter = {
|
|
151
168
|
id: CacheProviderId;
|
|
152
169
|
label: string;
|
|
@@ -1141,6 +1158,10 @@ function modelKey(model: PiModel): string {
|
|
|
1141
1158
|
return `${model.provider}/${model.id}`;
|
|
1142
1159
|
}
|
|
1143
1160
|
|
|
1161
|
+
/**
 * Build the `provider/id` key for any object that carries a provider and an id.
 * Accepts a structural shape, so callers do not need a full model object.
 */
function keyForModelExt(model: { provider: string; id: string }): string {
  const { provider, id } = model;
  return [provider, id].join("/");
}
|
|
1164
|
+
|
|
1144
1165
|
/**
 * Return the `usage` property of the assistant record extracted from `message`,
 * passed through `asRecord`; yields `undefined` when no usable record is found.
 */
function usageRecordFromAssistant(message: unknown): UnknownRecord | undefined {
  const assistant = getAssistantRecord(message);
  const rawUsage = assistant?.usage;
  return asRecord(rawUsage);
}
|
|
@@ -2473,6 +2494,119 @@ function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): str
|
|
|
2473
2494
|
return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
2474
2495
|
}
|
|
2475
2496
|
|
|
2497
|
+
/**
 * Format `hits / total` as a whole-number percentage string.
 *
 * @param hits  Numerator (e.g. hit requests or cached tokens).
 * @param total Denominator (e.g. total requests or total input tokens).
 * @returns A rounded percentage such as "75%", "0%" or "100%"; "N/A" when
 *          `total` is zero or negative, or when either operand is not a
 *          finite number (so callers never render "NaN%").
 */
function formatHitRatio(hits: number, total: number): string {
  // Guard non-finite operands first: `NaN <= 0` is false, so NaN would
  // otherwise fall through to the division and print "NaN%".
  if (!Number.isFinite(hits) || !Number.isFinite(total) || total <= 0) {
    return "N/A";
  }
  return `${Math.round((hits / total) * 100)}%`;
}
|
|
2505
|
+
|
|
2506
|
+
/**
 * Format a token count as a number of millions for stats output.
 *
 * The returned string does NOT include the "M" suffix; callers append it.
 * Example: 1500000 → "1.50", 1500 → "0.0015", 25000000 → "25.0", 0 → "0".
 *
 * @param value Token count. Negative and non-finite values are treated as 0.
 * @returns The count in millions: 4 decimals below 0.01, 1 decimal at 10 or
 *          above, 2 decimals otherwise, and the literal "0" for zero.
 */
function formatTokenM(value: number): string {
  // Non-finite input would otherwise surface as "NaN" / "Infinity" in the UI.
  const tokens = Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0;
  const millions = tokens / 1_000_000;
  if (millions === 0) return "0";
  if (millions < 0.01) return millions.toFixed(4);
  if (millions >= 10) return millions.toFixed(1);
  return millions.toFixed(2);
}
|
|
2517
|
+
|
|
2518
|
+
/**
 * Check if an assistant message's usage fields appear to be missing or empty.
 *
 * Returns true when the Pi-normalized fields (input, cacheRead, cacheWrite) are
 * all absent or zero AND the adapter's own normalized usage is also absent or
 * all-zero. Returns false as soon as any of those counters is positive.
 *
 * @param message Assistant message whose `usage` record is inspected.
 * @param adapter Adapter used to normalize provider-specific raw usage.
 */
function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
  const usage = usageRecordFromAssistant(message);
  if (!usage) return true;

  // Pi-normalized counters. A field that is present but 0 carries no evidence
  // that the provider reported usage, so only positive values count here.
  const input = getNonNegativeNumber(usage, "input") ?? 0;
  const cacheRead = getNonNegativeNumber(usage, "cacheRead") ?? 0;
  const cacheWrite = getNonNegativeNumber(usage, "cacheWrite") ?? 0;
  if (input > 0 || cacheRead > 0 || cacheWrite > 0) {
    return false;
  }

  // Fall back to the adapter's view of the raw usage for its provider family.
  const rawUsage = adapter.normalizeUsage(message);
  if (!rawUsage) return true;

  return rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0;
}
|
|
2546
|
+
|
|
2547
|
+
/**
 * Build a one-line summary for the recent trend (last `maxCount` samples).
 *
 * Example outputs:
 *   "Recent 10/10: 7/10 hits · 65% tok cached"
 *   "Recent 4/30: 1/4 hits · 20% tok cached · 2 missing usage"
 *   "Recent 10: no samples yet"
 *
 * @param samples  Samples in recording order (oldest first); not mutated.
 * @param maxCount Size of the trailing window to summarize.
 * @returns The summary line; the " · N missing usage" suffix is appended only
 *          when at least one sample in the window has `missingUsageFields`.
 */
function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
  // `slice(-0)` is `slice(0)` and would return every sample, so a
  // non-positive window is handled explicitly as "no samples".
  const recent = maxCount > 0 ? samples.slice(-maxCount) : [];
  if (recent.length === 0) return `Recent ${maxCount}: no samples yet`;

  let hits = 0;
  let totalCached = 0;
  let totalInput = 0;
  let missingCount = 0;
  for (const sample of recent) {
    if (sample.hit) hits += 1;
    if (sample.missingUsageFields) missingCount += 1;
    totalCached += sample.cachedInputTokens;
    totalInput += sample.totalInputTokens;
  }

  // formatHitRatio already yields "N/A" when the denominator is not positive.
  const tokenRatio = formatHitRatio(totalCached, totalInput);

  let result = `Recent ${recent.length}/${maxCount}: ${hits}/${recent.length} hits · ${tokenRatio} tok cached`;
  if (missingCount > 0) {
    result += ` · ${missingCount} missing usage`;
  }
  return result;
}
|
|
2569
|
+
|
|
2570
|
+
/**
 * Render the text shown by `/cache-optimizer stats`.
 *
 * Without an active model or a matched adapter, a single informational line is
 * returned. Otherwise the output lists the model key and adapter label, the
 * "Today" counters (requests, cached tokens, and cache-write tokens when
 * positive), the 10- and 30-sample trend lines, and a warning when any recent
 * sample was flagged with missing usage fields.
 */
function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
  if (!model || !adapter) {
    return "ℹ️ No cache-adapter-matched model active. Select a model with a recognized provider family.";
  }

  const bucketKey = modelKey(model);
  const today = stats ?? emptyCacheStats();

  const requestRatio = formatHitRatio(today.hitRequests, today.totalRequests);
  let tokenRatio = "N/A";
  if (today.totalInputTokens > 0) {
    tokenRatio = `${Math.round((today.cachedInputTokens / today.totalInputTokens) * 100)}%`;
  }

  const out: string[] = [
    `Model key: ${bucketKey}`,
    `Adapter: ${adapter.label}`,
    "",
    "── Today ──",
    `Requests: ${today.hitRequests} hit / ${today.totalRequests} total · ${requestRatio}`,
    `Cached tokens: ${formatTokenM(today.cachedInputTokens)}M / ${formatTokenM(today.totalInputTokens)}M input · ${tokenRatio}`,
  ];

  // The cache-write line is shown only when something was written today.
  if (today.cacheWriteInputTokens > 0) {
    out.push(`Cache write: ${formatTokenM(today.cacheWriteInputTokens)}M tok`);
  }

  out.push("", "── Recent trend ──");
  for (const windowSize of [10, 30]) {
    out.push(formatRecentTrendSummary(recentSamples, windowSize));
  }

  // Warn when at least one recent sample was recorded with missing usage fields.
  const sawMissingUsage = recentSamples.some((sample) => sample.missingUsageFields);
  if (sawMissingUsage) {
    out.push(
      "",
      "⚠️ Some recent responses had missing or empty cache usage fields. Footer may under-report hits.",
      " The proxy may not return prompt_cache_hit_tokens or usage.input/cacheRead in responses.",
    );
  }

  return out.join("\n");
}
|
|
2609
|
+
|
|
2476
2610
|
function getErrorCode(error: unknown): string | undefined {
|
|
2477
2611
|
return typeof error === "object" && error !== null && "code" in error
|
|
2478
2612
|
? String((error as { code?: unknown }).code)
|
|
@@ -2621,6 +2755,171 @@ function isCompatCheckApplicable(model: PiModel): boolean {
|
|
|
2621
2755
|
return lower(model.api) === "openai-completions" && !isOfficialOpenAIBaseUrl(model);
|
|
2622
2756
|
}
|
|
2623
2757
|
|
|
2758
|
+
/**
 * Report whether `compat[key]` pins upstream routing via `only` or `order`.
 *
 * Narrows from `unknown` step by step so a missing or non-object compat (or
 * routing entry) yields false instead of a type error or a runtime throw.
 * An empty array is not treated as a pin because it selects no upstream.
 */
function hasRoutingPin(compat: unknown, key: string): boolean {
  if (typeof compat !== "object" || compat === null) return false;
  const routing = (compat as Record<string, unknown>)[key];
  if (typeof routing !== "object" || routing === null) return false;
  const { only, order } = routing as Record<string, unknown>;
  return [only, order].some((value) => (Array.isArray(value) ? value.length > 0 : Boolean(value)));
}

/**
 * Detect router / channel profiles from a PiModel and return diagnostic notes.
 *
 * This function is advisory only — it does NOT participate in adapter selection,
 * prompt_cache_key injection, or footer stats. It inspects provider, api, baseUrl,
 * and compat to identify common proxy/router patterns where cache performance may
 * be degraded due to multi-backend routing.
 *
 * Known profiles (checked in order; the first match wins):
 * 1. OpenRouter — baseUrl or provider containing "openrouter"
 * 2. Vercel AI Gateway — baseUrl containing "ai-gateway.vercel.sh", or provider
 *    containing "vercel"
 * 3. LiteLLM / OneAPI / NewAPI / VoAPI — baseUrl or provider containing litellm,
 *    oneapi, one-api, newapi, new-api, voapi, vo-api (self-hosted aggregation)
 * 4. Generic third-party OpenAI-compatible proxy — any openai-completions model
 *    with a non-empty, non-official base URL that matched none of the above.
 *
 * Models whose api is neither openai-completions nor openai-responses, and models
 * on the official OpenAI base URL, produce no notes.
 *
 * @param model Active model metadata.
 * @returns Note lines in display order; empty when no profile applies.
 */
function describeRouterChannelDiagnostics(model: PiModel): string[] {
  const notes: string[] = [];
  const api = lower(model.api);
  const baseUrl = lower(model.baseUrl || "");
  const provider = lower(model.provider);

  // Only OpenAI-compatible APIs are applicable for router/channel diagnostics.
  if (api !== "openai-completions" && api !== "openai-responses") {
    return notes;
  }

  // Official OpenAI bypass — no notes needed.
  if (isOfficialOpenAIBaseUrl(model)) {
    return notes;
  }

  // ── 1. OpenRouter ────────────────────────────────────────────────
  // "openrouter" also matches "openrouter.ai", so one substring check suffices.
  if (baseUrl.includes("openrouter") || provider.includes("openrouter")) {
    notes.push(
      "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
        "low cache hit rates are common when each turn lands on a different upstream provider.",
    );

    // Suggest pinning only when neither `only` nor `order` is configured.
    if (!hasRoutingPin(getCompat(model), "openRouterRouting")) {
      notes.push(
        " Suggestion: Add an openRouterRouting config to fix the upstream provider. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"openRouterRouting": { "only": ["<provider-slug>"] } }`,
        ' Replace <provider-slug> with the actual OpenRouter provider slug (e.g. "openai", "anthropic").',
        " Alternatively, use openRouterRouting.order: [\"<provider-slug>\", \"...\"] for fallback order. " +
          "Only set supportsLongCacheRetention if your upstream supports long cache retention.",
      );
    }

    return notes;
  }

  // ── 2. Vercel AI Gateway ─────────────────────────────────────────
  // "vercel" also matches "vercel-ai-gateway".
  if (baseUrl.includes("ai-gateway.vercel.sh") || provider.includes("vercel")) {
    notes.push(
      "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
        "provider endpoints per request, reducing cache locality.",
    );

    if (!hasRoutingPin(getCompat(model), "vercelGatewayRouting")) {
      notes.push(
        " Suggestion: Add a vercelGatewayRouting config to fix the upstream. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
        " Replace <provider-id> with the actual Vercel provider ID (e.g. \"openai\").",
        " Only set supportsLongCacheRetention if your upstream supports it.",
      );
    }

    return notes;
  }

  // ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
  const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
  if (aggregationPatterns.some((p) => baseUrl.includes(p) || provider.includes(p))) {
    notes.push(
      "🔀 Router/channel: Self-hosted aggregation proxy detected (LiteLLM / OneAPI / NewAPI / VoAPI). " +
        "These proxies route to multiple upstream accounts or instances, which can split the cache.",
      " Suggestions:",
      " • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
      " • Forward prompt_cache_key and session-affinity headers to the upstream.",
      " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
      ` Example compat: { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true }`,
    );

    return notes;
  }

  // ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
  // Only openai-completions models with an explicit base URL reach this branch;
  // unmatched openai-responses models fall through with no notes.
  if (api === "openai-completions" && baseUrl) {
    const missing = describeMissingOpenAICompatibleProxyCompat(model);
    notes.push(
      "🔀 Router/channel: Third-party OpenAI-compatible proxy. If cache hit rates are low:",
      " • Verify the proxy routes to the same upstream account/instance per session.",
      " • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
      " • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
    );
    if (missing.length > 0) {
      notes.push(
        ` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
      );
    }

    return notes;
  }

  return notes;
}
|
|
2922
|
+
|
|
2624
2923
|
function buildDoctorDiagnosis(model: PiModel): string {
|
|
2625
2924
|
const lines: string[] = [];
|
|
2626
2925
|
lines.push(`Provider: ${model.provider}`);
|
|
@@ -2648,6 +2947,15 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
2648
2947
|
lines.push("ℹ️ Compat check not applicable for this model.");
|
|
2649
2948
|
}
|
|
2650
2949
|
|
|
2950
|
+
// ── Router/channel diagnostics ──
|
|
2951
|
+
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
2952
|
+
if (routerNotes.length > 0) {
|
|
2953
|
+
lines.push("");
|
|
2954
|
+
for (const note of routerNotes) {
|
|
2955
|
+
lines.push(note);
|
|
2956
|
+
}
|
|
2957
|
+
}
|
|
2958
|
+
|
|
2651
2959
|
// ── Integrity diagnostics ──
|
|
2652
2960
|
if (lastPromptIntegrityWarningAt > 0) {
|
|
2653
2961
|
const ago = Date.now() - lastPromptIntegrityWarningAt;
|
|
@@ -2668,23 +2976,157 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
2668
2976
|
return lines.join("\n");
|
|
2669
2977
|
}
|
|
2670
2978
|
|
|
2979
|
+
/**
|
|
2980
|
+
* Build a "Cache diagnosis" section for low-hit causes, appended to doctor output.
|
|
2981
|
+
* This is a separate function because it depends on per-session state (recent samples,
|
|
2982
|
+
* per-model stats) that is not available at the module level.
|
|
2983
|
+
*/
|
|
2984
|
+
function buildLowHitDiagnosis(
|
|
2985
|
+
model: PiModel,
|
|
2986
|
+
adapter: CacheProviderAdapter | undefined,
|
|
2987
|
+
stats: CacheStats | undefined,
|
|
2988
|
+
samples: CacheUsageSample[],
|
|
2989
|
+
): string[] {
|
|
2990
|
+
const lines: string[] = [];
|
|
2991
|
+
|
|
2992
|
+
// 1. Missing compat flags (reuse existing check)
|
|
2993
|
+
const missingCompat = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2994
|
+
|
|
2995
|
+
// 2. Router/channel risk (reuse existing check)
|
|
2996
|
+
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
2997
|
+
|
|
2998
|
+
// 3. Recent samples missing usage fields
|
|
2999
|
+
const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;
|
|
3000
|
+
|
|
3001
|
+
// 4. Recent trend analysis
|
|
3002
|
+
const recent10 = samples.slice(-10);
|
|
3003
|
+
const recent10Hits = recent10.filter((s) => s.hit).length;
|
|
3004
|
+
const recent10Total = recent10.length;
|
|
3005
|
+
const recent10Cached = recent10.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
3006
|
+
const recent10Input = recent10.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
3007
|
+
|
|
3008
|
+
// 5. Today's overall trend from persisted stats
|
|
3009
|
+
const todayStats = stats ?? emptyCacheStats();
|
|
3010
|
+
|
|
3011
|
+
const hasMissingCompat = missingCompat.length > 0;
|
|
3012
|
+
const hasRouterRisk = routerNotes.length > 0;
|
|
3013
|
+
const hasUsageMissing = missingUsageSamples > 0;
|
|
3014
|
+
|
|
3015
|
+
// Determine if there are actual issues worth flagging
|
|
3016
|
+
const hasActualIssues = hasMissingCompat || hasUsageMissing ||
|
|
3017
|
+
// Low hit trend (today total > 3 and hit ratio < 30%)
|
|
3018
|
+
(todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
|
|
3019
|
+
(todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
|
|
3020
|
+
// Low hit rate in recent samples (recent10Total >= 3 and all misses)
|
|
3021
|
+
(recent10Total >= 3 && recent10Hits === 0);
|
|
3022
|
+
|
|
3023
|
+
// Skip section if no issues
|
|
3024
|
+
if (!hasActualIssues && !(hasRouterRisk && (hasMissingCompat || hasUsageMissing))) {
|
|
3025
|
+
return lines;
|
|
3026
|
+
}
|
|
3027
|
+
|
|
3028
|
+
lines.push("");
|
|
3029
|
+
lines.push("── Cache diagnosis ──");
|
|
3030
|
+
|
|
3031
|
+
// Priority 1: missing compat flags
|
|
3032
|
+
if (hasMissingCompat) {
|
|
3033
|
+
lines.push(`⚠️ Missing compat flags: ${missingCompat.join(", ")}`);
|
|
3034
|
+
lines.push(" These flags enable prompt caching and session-affinity routing.");
|
|
3035
|
+
lines.push(" Run /cache-optimizer compat for edit instructions.");
|
|
3036
|
+
}
|
|
3037
|
+
|
|
3038
|
+
// Priority 2: router/channel risk (only flag when there are other issues)
|
|
3039
|
+
// Router notes are already shown in the main doctor output, so we only
|
|
3040
|
+
// mention them in the diagnosis section when they compound a problem.
|
|
3041
|
+
if (hasRouterRisk && (hasMissingCompat || hasUsageMissing || hasActualIssues)) {
|
|
3042
|
+
lines.push("🔀 Router/channel proxy detected — see routing notes above.");
|
|
3043
|
+
}
|
|
3044
|
+
|
|
3045
|
+
// Priority 3: usage fields missing
|
|
3046
|
+
if (hasUsageMissing) {
|
|
3047
|
+
lines.push(`⚠️ ${missingUsageSamples}/${samples.length} recent responses had missing/empty usage fields.`);
|
|
3048
|
+
lines.push(" Footer may under-report cache hit rate.");
|
|
3049
|
+
lines.push(" Verify the proxy returns prompt-level usage (prompt_tokens, input_tokens_details).");
|
|
3050
|
+
}
|
|
3051
|
+
|
|
3052
|
+
// Priority 4: recent trend low
|
|
3053
|
+
if (recent10Total > 0) {
|
|
3054
|
+
const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
|
|
3055
|
+
const todayHitRatio = todayStats.totalInputTokens > 0
|
|
3056
|
+
? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
|
|
3057
|
+
: 0;
|
|
3058
|
+
|
|
3059
|
+
if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
3060
|
+
lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
|
|
3061
|
+
lines.push(" Likely causes: proxy routing to different backends per request,");
|
|
3062
|
+
lines.push(" or prompt prefix changes across turns.");
|
|
3063
|
+
lines.push(" Verify session affinity (sendSessionAffinityHeaders) and long cache retention.");
|
|
3064
|
+
} else if (todayHitRatio < 30 && todayStats.totalRequests > 3) {
|
|
3065
|
+
lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${todayStats.totalRequests} total requests).`);
|
|
3066
|
+
lines.push(" Check compat flags and proxy upstream routing.");
|
|
3067
|
+
}
|
|
3068
|
+
|
|
3069
|
+
// Show brief trend summary if there are enough samples
|
|
3070
|
+
if (recent10Total >= 3) {
|
|
3071
|
+
const trend = formatRecentTrendSummary(samples, 10);
|
|
3072
|
+
lines.push(`📊 ${trend}`);
|
|
3073
|
+
}
|
|
3074
|
+
}
|
|
3075
|
+
|
|
3076
|
+
// For fully configured but low hit models, emphasize sticky routing
|
|
3077
|
+
if (!hasMissingCompat && !hasRouterRisk && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
3078
|
+
lines.push("💡 Compat is configured but cache hit rate remains low.");
|
|
3079
|
+
lines.push(" Possible causes:");
|
|
3080
|
+
lines.push(" • Proxy still routes to multiple backends — check session affinity on the proxy side.");
|
|
3081
|
+
lines.push(" • Prompt prefix varies per turn — check dynamic context in system prompt.");
|
|
3082
|
+
lines.push(" • Provider does not return cache usage fields — footer can't measure hits.");
|
|
3083
|
+
}
|
|
3084
|
+
|
|
3085
|
+
return lines;
|
|
3086
|
+
}
|
|
3087
|
+
|
|
2671
3088
|
function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
2672
3089
|
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2673
|
-
|
|
3090
|
+
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
3091
|
+
|
|
3092
|
+
if (missing.length === 0 && routerNotes.length === 0) return undefined;
|
|
2674
3093
|
|
|
2675
3094
|
const key = modelKey(model);
|
|
2676
|
-
const
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
`
|
|
2684
|
-
` (
|
|
2685
|
-
|
|
2686
|
-
`
|
|
2687
|
-
|
|
3095
|
+
const lines: string[] = [];
|
|
3096
|
+
|
|
3097
|
+
if (missing.length > 0) {
|
|
3098
|
+
const slashIdx = key.indexOf("/");
|
|
3099
|
+
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
3100
|
+
const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
|
|
3101
|
+
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
3102
|
+
lines.push(`Active model: ${key}`);
|
|
3103
|
+
lines.push(`Missing: ${missing.join(", ")}`);
|
|
3104
|
+
lines.push("");
|
|
3105
|
+
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
3106
|
+
lines.push(`(at the same level as baseUrl/api/apiKey/models) and add:`);
|
|
3107
|
+
lines.push(JSON.stringify(suggestion, null, 2));
|
|
3108
|
+
lines.push("");
|
|
3109
|
+
lines.push(`Only enable if your endpoint supports them.`);
|
|
3110
|
+
}
|
|
3111
|
+
|
|
3112
|
+
// When compat is fully configured but router notes exist, prefix the status.
|
|
3113
|
+
if (routerNotes.length > 0 && missing.length === 0) {
|
|
3114
|
+
if (isCompatCheckApplicable(model)) {
|
|
3115
|
+
lines.push("✅ Compat fully configured.");
|
|
3116
|
+
} else {
|
|
3117
|
+
lines.push("ℹ️ Compat check not applicable for this model.");
|
|
3118
|
+
}
|
|
3119
|
+
lines.push("");
|
|
3120
|
+
}
|
|
3121
|
+
|
|
3122
|
+
if (routerNotes.length > 0) {
|
|
3123
|
+
if (missing.length > 0) lines.push("");
|
|
3124
|
+
for (const note of routerNotes) {
|
|
3125
|
+
lines.push(note);
|
|
3126
|
+
}
|
|
3127
|
+
}
|
|
3128
|
+
|
|
3129
|
+
return lines.join("\n");
|
|
2688
3130
|
}
|
|
2689
3131
|
|
|
2690
3132
|
// Internal helpers exported only so the task verification script
|
|
@@ -2835,6 +3277,7 @@ export const __internals_for_tests = {
|
|
|
2835
3277
|
isCompatCheckApplicable,
|
|
2836
3278
|
buildDoctorDiagnosis,
|
|
2837
3279
|
buildCompatDiagnosis,
|
|
3280
|
+
describeRouterChannelDiagnostics,
|
|
2838
3281
|
// Cache stats helpers (module-level, usable from verify script)
|
|
2839
3282
|
addUsageToCacheStats,
|
|
2840
3283
|
formatCacheStats,
|
|
@@ -2842,6 +3285,15 @@ export const __internals_for_tests = {
|
|
|
2842
3285
|
emptyAllCacheStats,
|
|
2843
3286
|
parseCacheStats,
|
|
2844
3287
|
parsePersistedCacheStats,
|
|
3288
|
+
// Recent sample / stats output / diagnosis helpers
|
|
3289
|
+
MAX_RECENT_SAMPLES,
|
|
3290
|
+
buildStatsOutput,
|
|
3291
|
+
buildLowHitDiagnosis,
|
|
3292
|
+
formatRecentTrendSummary,
|
|
3293
|
+
formatHitRatio,
|
|
3294
|
+
formatTokenM,
|
|
3295
|
+
hasMissingUsageFields,
|
|
3296
|
+
keyForModelExt,
|
|
2845
3297
|
};
|
|
2846
3298
|
|
|
2847
3299
|
export default function (pi: ExtensionAPI) {
|
|
@@ -2853,7 +3305,35 @@ export default function (pi: ExtensionAPI) {
|
|
|
2853
3305
|
let persistTimer: ReturnType<typeof setTimeout> | null = null;
|
|
2854
3306
|
let integrityNotificationShown = false;
|
|
2855
3307
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
3308
|
+
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
3309
|
+
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
3310
|
+
|
|
3311
|
+
function recordRecentSample(modelKeyStr: string, usage: UsageSnapshot, missingUsageFields: boolean): void {
|
|
3312
|
+
let samples = recentSamplesByModelKey.get(modelKeyStr);
|
|
3313
|
+
if (!samples) {
|
|
3314
|
+
samples = [];
|
|
3315
|
+
recentSamplesByModelKey.set(modelKeyStr, samples);
|
|
3316
|
+
}
|
|
3317
|
+
samples.push({
|
|
3318
|
+
timestamp: Date.now(),
|
|
3319
|
+
hit: usage.cacheRead > 0,
|
|
3320
|
+
cachedInputTokens: usage.cacheRead,
|
|
3321
|
+
cacheWriteInputTokens: usage.cacheWrite,
|
|
3322
|
+
totalInputTokens: usage.totalInput,
|
|
3323
|
+
missingUsageFields,
|
|
3324
|
+
});
|
|
3325
|
+
if (samples.length > MAX_RECENT_SAMPLES) {
|
|
3326
|
+
samples.splice(0, samples.length - MAX_RECENT_SAMPLES);
|
|
3327
|
+
}
|
|
3328
|
+
}
|
|
3329
|
+
|
|
3330
|
+
function getRecentSamples(modelKeyStr: string): CacheUsageSample[] {
|
|
3331
|
+
return recentSamplesByModelKey.get(modelKeyStr) ?? [];
|
|
3332
|
+
}
|
|
2856
3333
|
|
|
3334
|
+
function clearRecentSamples(): void {
|
|
3335
|
+
recentSamplesByModelKey.clear();
|
|
3336
|
+
}
|
|
2857
3337
|
|
|
2858
3338
|
function getCacheStatsState(): CacheStatsState {
|
|
2859
3339
|
return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
|
|
@@ -2962,6 +3442,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
2962
3442
|
// Reset integrity diagnostics on reload
|
|
2963
3443
|
lastPromptIntegrityWarningAt = 0;
|
|
2964
3444
|
integrityNotificationShown = false;
|
|
3445
|
+
clearRecentSamples();
|
|
2965
3446
|
await flushPersistCacheStats(ctx);
|
|
2966
3447
|
return;
|
|
2967
3448
|
}
|
|
@@ -3145,6 +3626,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
3145
3626
|
if (!adapter) return;
|
|
3146
3627
|
|
|
3147
3628
|
const usage = adapter.normalizeUsage(event.message);
|
|
3629
|
+
|
|
3630
|
+
// Record recent sample (even when usage is missing, for trend diagnosis)
|
|
3631
|
+
if (ctx.model) {
|
|
3632
|
+
const key = modelKey(ctx.model);
|
|
3633
|
+
const missingFields = usage === undefined || (usage.cacheRead === 0 && usage.cacheWrite === 0 && usage.totalInput === 0)
|
|
3634
|
+
? true
|
|
3635
|
+
: hasMissingUsageFields(event.message, adapter);
|
|
3636
|
+
recordRecentSample(key, usage ?? { cacheRead: 0, cacheWrite: 0, totalInput: 0 }, missingFields);
|
|
3637
|
+
}
|
|
3638
|
+
|
|
3148
3639
|
if (!usage) return;
|
|
3149
3640
|
|
|
3150
3641
|
await rollOverStatsIfNeeded(ctx);
|
|
@@ -3166,8 +3657,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
3166
3657
|
// Register /cache-optimizer command
|
|
3167
3658
|
// Subcommands:
|
|
3168
3659
|
// doctor — show current model/provider/api/baseUrl/compat status
|
|
3660
|
+
// with low-hit diagnosis
|
|
3661
|
+
// stats — show active model stats bucket, recent trend, usage
|
|
3169
3662
|
// compat — show compat suggestion with file path
|
|
3170
|
-
// (no args) —
|
|
3663
|
+
// (no args) — interactive menu (with UI) or help summary
|
|
3171
3664
|
// ────────────────────────────────────────────────────────────────
|
|
3172
3665
|
pi.registerCommand("cache-optimizer", {
|
|
3173
3666
|
description: "Diagnose Pi cache configuration",
|
|
@@ -3180,7 +3673,26 @@ export default function (pi: ExtensionAPI) {
|
|
|
3180
3673
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3181
3674
|
return;
|
|
3182
3675
|
}
|
|
3183
|
-
|
|
3676
|
+
const diagnosis = buildDoctorDiagnosis(model);
|
|
3677
|
+
const adapter = selectAdapterForModel(model);
|
|
3678
|
+
const statsState = model ? cacheStatsByModel[modelKey(model)] : undefined;
|
|
3679
|
+
const samples = model ? getRecentSamples(modelKey(model)) : [];
|
|
3680
|
+
const lowHitLines = buildLowHitDiagnosis(model, adapter, statsState, samples);
|
|
3681
|
+
const fullDiagnosis = lowHitLines.length > 0
|
|
3682
|
+
? diagnosis + "\n" + lowHitLines.join("\n")
|
|
3683
|
+
: diagnosis;
|
|
3684
|
+
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
3685
|
+
} else if (subcommand === "stats") {
|
|
3686
|
+
if (!model) {
|
|
3687
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3688
|
+
return;
|
|
3689
|
+
}
|
|
3690
|
+
const adapter = selectAdapterForModel(model);
|
|
3691
|
+
const key = model ? modelKey(model) : undefined;
|
|
3692
|
+
const statsState = key ? cacheStatsByModel[key] : undefined;
|
|
3693
|
+
const samples = model ? getRecentSamples(modelKey(model)) : [];
|
|
3694
|
+
const output = buildStatsOutput(model, adapter, statsState, samples);
|
|
3695
|
+
cmdCtx.ui.notify(output, "info");
|
|
3184
3696
|
} else if (subcommand === "compat") {
|
|
3185
3697
|
if (!model) {
|
|
3186
3698
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
@@ -3202,6 +3714,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3202
3714
|
if (cmdCtx.hasUI) {
|
|
3203
3715
|
const menuOptions = [
|
|
3204
3716
|
"🩺 Doctor — Show current model cache configuration",
|
|
3717
|
+
"📊 Stats — Show active model stats bucket and trend",
|
|
3205
3718
|
"⚙️ Compat — Show compat suggestion with edit instructions",
|
|
3206
3719
|
"❌ Cancel",
|
|
3207
3720
|
];
|
|
@@ -3210,9 +3723,28 @@ export default function (pi: ExtensionAPI) {
|
|
|
3210
3723
|
if (!model) {
|
|
3211
3724
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3212
3725
|
} else {
|
|
3213
|
-
|
|
3726
|
+
const diagnosis = buildDoctorDiagnosis(model);
|
|
3727
|
+
const adapter = selectAdapterForModel(model);
|
|
3728
|
+
const statsState = model ? cacheStatsByModel[modelKey(model)] : undefined;
|
|
3729
|
+
const samples = model ? getRecentSamples(modelKey(model)) : [];
|
|
3730
|
+
const lowHitLines = buildLowHitDiagnosis(model, adapter, statsState, samples);
|
|
3731
|
+
const fullDiagnosis = lowHitLines.length > 0
|
|
3732
|
+
? diagnosis + "\n" + lowHitLines.join("\n")
|
|
3733
|
+
: diagnosis;
|
|
3734
|
+
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
3214
3735
|
}
|
|
3215
3736
|
} else if (choice === menuOptions[1]) {
|
|
3737
|
+
if (!model) {
|
|
3738
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3739
|
+
} else {
|
|
3740
|
+
const adapter = selectAdapterForModel(model);
|
|
3741
|
+
const key = model ? modelKey(model) : undefined;
|
|
3742
|
+
const statsState = key ? cacheStatsByModel[key] : undefined;
|
|
3743
|
+
const samples = model ? getRecentSamples(modelKey(model)) : [];
|
|
3744
|
+
const output = buildStatsOutput(model, adapter, statsState, samples);
|
|
3745
|
+
cmdCtx.ui.notify(output, "info");
|
|
3746
|
+
}
|
|
3747
|
+
} else if (choice === menuOptions[2]) {
|
|
3216
3748
|
if (!model) {
|
|
3217
3749
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3218
3750
|
} else {
|
|
@@ -3236,7 +3768,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
3236
3768
|
// Fallback: text help when no interactive UI
|
|
3237
3769
|
const diagnosis: string[] = [];
|
|
3238
3770
|
diagnosis.push("📋 /cache-optimizer commands:");
|
|
3239
|
-
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat
|
|
3771
|
+
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
|
|
3772
|
+
diagnosis.push(" stats — Show active model stats bucket and recent trend");
|
|
3240
3773
|
diagnosis.push(" compat — Show compat suggestion with edit location");
|
|
3241
3774
|
diagnosis.push("");
|
|
3242
3775
|
if (model) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.4.
|
|
3
|
+
"version": "2.4.9",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|