pi-cache-optimizer 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -12
- package/README.zh-CN.md +10 -12
- package/index.ts +255 -292
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -62,15 +62,13 @@ State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on W
|
|
|
62
62
|
pi install npm:pi-cache-optimizer
|
|
63
63
|
```
|
|
64
64
|
|
|
65
|
-
3.
|
|
66
|
-
4. Export your DeepSeek API key in the same shell where you run `pi`:
|
|
65
|
+
3. Export your DeepSeek API key in the same shell where you run `pi` (if you use a DeepSeek model):
|
|
67
66
|
|
|
68
67
|
```bash
|
|
69
68
|
export DEEPSEEK_API_KEY='...'
|
|
70
69
|
```
|
|
71
70
|
|
|
72
|
-
|
|
73
|
-
5. Opt out of auto-seeding by exporting `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` before launching Pi. With opt-out, no write or backup happens, and no provider entry is added or modified.
|
|
71
|
+
This extension never reads, stores, or prints the key value.
|
|
74
72
|
|
|
75
73
|
## Install
|
|
76
74
|
|
|
@@ -78,15 +76,15 @@ State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on W
|
|
|
78
76
|
pi install npm:pi-cache-optimizer
|
|
79
77
|
```
|
|
80
78
|
|
|
81
|
-
After installation, `PI_CACHE_RETENTION=long` is applied automatically, the system prompt is reordered and skills are compressed automatically, session-overview churn is stripped automatically,
|
|
79
|
+
After installation, `PI_CACHE_RETENTION=long` is applied automatically, the system prompt is reordered and skills are compressed automatically, session-overview churn is stripped automatically, and the footer shows cache stats after supported model-family responses with exposed usage.
|
|
82
80
|
|
|
83
81
|
## Opt-out
|
|
84
82
|
|
|
85
83
|
| Env var | Effect |
|
|
86
84
|
|---------|--------|
|
|
87
|
-
| `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` | Skip DeepSeek `models.json` auto-seed |
|
|
88
85
|
| `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | Keep pi's verbose `<available_skills>` XML (opt out of one-line index) |
|
|
89
|
-
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=
|
|
86
|
+
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | Disable the OpenAI-family `prompt_cache_key` fallback (default is enabled) |
|
|
87
|
+
| `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | Disable the OpenAI-family `prompt_cache_key` fallback |
|
|
90
88
|
|
|
91
89
|
## Uninstall
|
|
92
90
|
|
|
@@ -114,7 +112,7 @@ rm ~/.pi/agent/pi-cache-optimizer-stats.json
|
|
|
114
112
|
rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
115
113
|
```
|
|
116
114
|
|
|
117
|
-
|
|
115
|
+
|
|
118
116
|
|
|
119
117
|
## Footer cache stats
|
|
120
118
|
|
|
@@ -193,7 +191,7 @@ After: [stable tools + rules | dynamic git status | task context]
|
|
|
193
191
|
↓ stable prefix → higher chance of cache reuse
|
|
194
192
|
```
|
|
195
193
|
|
|
196
|
-
Pi itself decides whether to send cache-related fields such as `prompt_cache_retention`, session-affinity headers, or Anthropic-style `cache_control` based on model compat and `PI_CACHE_RETENTION`.
|
|
194
|
+
Pi itself decides whether to send cache-related fields such as `prompt_cache_retention`, session-affinity headers, or Anthropic-style `cache_control` based on model compat and `PI_CACHE_RETENTION`. This extension now adds only one conservative request-body fallback by default: for OpenAI-family models using OpenAI-compatible Pi APIs, it fills a missing or blank top-level `prompt_cache_key` with the Pi session id and never overwrites an existing non-empty key. The extension does not fake cache hits; it helps configuration, improves stable-prefix probability, and summarizes exposed usage in the footer.
|
|
197
195
|
|
|
198
196
|
## Improving cache hit rate
|
|
199
197
|
|
|
@@ -208,7 +206,7 @@ What the extension does automatically:
|
|
|
208
206
|
Provider notes:
|
|
209
207
|
|
|
210
208
|
- DeepSeek: current behavior remains the reference path. Stable prefix ordering plus long-retention/session-affinity compat gives the best chance of automatic KV prefix reuse.
|
|
211
|
-
- OpenAI-family: prompt caching is automatic only on supported upstreams and sufficiently long prompts. Keep static instructions, tools, examples, and specs before changing user/task context. Pi owns retention transport by default.
|
|
209
|
+
- OpenAI-family: prompt caching is automatic only on supported upstreams and sufficiently long prompts. Keep static instructions, tools, examples, and specs before changing user/task context. Pi owns retention transport by default. For OpenAI-compatible Pi APIs, the extension fills a missing or blank top-level `prompt_cache_key` with the Pi session id (matching Pi core's official OpenAI behavior) and never overwrites an existing non-empty `prompt_cache_key` / `promptCacheKey`. Disable this fallback with `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` or `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0`. Unsupported OpenAI-compatible proxies may reject unknown fields; custom APIs are not targeted.
|
|
212
210
|
- Claude: prompt caching depends on Anthropic `cache_control` breakpoints. This extension does not inject breakpoints itself; for compatible endpoints, configure Pi compat such as `cacheControlFormat: "anthropic"` only when the endpoint supports it.
|
|
213
211
|
- Gemini/Vertex: implicit caching benefits from repeated large stable prefixes. This extension does not create explicit `cachedContents` resources or store cache resource names.
|
|
214
212
|
- Proxies/aggregators: fix upstream routing/provider order where possible. Cache hit rates are unreliable if the same model id/name can route to different upstreams.
|
|
@@ -225,9 +223,9 @@ This package now has provider-family stats adapters, but it still avoids blind g
|
|
|
225
223
|
|
|
226
224
|
## Out of scope for this release
|
|
227
225
|
|
|
228
|
-
- Broad/
|
|
226
|
+
- Broad/provider-agnostic request-body mutation or cache-control injection. The only default request-body fallback is OpenAI-family `prompt_cache_key` on OpenAI-compatible APIs, sourced from the Pi session id and skipped when an effective key already exists.
|
|
229
227
|
- Injecting Anthropic `cache_control` markers.
|
|
230
|
-
- Sending OpenAI `prompt_cache_key`
|
|
228
|
+
- Sending OpenAI `prompt_cache_key` into custom/non-OpenAI-compatible APIs; the fallback is gated to OpenAI-family id/name plus `openai-completions` / `openai-responses`.
|
|
231
229
|
- Overriding OpenAI `prompt_cache_retention` outside Pi's own compat handling.
|
|
232
230
|
- Creating Gemini explicit `cachedContents` resources or persisting cache resource names.
|
|
233
231
|
- Claiming stats for providers that do not expose reliable cache usage.
|
package/README.zh-CN.md
CHANGED
|
@@ -65,15 +65,13 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
|
|
|
65
65
|
pi install npm:pi-cache-optimizer
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
-
3.
|
|
69
|
-
4. 在运行 `pi` 的同一个 shell 中导出 DeepSeek API key:
|
|
68
|
+
3. 如果使用 DeepSeek 模型,请在运行 `pi` 的同一个 shell 中导出 DeepSeek API key:
|
|
70
69
|
|
|
71
70
|
```bash
|
|
72
71
|
export DEEPSEEK_API_KEY='...'
|
|
73
72
|
```
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
5. 如需退出自动写入,请在启动 Pi 之前设 `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1`。退出后不会产生任何写入或备份,也不会新增 provider 条目。
|
|
74
|
+
本扩展**不会**读取、存储或打印 key 的值。
|
|
77
75
|
|
|
78
76
|
## 安装
|
|
79
77
|
|
|
@@ -81,15 +79,15 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
|
|
|
81
79
|
pi install npm:pi-cache-optimizer
|
|
82
80
|
```
|
|
83
81
|
|
|
84
|
-
安装后 `PI_CACHE_RETENTION=long` **自动生效**,system prompt **自动重组**、skills 自动压缩、session-overview
|
|
82
|
+
安装后 `PI_CACHE_RETENTION=long` **自动生效**,system prompt **自动重组**、skills 自动压缩、session-overview 动态尾字段自动剥离;受支持 model family 的响应完成且暴露 usage 后,底部状态栏会显示缓存统计。
|
|
85
83
|
|
|
86
84
|
## 退出(Opt-out)
|
|
87
85
|
|
|
88
86
|
| 环境变量 | 作用 |
|
|
89
87
|
|---------|------|
|
|
90
|
-
| `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` | 跳过 `models.json` DeepSeek 自动写入 |
|
|
91
88
|
| `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | 保留 pi 的 verbose `<available_skills>` XML(退出一行索引模式) |
|
|
92
|
-
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=
|
|
89
|
+
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | 禁用 OpenAI-family `prompt_cache_key` 兜底(默认启用) |
|
|
90
|
+
| `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | 禁用 OpenAI-family `prompt_cache_key` 兜底 |
|
|
93
91
|
|
|
94
92
|
## 卸载
|
|
95
93
|
|
|
@@ -117,7 +115,7 @@ rm ~/.pi/agent/pi-cache-optimizer-stats.json
|
|
|
117
115
|
rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
118
116
|
```
|
|
119
117
|
|
|
120
|
-
|
|
118
|
+
|
|
121
119
|
|
|
122
120
|
## 底部缓存统计
|
|
123
121
|
|
|
@@ -196,7 +194,7 @@ Provider 缓存通常依赖精确或近似精确的前缀匹配。Pi 的 system
|
|
|
196
194
|
↓ 稳定前缀不变 → 更容易命中缓存
|
|
197
195
|
```
|
|
198
196
|
|
|
199
|
-
Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送缓存相关字段,例如 `prompt_cache_retention`、session affinity headers 或 Anthropic-style `cache_control
|
|
197
|
+
Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送缓存相关字段,例如 `prompt_cache_retention`、session affinity headers 或 Anthropic-style `cache_control`。本扩展现在默认只做一个保守的 request-body 兜底:对使用 OpenAI-compatible Pi API 的 OpenAI-family 模型,当顶层 `prompt_cache_key` 缺失或为空时,用 Pi session id 补上,并且不会覆盖已有的非空 key。本扩展不伪造缓存命中,只帮助配置、提高稳定前缀概率,并把已暴露的 usage 汇总到底部状态栏。
|
|
200
198
|
|
|
201
199
|
## 提高 cache 命中率
|
|
202
200
|
|
|
@@ -211,7 +209,7 @@ Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送
|
|
|
211
209
|
各 provider 注意点:
|
|
212
210
|
|
|
213
211
|
- DeepSeek:现有行为仍是参考路径。稳定前缀排序,加上 long-retention / session-affinity compat,最有利于自动 KV prefix 复用。
|
|
214
|
-
- OpenAI-family:prompt caching 只会在真实上游支持且 prompt 足够长时自动生效。请尽量把静态 instructions、tools、examples、specs 放在变化的 user/task context 前面。retention 传输默认由 Pi
|
|
212
|
+
- OpenAI-family:prompt caching 只会在真实上游支持且 prompt 足够长时自动生效。请尽量把静态 instructions、tools、examples、specs 放在变化的 user/task context 前面。retention 传输默认由 Pi 负责。对 OpenAI-compatible Pi API,本扩展会用 Pi session id 补齐缺失或空白的顶层 `prompt_cache_key`(与 Pi core 官方 OpenAI 行为对齐),并且不会覆盖已有非空的 `prompt_cache_key` / `promptCacheKey`。可用 `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` 或 `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` 禁用该兜底。不支持该字段的 OpenAI-compatible 代理可能拒绝请求;custom API 不会被注入。
|
|
215
213
|
- Claude:prompt caching 依赖 Anthropic `cache_control` breakpoints。本扩展不会自行注入 breakpoint;对兼容 endpoint,只在 endpoint 明确支持时配置 Pi compat,例如 `cacheControlFormat: "anthropic"`。
|
|
216
214
|
- Gemini/Vertex:implicit caching 受益于重复的大型稳定前缀。本扩展不会创建 explicit `cachedContents` resources,也不会保存 cache resource names。
|
|
217
215
|
- Proxies/aggregators:尽量固定上游 routing/provider order。如果同一个 model id/name 可能路由到不同上游,cache hit rate 会不稳定。
|
|
@@ -229,9 +227,9 @@ Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送
|
|
|
229
227
|
|
|
230
228
|
## 本版本不包含
|
|
231
229
|
|
|
232
|
-
-
|
|
230
|
+
- 广泛/provider-agnostic 修改请求体,或做 cache-control 注入。唯一默认 request-body 兜底是 OpenAI-family 在 OpenAI-compatible API 上使用 Pi session id 的 `prompt_cache_key`,且已有有效 key 时会跳过。
|
|
233
231
|
- 注入 Anthropic `cache_control` markers。
|
|
234
|
-
-
|
|
232
|
+
- 向 custom / 非 OpenAI-compatible API 发送 OpenAI `prompt_cache_key`;该兜底同时要求 model id/name 属于 OpenAI-family,且 API 是 `openai-completions` / `openai-responses`。
|
|
235
233
|
- 在 Pi 自己的 compat 处理之外覆盖 OpenAI `prompt_cache_retention`。
|
|
236
234
|
- 创建 Gemini explicit `cachedContents` resources 或持久化 cache resource names。
|
|
237
235
|
- 对不暴露可靠 cache usage 的 provider 声称统计支持。
|
package/index.ts
CHANGED
|
@@ -1,10 +1,3 @@
|
|
|
1
|
-
import { createHash } from "node:crypto";
|
|
2
|
-
import {
|
|
3
|
-
mkdirSync,
|
|
4
|
-
readFileSync,
|
|
5
|
-
renameSync,
|
|
6
|
-
writeFileSync,
|
|
7
|
-
} from "node:fs";
|
|
8
1
|
import { mkdir, readFile, rename, unlink, writeFile } from "node:fs/promises";
|
|
9
2
|
import { homedir } from "node:os";
|
|
10
3
|
import { dirname, join } from "node:path";
|
|
@@ -16,10 +9,8 @@ import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@
|
|
|
16
9
|
* What it does:
|
|
17
10
|
* 1. Reorders Pi's system prompt so stable content is sent before dynamic context.
|
|
18
11
|
* 2. Sets PI_CACHE_RETENTION=long at extension load time.
|
|
19
|
-
* 3.
|
|
20
|
-
*
|
|
21
|
-
* 4. Warns once for provider/model cache compat gaps where the signal is conservative.
|
|
22
|
-
* 5. Shows lightweight persisted provider-specific cache stats in Pi's footer.
|
|
12
|
+
* 3. Warns once for provider/model cache compat gaps where the signal is conservative.
|
|
13
|
+
* 4. Shows lightweight persisted provider-specific cache stats in Pi's footer.
|
|
23
14
|
*
|
|
24
15
|
* Provider prompt/KV caches are provider-side and best-effort. This extension improves
|
|
25
16
|
* the odds of cache hits; it cannot guarantee hits, especially through proxies.
|
|
@@ -41,14 +32,11 @@ const STATUS_KEY = "pi-cache-stats";
|
|
|
41
32
|
const STATE_DIR = join(homedir(), ".pi", "agent");
|
|
42
33
|
const STATE_FILE_PATH = join(STATE_DIR, "pi-cache-optimizer-stats.json");
|
|
43
34
|
const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
|
|
44
|
-
const MODELS_JSON_PATH = join(STATE_DIR, "models.json");
|
|
45
|
-
|
|
46
35
|
const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
|
|
47
36
|
const OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY";
|
|
48
|
-
const
|
|
49
|
-
const
|
|
37
|
+
const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
|
|
38
|
+
const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
50
39
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
51
|
-
const DEEPSEEK_API_KEY_ENV = "DEEPSEEK_API_KEY";
|
|
52
40
|
|
|
53
41
|
// WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
|
|
54
42
|
// logic has accidentally truncated a structural marker (any XML tag or
|
|
@@ -114,6 +102,18 @@ type PersistedCacheStatsV2 = {
|
|
|
114
102
|
statsByProvider: Partial<Record<CacheProviderId, CacheStats>>;
|
|
115
103
|
};
|
|
116
104
|
|
|
105
|
+
/** Per-model-key scoped state. Used in memory and for v3 persistence. */
|
|
106
|
+
type CacheStatsState = {
|
|
107
|
+
statsByModel: Record<string, CacheStats>;
|
|
108
|
+
legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
type PersistedCacheStatsV3 = {
|
|
112
|
+
version: 3;
|
|
113
|
+
statsByModel: Record<string, CacheStats>;
|
|
114
|
+
legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
|
|
115
|
+
};
|
|
116
|
+
|
|
117
117
|
type UsageSnapshot = {
|
|
118
118
|
cacheRead: number;
|
|
119
119
|
cacheWrite: number;
|
|
@@ -511,12 +511,17 @@ function optimizeSystemPrompt(
|
|
|
511
511
|
};
|
|
512
512
|
}
|
|
513
513
|
|
|
514
|
-
function
|
|
515
|
-
const normalized =
|
|
514
|
+
function clampPromptCacheKey(key: string | undefined): string | undefined {
|
|
515
|
+
const normalized = key?.trim();
|
|
516
516
|
if (!normalized) return undefined;
|
|
517
517
|
|
|
518
|
-
const
|
|
519
|
-
return
|
|
518
|
+
const chars = Array.from(normalized);
|
|
519
|
+
if (chars.length <= OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH) return normalized;
|
|
520
|
+
return chars.slice(0, OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH).join("");
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
function getSessionPromptCacheKey(ctx: ExtensionContext): string | undefined {
|
|
524
|
+
return clampPromptCacheKey(ctx.sessionManager.getSessionId());
|
|
520
525
|
}
|
|
521
526
|
|
|
522
527
|
function asRecord(value: unknown): UnknownRecord | undefined {
|
|
@@ -547,8 +552,16 @@ function isEnabledEnv(value: string | undefined): boolean {
|
|
|
547
552
|
return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on";
|
|
548
553
|
}
|
|
549
554
|
|
|
550
|
-
function
|
|
551
|
-
|
|
555
|
+
function isDisabledEnv(value: string | undefined): boolean {
|
|
556
|
+
if (!value) return false;
|
|
557
|
+
const normalized = value.trim().toLowerCase();
|
|
558
|
+
return normalized === "0" || normalized === "false" || normalized === "no" || normalized === "off";
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
function shouldInjectOpenAIPromptCacheKey(): boolean {
|
|
562
|
+
if (isEnabledEnv(process.env[NO_OPENAI_CACHE_KEY_ENV])) return false;
|
|
563
|
+
if (isDisabledEnv(process.env[OPENAI_CACHE_KEY_ENV])) return false;
|
|
564
|
+
return true;
|
|
552
565
|
}
|
|
553
566
|
|
|
554
567
|
function isAssistantMessage(message: unknown): boolean {
|
|
@@ -796,13 +809,51 @@ function normalizeWithFallback(
|
|
|
796
809
|
|
|
797
810
|
function addOpenAIPromptCacheKey(payload: unknown, cacheKey: string | undefined): unknown | undefined {
|
|
798
811
|
const record = asRecord(payload);
|
|
799
|
-
|
|
812
|
+
const normalizedCacheKey = clampPromptCacheKey(cacheKey);
|
|
813
|
+
if (!record || !normalizedCacheKey) return undefined;
|
|
800
814
|
|
|
801
|
-
if (
|
|
815
|
+
if (hasEffectivePromptCacheKey(record)) {
|
|
802
816
|
return undefined;
|
|
803
817
|
}
|
|
804
818
|
|
|
805
|
-
return { ...record, prompt_cache_key:
|
|
819
|
+
return { ...record, prompt_cache_key: normalizedCacheKey };
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
function hasEffectivePromptCacheKey(record: UnknownRecord): boolean {
|
|
823
|
+
return isNonEmptyString(record.prompt_cache_key) || isNonEmptyString(record.promptCacheKey);
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
function isNonEmptyString(value: unknown): boolean {
|
|
827
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
831
|
+
const value = lower(model.baseUrl).trim();
|
|
832
|
+
if (!value) return false;
|
|
833
|
+
|
|
834
|
+
try {
|
|
835
|
+
return new URL(value).hostname === "api.openai.com";
|
|
836
|
+
} catch {
|
|
837
|
+
return value === "api.openai.com" || value.startsWith("api.openai.com/");
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
|
|
842
|
+
const compat = getCompat(model);
|
|
843
|
+
const missing: string[] = [];
|
|
844
|
+
|
|
845
|
+
if (!isOpenAIFamilyModel(model)) return missing;
|
|
846
|
+
if (lower(model.api) !== "openai-completions") return missing;
|
|
847
|
+
if (isOfficialOpenAIBaseUrl(model)) return missing;
|
|
848
|
+
|
|
849
|
+
if (compat.supportsLongCacheRetention !== true) {
|
|
850
|
+
missing.push("supportsLongCacheRetention");
|
|
851
|
+
}
|
|
852
|
+
if (compat.sendSessionAffinityHeaders !== true) {
|
|
853
|
+
missing.push("sendSessionAffinityHeaders");
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
return missing;
|
|
806
857
|
}
|
|
807
858
|
|
|
808
859
|
function describeMissingDeepSeekCompat(model: PiModel): string[] {
|
|
@@ -881,6 +932,15 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
881
932
|
normalizeUsage(message) {
|
|
882
933
|
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
883
934
|
},
|
|
935
|
+
warningText(model) {
|
|
936
|
+
const missing = describeMissingOpenAIFamilyProxyCompat(model);
|
|
937
|
+
if (missing.length === 0) return undefined;
|
|
938
|
+
|
|
939
|
+
return (
|
|
940
|
+
`💡 pi-cache-optimizer: ${modelKey(model)} looks like a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}. ` +
|
|
941
|
+
`For better cache locality, add compat: { "supportsLongCacheRetention": true, "sendSessionAffinityHeaders": true } in ~/.pi/agent/models.json when the endpoint supports these fields.`
|
|
942
|
+
);
|
|
943
|
+
},
|
|
884
944
|
},
|
|
885
945
|
{
|
|
886
946
|
id: "gemini",
|
|
@@ -1013,30 +1073,56 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
|
|
|
1013
1073
|
};
|
|
1014
1074
|
}
|
|
1015
1075
|
|
|
1016
|
-
function parsePersistedCacheStats(value: unknown):
|
|
1076
|
+
function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
|
|
1017
1077
|
const record = asRecord(value);
|
|
1018
1078
|
if (!record) return undefined;
|
|
1019
1079
|
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1080
|
+
// version 3: model-scoped stats + legacy family fallback
|
|
1081
|
+
if (record.version === 3) {
|
|
1082
|
+
const statsByModel: Record<string, CacheStats> = {};
|
|
1083
|
+
const rawModelMap = asRecord(record.statsByModel);
|
|
1084
|
+
if (rawModelMap) {
|
|
1085
|
+
for (const [key, val] of Object.entries(rawModelMap)) {
|
|
1086
|
+
const parsed = parseCacheStats(val);
|
|
1087
|
+
if (parsed) statsByModel[key] = parsed;
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
|
|
1092
|
+
const rawFamily = asRecord(record.legacyFamily);
|
|
1093
|
+
if (rawFamily) {
|
|
1094
|
+
for (const id of CACHE_PROVIDER_IDS) {
|
|
1095
|
+
const stats = parseCacheStats(rawFamily[id]);
|
|
1096
|
+
if (stats) legacyFamily[id] = stats;
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1024
1099
|
|
|
1025
|
-
|
|
1100
|
+
return { statsByModel, legacyFamily };
|
|
1101
|
+
}
|
|
1026
1102
|
|
|
1027
|
-
|
|
1028
|
-
if (
|
|
1103
|
+
// version 2: migrate statsByProvider into legacyFamily
|
|
1104
|
+
if (record.version === 2) {
|
|
1105
|
+
const statsByProvider = asRecord(record.statsByProvider);
|
|
1106
|
+
const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
|
|
1107
|
+
if (statsByProvider) {
|
|
1108
|
+
for (const id of CACHE_PROVIDER_IDS) {
|
|
1109
|
+
const stats = parseCacheStats(statsByProvider[id]);
|
|
1110
|
+
if (stats) legacyFamily[id] = stats;
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
return { statsByModel: {}, legacyFamily };
|
|
1114
|
+
}
|
|
1029
1115
|
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
const
|
|
1033
|
-
|
|
1116
|
+
// version 1: single DeepSeek stats -> migrate to legacyFamily.deepseek
|
|
1117
|
+
if (record.version === 1) {
|
|
1118
|
+
const migrated = parseCacheStats(record.stats);
|
|
1119
|
+
return migrated ? { statsByModel: {}, legacyFamily: { deepseek: migrated } } : undefined;
|
|
1034
1120
|
}
|
|
1035
1121
|
|
|
1036
|
-
return
|
|
1122
|
+
return undefined;
|
|
1037
1123
|
}
|
|
1038
1124
|
|
|
1039
|
-
async function readPersistedCacheStats(): Promise<
|
|
1125
|
+
async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
|
|
1040
1126
|
try {
|
|
1041
1127
|
const raw = await readFile(STATE_FILE_PATH, "utf8");
|
|
1042
1128
|
return parsePersistedCacheStats(JSON.parse(raw));
|
|
@@ -1076,231 +1162,20 @@ async function readPersistedCacheStats(): Promise<Partial<Record<CacheProviderId
|
|
|
1076
1162
|
return undefined;
|
|
1077
1163
|
}
|
|
1078
1164
|
|
|
1079
|
-
async function writePersistedCacheStats(
|
|
1165
|
+
async function writePersistedCacheStats(state: CacheStatsState): Promise<void> {
|
|
1080
1166
|
await mkdir(STATE_DIR, { recursive: true });
|
|
1081
|
-
const payload:
|
|
1167
|
+
const payload: PersistedCacheStatsV3 = {
|
|
1168
|
+
version: 3,
|
|
1169
|
+
statsByModel: state.statsByModel,
|
|
1170
|
+
legacyFamily: state.legacyFamily,
|
|
1171
|
+
};
|
|
1082
1172
|
const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
|
|
1083
1173
|
|
|
1084
1174
|
await writeFile(tempPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
|
|
1085
1175
|
await rename(tempPath, STATE_FILE_PATH);
|
|
1086
1176
|
}
|
|
1087
1177
|
|
|
1088
|
-
// ============================================================
|
|
1089
|
-
// models.json auto-config (DeepSeek seed)
|
|
1090
|
-
// ============================================================
|
|
1091
|
-
|
|
1092
|
-
type ModelsJsonShape = {
|
|
1093
|
-
providers?: UnknownRecord;
|
|
1094
|
-
} & UnknownRecord;
|
|
1095
|
-
|
|
1096
|
-
const DEEPSEEK_SEED_PROVIDER = {
|
|
1097
|
-
baseUrl: "https://api.deepseek.com",
|
|
1098
|
-
api: "openai-completions",
|
|
1099
|
-
apiKey: "$DEEPSEEK_API_KEY",
|
|
1100
|
-
models: [
|
|
1101
|
-
{
|
|
1102
|
-
id: "deepseek-v4-pro",
|
|
1103
|
-
name: "DeepSeek V4 Pro",
|
|
1104
|
-
contextWindow: 1_000_000,
|
|
1105
|
-
maxTokens: 384_000,
|
|
1106
|
-
input: ["text"],
|
|
1107
|
-
reasoning: true,
|
|
1108
|
-
cost: { input: 1.74, output: 3.48, cacheRead: 0.145, cacheWrite: 0 },
|
|
1109
|
-
compat: {
|
|
1110
|
-
requiresReasoningContentOnAssistantMessages: true,
|
|
1111
|
-
thinkingFormat: "deepseek",
|
|
1112
|
-
supportsLongCacheRetention: true,
|
|
1113
|
-
sendSessionAffinityHeaders: true,
|
|
1114
|
-
reasoningEffortMap: {
|
|
1115
|
-
minimal: "high",
|
|
1116
|
-
low: "high",
|
|
1117
|
-
medium: "high",
|
|
1118
|
-
high: "high",
|
|
1119
|
-
xhigh: "max",
|
|
1120
|
-
},
|
|
1121
|
-
},
|
|
1122
|
-
},
|
|
1123
|
-
{
|
|
1124
|
-
id: "deepseek-v4-flash",
|
|
1125
|
-
name: "DeepSeek V4 Flash",
|
|
1126
|
-
contextWindow: 1_000_000,
|
|
1127
|
-
maxTokens: 384_000,
|
|
1128
|
-
input: ["text"],
|
|
1129
|
-
reasoning: true,
|
|
1130
|
-
cost: { input: 0.14, output: 0.28, cacheRead: 0.028, cacheWrite: 0 },
|
|
1131
|
-
compat: {
|
|
1132
|
-
requiresReasoningContentOnAssistantMessages: true,
|
|
1133
|
-
thinkingFormat: "deepseek",
|
|
1134
|
-
supportsLongCacheRetention: true,
|
|
1135
|
-
sendSessionAffinityHeaders: true,
|
|
1136
|
-
reasoningEffortMap: {
|
|
1137
|
-
minimal: "high",
|
|
1138
|
-
low: "high",
|
|
1139
|
-
medium: "high",
|
|
1140
|
-
high: "high",
|
|
1141
|
-
xhigh: "max",
|
|
1142
|
-
},
|
|
1143
|
-
},
|
|
1144
|
-
},
|
|
1145
|
-
],
|
|
1146
|
-
} as const;
|
|
1147
|
-
|
|
1148
|
-
function modelsJsonContainsDeepseek(parsed: ModelsJsonShape): boolean {
|
|
1149
|
-
const providers = asRecord(parsed.providers);
|
|
1150
|
-
if (!providers) return false;
|
|
1151
|
-
|
|
1152
|
-
// Respect user intent: a provider key literally named "deepseek" (case-insensitive)
|
|
1153
|
-
// means the user already declared their own DeepSeek block, even if its models list is empty.
|
|
1154
|
-
for (const key of Object.keys(providers)) {
|
|
1155
|
-
if (key.toLowerCase() === "deepseek") return true;
|
|
1156
|
-
}
|
|
1157
|
-
|
|
1158
|
-
for (const providerValue of Object.values(providers)) {
|
|
1159
|
-
const provider = asRecord(providerValue);
|
|
1160
|
-
if (!provider) continue;
|
|
1161
|
-
const models = provider.models;
|
|
1162
|
-
if (!Array.isArray(models)) continue;
|
|
1163
|
-
for (const model of models) {
|
|
1164
|
-
const record = asRecord(model);
|
|
1165
|
-
if (!record) continue;
|
|
1166
|
-
if (lower(record.id).includes("deepseek") || lower(record.name).includes("deepseek")) {
|
|
1167
|
-
return true;
|
|
1168
|
-
}
|
|
1169
|
-
}
|
|
1170
|
-
}
|
|
1171
|
-
|
|
1172
|
-
return false;
|
|
1173
|
-
}
|
|
1174
|
-
|
|
1175
|
-
type EnsureDeepseekResult = {
|
|
1176
|
-
// Whether some DeepSeek-like model is now present in models.json (either pre-existing or just-seeded).
|
|
1177
|
-
deepseekPresent: boolean;
|
|
1178
|
-
// Whether we just wrote the seed in this activation.
|
|
1179
|
-
seeded: boolean;
|
|
1180
|
-
// Whether auto-config was deliberately skipped (env opt-out or malformed file).
|
|
1181
|
-
skipped: boolean;
|
|
1182
|
-
};
|
|
1183
|
-
|
|
1184
|
-
function ensureDeepseekConfigured(notify?: (text: string, level: "info" | "warning") => void): EnsureDeepseekResult {
|
|
1185
|
-
const result: EnsureDeepseekResult = { deepseekPresent: false, seeded: false, skipped: false };
|
|
1186
1178
|
|
|
1187
|
-
if (isEnabledEnv(process.env[NO_AUTO_CONFIG_ENV])) {
|
|
1188
|
-
result.skipped = true;
|
|
1189
|
-
// Even when opted out, callers still need to know whether DeepSeek is present so the
|
|
1190
|
-
// API-key hint can fire. Read-only inspection only; no writes.
|
|
1191
|
-
try {
|
|
1192
|
-
const raw = readFileSync(MODELS_JSON_PATH, "utf8");
|
|
1193
|
-
const parsed = JSON.parse(raw) as ModelsJsonShape;
|
|
1194
|
-
if (parsed && typeof parsed === "object") {
|
|
1195
|
-
result.deepseekPresent = modelsJsonContainsDeepseek(parsed);
|
|
1196
|
-
}
|
|
1197
|
-
} catch {
|
|
1198
|
-
// ignore: missing or unreadable file means "not present"
|
|
1199
|
-
}
|
|
1200
|
-
return result;
|
|
1201
|
-
}
|
|
1202
|
-
|
|
1203
|
-
let originalBytes: string | undefined;
|
|
1204
|
-
let parsed: ModelsJsonShape;
|
|
1205
|
-
try {
|
|
1206
|
-
originalBytes = readFileSync(MODELS_JSON_PATH, "utf8");
|
|
1207
|
-
} catch (error) {
|
|
1208
|
-
if (getErrorCode(error) !== "ENOENT") {
|
|
1209
|
-
console.warn(`${LOG_PREFIX}: failed to read models.json; skipping auto-config`, error);
|
|
1210
|
-
result.skipped = true;
|
|
1211
|
-
return result;
|
|
1212
|
-
}
|
|
1213
|
-
parsed = { providers: {} };
|
|
1214
|
-
}
|
|
1215
|
-
|
|
1216
|
-
if (originalBytes !== undefined) {
|
|
1217
|
-
try {
|
|
1218
|
-
const decoded = JSON.parse(originalBytes) as unknown;
|
|
1219
|
-
if (decoded && typeof decoded === "object" && !Array.isArray(decoded)) {
|
|
1220
|
-
parsed = decoded as ModelsJsonShape;
|
|
1221
|
-
} else {
|
|
1222
|
-
// A non-object top-level JSON (array/string/number) is unexpected; treat as malformed and abort.
|
|
1223
|
-
console.warn(`${LOG_PREFIX}: models.json top-level is not an object; aborting auto-config`);
|
|
1224
|
-
result.skipped = true;
|
|
1225
|
-
return result;
|
|
1226
|
-
}
|
|
1227
|
-
} catch (error) {
|
|
1228
|
-
// Malformed JSON: do NOT overwrite the user's file.
|
|
1229
|
-
console.warn(`${LOG_PREFIX}: models.json is not valid JSON; aborting auto-config`, error);
|
|
1230
|
-
result.skipped = true;
|
|
1231
|
-
return result;
|
|
1232
|
-
}
|
|
1233
|
-
} else {
|
|
1234
|
-
parsed = { providers: {} };
|
|
1235
|
-
}
|
|
1236
|
-
|
|
1237
|
-
if (modelsJsonContainsDeepseek(parsed)) {
|
|
1238
|
-
result.deepseekPresent = true;
|
|
1239
|
-
return result;
|
|
1240
|
-
}
|
|
1241
|
-
|
|
1242
|
-
// Decide we will seed. Snapshot the old bytes (or empty marker) into a backup before mutating.
|
|
1243
|
-
const backupPath = `${MODELS_JSON_PATH}.bak.${Date.now()}`;
|
|
1244
|
-
try {
|
|
1245
|
-
mkdirSync(STATE_DIR, { recursive: true });
|
|
1246
|
-
writeFileSync(backupPath, originalBytes ?? "", "utf8");
|
|
1247
|
-
} catch (error) {
|
|
1248
|
-
console.warn(`${LOG_PREFIX}: failed to write models.json backup; aborting auto-config`, error);
|
|
1249
|
-
result.skipped = true;
|
|
1250
|
-
return result;
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
const providersIn = asRecord(parsed.providers) ?? {};
|
|
1254
|
-
const merged: ModelsJsonShape = {
|
|
1255
|
-
...parsed,
|
|
1256
|
-
providers: { ...providersIn, deepseek: DEEPSEEK_SEED_PROVIDER },
|
|
1257
|
-
};
|
|
1258
|
-
|
|
1259
|
-
const tempPath = `${MODELS_JSON_PATH}.tmp.${process.pid}`;
|
|
1260
|
-
try {
|
|
1261
|
-
writeFileSync(tempPath, JSON.stringify(merged, null, 2) + "\n", "utf8");
|
|
1262
|
-
} catch (error) {
|
|
1263
|
-
console.warn(`${LOG_PREFIX}: failed to write models.json temp file; aborting auto-config`, error);
|
|
1264
|
-
result.skipped = true;
|
|
1265
|
-
return result;
|
|
1266
|
-
}
|
|
1267
|
-
|
|
1268
|
-
try {
|
|
1269
|
-
renameSync(tempPath, MODELS_JSON_PATH);
|
|
1270
|
-
} catch (error) {
|
|
1271
|
-
console.warn(
|
|
1272
|
-
`${LOG_PREFIX}: failed to atomically rename models.json (temp left at ${tempPath})`,
|
|
1273
|
-
error,
|
|
1274
|
-
);
|
|
1275
|
-
result.skipped = true;
|
|
1276
|
-
return result;
|
|
1277
|
-
}
|
|
1278
|
-
|
|
1279
|
-
result.seeded = true;
|
|
1280
|
-
result.deepseekPresent = true;
|
|
1281
|
-
notify?.(
|
|
1282
|
-
`${LOG_PREFIX}: seeded DeepSeek provider into ${MODELS_JSON_PATH} (backup at ${backupPath}). ` +
|
|
1283
|
-
`Set ${DEEPSEEK_API_KEY_ENV} to use it; or set ${NO_AUTO_CONFIG_ENV}=1 next time to opt out.`,
|
|
1284
|
-
"info",
|
|
1285
|
-
);
|
|
1286
|
-
return result;
|
|
1287
|
-
}
|
|
1288
|
-
|
|
1289
|
-
function emitDeepseekApiKeyHintIfNeeded(
|
|
1290
|
-
deepseekPresent: boolean,
|
|
1291
|
-
notify: (text: string, level: "info" | "warning") => void,
|
|
1292
|
-
): void {
|
|
1293
|
-
if (!deepseekPresent) return;
|
|
1294
|
-
const value = process.env[DEEPSEEK_API_KEY_ENV];
|
|
1295
|
-
if (typeof value === "string" && value.trim().length > 0) return;
|
|
1296
|
-
|
|
1297
|
-
notify(
|
|
1298
|
-
`${LOG_PREFIX}: ${DEEPSEEK_API_KEY_ENV} is not set. ` +
|
|
1299
|
-
`DeepSeek models in ${MODELS_JSON_PATH} reference $${DEEPSEEK_API_KEY_ENV}; ` +
|
|
1300
|
-
`export ${DEEPSEEK_API_KEY_ENV}=... in your shell to enable them.`,
|
|
1301
|
-
"info",
|
|
1302
|
-
);
|
|
1303
|
-
}
|
|
1304
1179
|
|
|
1305
1180
|
// Internal helpers exported only so the task verification script
|
|
1306
1181
|
// (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
|
|
@@ -1315,42 +1190,75 @@ export const __internals_for_tests = {
|
|
|
1315
1190
|
compressSkillsInSystemPrompt,
|
|
1316
1191
|
MIN_STABLE_CANDIDATE_LENGTH,
|
|
1317
1192
|
SKILL_COMPRESSION_MIN_COUNT,
|
|
1193
|
+
// OpenAI-family cache-key helpers
|
|
1194
|
+
addOpenAIPromptCacheKey,
|
|
1195
|
+
clampPromptCacheKey,
|
|
1196
|
+
hasEffectivePromptCacheKey,
|
|
1197
|
+
isNonEmptyString,
|
|
1198
|
+
shouldInjectOpenAIPromptCacheKey,
|
|
1199
|
+
isOpenAICompatibleApi,
|
|
1200
|
+
isOpenAIFamilyModel,
|
|
1201
|
+
isOpenAIFamilyAssistantMessage,
|
|
1202
|
+
isOpenAIFamilyToken,
|
|
1203
|
+
describeMissingOpenAIFamilyProxyCompat,
|
|
1204
|
+
isOfficialOpenAIBaseUrl,
|
|
1205
|
+
getModelIdNameTokenValues,
|
|
1206
|
+
getAssistantMessageModelTokenValues,
|
|
1207
|
+
getCompat,
|
|
1208
|
+
modelKey,
|
|
1209
|
+
// Cache stats helpers (module-level, usable from verify script)
|
|
1210
|
+
addUsageToCacheStats,
|
|
1211
|
+
formatCacheStats,
|
|
1212
|
+
emptyCacheStats,
|
|
1213
|
+
emptyAllCacheStats,
|
|
1214
|
+
parseCacheStats,
|
|
1215
|
+
parsePersistedCacheStats,
|
|
1318
1216
|
};
|
|
1319
1217
|
|
|
1320
1218
|
export default function (pi: ExtensionAPI) {
|
|
1321
1219
|
const warnedModels = new Set<string>();
|
|
1322
|
-
let
|
|
1220
|
+
let cacheStatsByModel: Record<string, CacheStats> = {};
|
|
1221
|
+
let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
|
|
1323
1222
|
let lastStatusText: string | undefined;
|
|
1324
|
-
let latestPromptCacheKey: string | undefined;
|
|
1325
1223
|
let persistenceWarningShown = false;
|
|
1326
|
-
let
|
|
1224
|
+
let persistTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1225
|
+
const PERSIST_DEBOUNCE_MS = 2000;
|
|
1327
1226
|
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1227
|
+
function getCacheStatsState(): CacheStatsState {
|
|
1228
|
+
return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
/** Look up active stats for a model, falling back to legacy family. */
|
|
1232
|
+
function getStatsForModel(model: PiModel | undefined, adapter: CacheProviderAdapter): CacheStats {
|
|
1233
|
+
if (model) {
|
|
1234
|
+
const key = modelKey(model);
|
|
1235
|
+
const existing = cacheStatsByModel[key];
|
|
1236
|
+
if (existing) return existing;
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
// Fallback: legacy family bucket — used when model key is unknown
|
|
1240
|
+
// or this model hasn't been seen yet in this session.
|
|
1241
|
+
const family = cacheStatsLegacyFamily[adapter.id];
|
|
1242
|
+
if (family) return family;
|
|
1243
|
+
|
|
1244
|
+
const created = emptyCacheStats();
|
|
1245
|
+
cacheStatsLegacyFamily[adapter.id] = created;
|
|
1246
|
+
return created;
|
|
1340
1247
|
}
|
|
1341
1248
|
|
|
1342
|
-
|
|
1343
|
-
|
|
1249
|
+
/** Get or create a stats entry for the given model key. */
|
|
1250
|
+
function getOrCreateStatsByModelKey(key: string): CacheStats {
|
|
1251
|
+
const existing = cacheStatsByModel[key];
|
|
1344
1252
|
if (existing) return existing;
|
|
1345
1253
|
|
|
1346
1254
|
const created = emptyCacheStats();
|
|
1347
|
-
|
|
1255
|
+
cacheStatsByModel[key] = created;
|
|
1348
1256
|
return created;
|
|
1349
1257
|
}
|
|
1350
1258
|
|
|
1351
1259
|
async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
|
|
1352
1260
|
try {
|
|
1353
|
-
await writePersistedCacheStats(
|
|
1261
|
+
await writePersistedCacheStats(getCacheStatsState());
|
|
1354
1262
|
} catch (error) {
|
|
1355
1263
|
console.warn(`${LOG_PREFIX}: failed to persist cache stats`, error);
|
|
1356
1264
|
if (!persistenceWarningShown) {
|
|
@@ -1363,14 +1271,48 @@ export default function (pi: ExtensionAPI) {
|
|
|
1363
1271
|
}
|
|
1364
1272
|
}
|
|
1365
1273
|
|
|
1274
|
+
/** Schedule a debounced persist. Coalesces rapid message_end writes
|
|
1275
|
+
* into a single disk write after PERSIST_DEBOUNCE_MS of silence.
|
|
1276
|
+
* In-memory stats remain instantly up-to-date for the footer; only
|
|
1277
|
+
* the on-disk persistence is delayed. */
|
|
1278
|
+
function schedulePersistCacheStats(ctx?: ExtensionContext): void {
|
|
1279
|
+
if (persistTimer !== null) clearTimeout(persistTimer);
|
|
1280
|
+
persistTimer = setTimeout(() => {
|
|
1281
|
+
persistTimer = null;
|
|
1282
|
+
persistCacheStats(ctx).catch((err) => {
|
|
1283
|
+
console.warn(`${LOG_PREFIX}: debounced persist failed`, err);
|
|
1284
|
+
});
|
|
1285
|
+
}, PERSIST_DEBOUNCE_MS);
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1288
|
+
/** Flush any pending debounced persist immediately (cancels timer + writes).
|
|
1289
|
+
* Used on reload and day-rollover where immediate durability matters. */
|
|
1290
|
+
async function flushPersistCacheStats(ctx?: ExtensionContext): Promise<void> {
|
|
1291
|
+
if (persistTimer !== null) {
|
|
1292
|
+
clearTimeout(persistTimer);
|
|
1293
|
+
persistTimer = null;
|
|
1294
|
+
}
|
|
1295
|
+
await persistCacheStats(ctx);
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1366
1298
|
async function rollOverStatsIfNeeded(ctx?: ExtensionContext): Promise<void> {
|
|
1367
1299
|
const day = currentLocalDay();
|
|
1368
1300
|
let changed = false;
|
|
1369
1301
|
|
|
1302
|
+
// Roll over per-model entries.
|
|
1303
|
+
for (const key of Object.keys(cacheStatsByModel)) {
|
|
1304
|
+
const stats = cacheStatsByModel[key];
|
|
1305
|
+
if (stats && stats.day !== day) {
|
|
1306
|
+
cacheStatsByModel[key] = emptyCacheStats(day);
|
|
1307
|
+
changed = true;
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
// Roll over legacy family entries.
|
|
1370
1312
|
for (const id of CACHE_PROVIDER_IDS) {
|
|
1371
|
-
const stats =
|
|
1313
|
+
const stats = cacheStatsLegacyFamily[id];
|
|
1372
1314
|
if (stats && stats.day !== day) {
|
|
1373
|
-
|
|
1315
|
+
cacheStatsLegacyFamily[id] = emptyCacheStats(day);
|
|
1374
1316
|
changed = true;
|
|
1375
1317
|
}
|
|
1376
1318
|
}
|
|
@@ -1383,13 +1325,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
1383
1325
|
|
|
1384
1326
|
async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
|
|
1385
1327
|
if (reason === "reload") {
|
|
1386
|
-
|
|
1328
|
+
cacheStatsByModel = {};
|
|
1329
|
+
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
1387
1330
|
lastStatusText = undefined;
|
|
1388
|
-
await
|
|
1331
|
+
await flushPersistCacheStats(ctx);
|
|
1389
1332
|
return;
|
|
1390
1333
|
}
|
|
1391
1334
|
|
|
1392
|
-
|
|
1335
|
+
const persisted = await readPersistedCacheStats();
|
|
1336
|
+
if (persisted) {
|
|
1337
|
+
cacheStatsByModel = persisted.statsByModel;
|
|
1338
|
+
cacheStatsLegacyFamily = persisted.legacyFamily;
|
|
1339
|
+
} else {
|
|
1340
|
+
cacheStatsByModel = {};
|
|
1341
|
+
cacheStatsLegacyFamily = emptyAllCacheStats();
|
|
1342
|
+
}
|
|
1393
1343
|
lastStatusText = undefined;
|
|
1394
1344
|
await rollOverStatsIfNeeded(ctx);
|
|
1395
1345
|
}
|
|
@@ -1398,7 +1348,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
1398
1348
|
await rollOverStatsIfNeeded(ctx);
|
|
1399
1349
|
|
|
1400
1350
|
const adapter = selectAdapterForModel(model);
|
|
1401
|
-
let statusText: string | undefined
|
|
1351
|
+
let statusText: string | undefined;
|
|
1352
|
+
if (adapter) {
|
|
1353
|
+
// Display only per-model scoped stats. A model that has never been
|
|
1354
|
+
// used in this session shows 0/0 rather than falling back to legacy
|
|
1355
|
+
// family aggregated stats (which could span different providers with
|
|
1356
|
+
// the same model-family name). The message_end hook populates
|
|
1357
|
+
// cacheStatsByModel[key] on first use with that model.
|
|
1358
|
+
const key = model ? modelKey(model) : undefined;
|
|
1359
|
+
const stats = key ? cacheStatsByModel[key] : undefined;
|
|
1360
|
+
statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
1361
|
+
}
|
|
1402
1362
|
|
|
1403
1363
|
// If optimizeSystemPrompt detected structural truncation on this or
|
|
1404
1364
|
// a recent turn, flag it once in the footer so the user knows to
|
|
@@ -1418,12 +1378,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1418
1378
|
pi.on("session_start", async (event, ctx) => {
|
|
1419
1379
|
await restoreCacheStats(event.reason, ctx);
|
|
1420
1380
|
notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
|
|
1421
|
-
if (!apiKeyHintShown) {
|
|
1422
|
-
apiKeyHintShown = true;
|
|
1423
|
-
emitDeepseekApiKeyHintIfNeeded(autoConfig.deepseekPresent, (text, level) => {
|
|
1424
|
-
ctx.ui.notify(text, level);
|
|
1425
|
-
});
|
|
1426
|
-
}
|
|
1427
1381
|
await publishStatus(ctx);
|
|
1428
1382
|
});
|
|
1429
1383
|
|
|
@@ -1489,7 +1443,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1489
1443
|
// cache key derived from `stablePrefix` reflects what actually
|
|
1490
1444
|
// ships to the provider.
|
|
1491
1445
|
const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
|
|
1492
|
-
latestPromptCacheKey = buildPromptCacheKey(optimized.stablePrefix);
|
|
1493
1446
|
|
|
1494
1447
|
if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
|
|
1495
1448
|
return { systemPrompt: optimized.systemPrompt };
|
|
@@ -1510,10 +1463,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
1510
1463
|
});
|
|
1511
1464
|
|
|
1512
1465
|
pi.on("before_provider_request", (event, ctx) => {
|
|
1513
|
-
if (!
|
|
1466
|
+
if (!shouldInjectOpenAIPromptCacheKey()) return undefined;
|
|
1514
1467
|
if (!isOpenAIFamilyModel(ctx.model)) return undefined;
|
|
1468
|
+
if (!isOpenAICompatibleApi(ctx.model?.api)) return undefined;
|
|
1515
1469
|
|
|
1516
|
-
return addOpenAIPromptCacheKey(event.payload,
|
|
1470
|
+
return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
|
|
1517
1471
|
});
|
|
1518
1472
|
|
|
1519
1473
|
pi.on("message_end", async (event, ctx) => {
|
|
@@ -1524,8 +1478,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
1524
1478
|
if (!usage) return;
|
|
1525
1479
|
|
|
1526
1480
|
await rollOverStatsIfNeeded(ctx);
|
|
1527
|
-
|
|
1528
|
-
|
|
1481
|
+
|
|
1482
|
+
// Update stats scoped to the active model (provider/id key).
|
|
1483
|
+
// Falls back to legacy family when ctx.model is undefined.
|
|
1484
|
+
if (ctx.model) {
|
|
1485
|
+
const key = modelKey(ctx.model);
|
|
1486
|
+
addUsageToCacheStats(getOrCreateStatsByModelKey(key), usage);
|
|
1487
|
+
} else {
|
|
1488
|
+
addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
|
|
1489
|
+
}
|
|
1490
|
+
|
|
1491
|
+
schedulePersistCacheStats(ctx);
|
|
1529
1492
|
await publishStatus(ctx);
|
|
1530
1493
|
});
|
|
1531
1494
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.4.1",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|