pi-cache-optimizer 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -12
- package/README.zh-CN.md +10 -12
- package/index.ts +92 -265
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -62,15 +62,13 @@ State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on W
|
|
|
62
62
|
pi install npm:pi-cache-optimizer
|
|
63
63
|
```
|
|
64
64
|
|
|
65
|
-
3.
|
|
66
|
-
4. Export your DeepSeek API key in the same shell where you run `pi`:
|
|
65
|
+
3. Export your DeepSeek API key in the same shell where you run `pi` (if you use a DeepSeek model):
|
|
67
66
|
|
|
68
67
|
```bash
|
|
69
68
|
export DEEPSEEK_API_KEY='...'
|
|
70
69
|
```
|
|
71
70
|
|
|
72
|
-
|
|
73
|
-
5. Opt out of auto-seeding by exporting `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` before launching Pi. With opt-out, no write or backup happens, and no provider entry is added or modified.
|
|
71
|
+
This extension never reads, stores, or prints the key value.
|
|
74
72
|
|
|
75
73
|
## Install
|
|
76
74
|
|
|
@@ -78,15 +76,15 @@ State files under `~/.pi/agent/` are resolved via Node's `os.homedir()`, so on W
|
|
|
78
76
|
pi install npm:pi-cache-optimizer
|
|
79
77
|
```
|
|
80
78
|
|
|
81
|
-
After installation, `PI_CACHE_RETENTION=long` is applied automatically, the system prompt is reordered and skills are compressed automatically, session-overview churn is stripped automatically,
|
|
79
|
+
After installation, `PI_CACHE_RETENTION=long` is applied automatically, the system prompt is reordered and skills are compressed automatically, session-overview churn is stripped automatically, and the footer shows cache stats after supported model-family responses with exposed usage.
|
|
82
80
|
|
|
83
81
|
## Opt-out
|
|
84
82
|
|
|
85
83
|
| Env var | Effect |
|
|
86
84
|
|---------|--------|
|
|
87
|
-
| `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` | Skip DeepSeek `models.json` auto-seed |
|
|
88
85
|
| `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | Keep pi's verbose `<available_skills>` XML (opt out of one-line index) |
|
|
89
|
-
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=
|
|
86
|
+
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | Disable the OpenAI-family `prompt_cache_key` fallback (default is enabled) |
|
|
87
|
+
| `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | Disable the OpenAI-family `prompt_cache_key` fallback |
|
|
90
88
|
|
|
91
89
|
## Uninstall
|
|
92
90
|
|
|
@@ -114,7 +112,7 @@ rm ~/.pi/agent/pi-cache-optimizer-stats.json
|
|
|
114
112
|
rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
115
113
|
```
|
|
116
114
|
|
|
117
|
-
|
|
115
|
+
|
|
118
116
|
|
|
119
117
|
## Footer cache stats
|
|
120
118
|
|
|
@@ -193,7 +191,7 @@ After: [stable tools + rules | dynamic git status | task context]
|
|
|
193
191
|
↓ stable prefix → higher chance of cache reuse
|
|
194
192
|
```
|
|
195
193
|
|
|
196
|
-
Pi itself decides whether to send cache-related fields such as `prompt_cache_retention`, session-affinity headers, or Anthropic-style `cache_control` based on model compat and `PI_CACHE_RETENTION`.
|
|
194
|
+
Pi itself decides whether to send cache-related fields such as `prompt_cache_retention`, session-affinity headers, or Anthropic-style `cache_control` based on model compat and `PI_CACHE_RETENTION`. This extension now adds only one conservative request-body fallback by default: for OpenAI-family models using OpenAI-compatible Pi APIs, it fills a missing or blank top-level `prompt_cache_key` with the Pi session id and never overwrites an existing non-empty key. The extension does not fake cache hits; it helps configuration, improves stable-prefix probability, and summarizes exposed usage in the footer.
|
|
197
195
|
|
|
198
196
|
## Improving cache hit rate
|
|
199
197
|
|
|
@@ -208,7 +206,7 @@ What the extension does automatically:
|
|
|
208
206
|
Provider notes:
|
|
209
207
|
|
|
210
208
|
- DeepSeek: current behavior remains the reference path. Stable prefix ordering plus long-retention/session-affinity compat gives the best chance of automatic KV prefix reuse.
|
|
211
|
-
- OpenAI-family: prompt caching is automatic only on supported upstreams and sufficiently long prompts. Keep static instructions, tools, examples, and specs before changing user/task context. Pi owns retention transport by default.
|
|
209
|
+
- OpenAI-family: prompt caching is automatic only on supported upstreams and sufficiently long prompts. Keep static instructions, tools, examples, and specs before changing user/task context. Pi owns retention transport by default. For OpenAI-compatible Pi APIs, the extension fills a missing or blank top-level `prompt_cache_key` with the Pi session id (matching Pi core's official OpenAI behavior) and never overwrites an existing non-empty `prompt_cache_key` / `promptCacheKey`. Disable this fallback with `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` or `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0`. Unsupported OpenAI-compatible proxies may reject unknown fields; custom APIs are not targeted.
|
|
212
210
|
- Claude: prompt caching depends on Anthropic `cache_control` breakpoints. This extension does not inject breakpoints itself; for compatible endpoints, configure Pi compat such as `cacheControlFormat: "anthropic"` only when the endpoint supports it.
|
|
213
211
|
- Gemini/Vertex: implicit caching benefits from repeated large stable prefixes. This extension does not create explicit `cachedContents` resources or store cache resource names.
|
|
214
212
|
- Proxies/aggregators: fix upstream routing/provider order where possible. Cache hit rates are unreliable if the same model id/name can route to different upstreams.
|
|
@@ -225,9 +223,9 @@ This package now has provider-family stats adapters, but it still avoids blind g
|
|
|
225
223
|
|
|
226
224
|
## Out of scope for this release
|
|
227
225
|
|
|
228
|
-
- Broad/
|
|
226
|
+
- Broad/provider-agnostic request-body mutation or cache-control injection. The only default request-body fallback is OpenAI-family `prompt_cache_key` on OpenAI-compatible APIs, sourced from the Pi session id and skipped when an effective key already exists.
|
|
229
227
|
- Injecting Anthropic `cache_control` markers.
|
|
230
|
-
- Sending OpenAI `prompt_cache_key`
|
|
228
|
+
- Sending OpenAI `prompt_cache_key` into custom/non-OpenAI-compatible APIs; the fallback is gated to OpenAI-family id/name plus `openai-completions` / `openai-responses`.
|
|
231
229
|
- Overriding OpenAI `prompt_cache_retention` outside Pi's own compat handling.
|
|
232
230
|
- Creating Gemini explicit `cachedContents` resources or persisting cache resource names.
|
|
233
231
|
- Claiming stats for providers that do not expose reliable cache usage.
|
package/README.zh-CN.md
CHANGED
|
@@ -65,15 +65,13 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
|
|
|
65
65
|
pi install npm:pi-cache-optimizer
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
-
3.
|
|
69
|
-
4. 在运行 `pi` 的同一个 shell 中导出 DeepSeek API key:
|
|
68
|
+
3. 如果使用 DeepSeek 模型,请在运行 `pi` 的同一个 shell 中导出 DeepSeek API key:
|
|
70
69
|
|
|
71
70
|
```bash
|
|
72
71
|
export DEEPSEEK_API_KEY='...'
|
|
73
72
|
```
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
5. 如需退出自动写入,请在启动 Pi 之前设 `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1`。退出后不会产生任何写入或备份,也不会新增 provider 条目。
|
|
74
|
+
本扩展**不会**读取、存储或打印 key 的值。
|
|
77
75
|
|
|
78
76
|
## 安装
|
|
79
77
|
|
|
@@ -81,15 +79,15 @@ Generic OpenAI-compatible 代理**不会**仅因为使用 OpenAI 形状 API 或
|
|
|
81
79
|
pi install npm:pi-cache-optimizer
|
|
82
80
|
```
|
|
83
81
|
|
|
84
|
-
安装后 `PI_CACHE_RETENTION=long` **自动生效**,system prompt **自动重组**、skills 自动压缩、session-overview
|
|
82
|
+
安装后 `PI_CACHE_RETENTION=long` **自动生效**,system prompt **自动重组**、skills 自动压缩、session-overview 动态尾字段自动剥离;受支持 model family 的响应完成且暴露 usage 后,底部状态栏会显示缓存统计。
|
|
85
83
|
|
|
86
84
|
## 退出(Opt-out)
|
|
87
85
|
|
|
88
86
|
| 环境变量 | 作用 |
|
|
89
87
|
|---------|------|
|
|
90
|
-
| `PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG=1` | 跳过 `models.json` DeepSeek 自动写入 |
|
|
91
88
|
| `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | 保留 pi 的 verbose `<available_skills>` XML(退出一行索引模式) |
|
|
92
|
-
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=
|
|
89
|
+
| `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | 禁用 OpenAI-family `prompt_cache_key` 兜底(默认启用) |
|
|
90
|
+
| `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | 禁用 OpenAI-family `prompt_cache_key` 兜底 |
|
|
93
91
|
|
|
94
92
|
## 卸载
|
|
95
93
|
|
|
@@ -117,7 +115,7 @@ rm ~/.pi/agent/pi-cache-optimizer-stats.json
|
|
|
117
115
|
rm -f ~/.pi/agent/deepseek-cache-optimizer-stats.json
|
|
118
116
|
```
|
|
119
117
|
|
|
120
|
-
|
|
118
|
+
|
|
121
119
|
|
|
122
120
|
## 底部缓存统计
|
|
123
121
|
|
|
@@ -196,7 +194,7 @@ Provider 缓存通常依赖精确或近似精确的前缀匹配。Pi 的 system
|
|
|
196
194
|
↓ 稳定前缀不变 → 更容易命中缓存
|
|
197
195
|
```
|
|
198
196
|
|
|
199
|
-
Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送缓存相关字段,例如 `prompt_cache_retention`、session affinity headers 或 Anthropic-style `cache_control
|
|
197
|
+
Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送缓存相关字段,例如 `prompt_cache_retention`、session affinity headers 或 Anthropic-style `cache_control`。本扩展现在默认只做一个保守的 request-body 兜底:对使用 OpenAI-compatible Pi API 的 OpenAI-family 模型,当顶层 `prompt_cache_key` 缺失或为空时,用 Pi session id 补上,并且不会覆盖已有的非空 key。本扩展不伪造缓存命中,只帮助配置、提高稳定前缀概率,并把已暴露的 usage 汇总到底部状态栏。
|
|
200
198
|
|
|
201
199
|
## 提高 cache 命中率
|
|
202
200
|
|
|
@@ -211,7 +209,7 @@ Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送
|
|
|
211
209
|
各 provider 注意点:
|
|
212
210
|
|
|
213
211
|
- DeepSeek:现有行为仍是参考路径。稳定前缀排序,加上 long-retention / session-affinity compat,最有利于自动 KV prefix 复用。
|
|
214
|
-
- OpenAI-family:prompt caching 只会在真实上游支持且 prompt 足够长时自动生效。请尽量把静态 instructions、tools、examples、specs 放在变化的 user/task context 前面。retention 传输默认由 Pi
|
|
212
|
+
- OpenAI-family:prompt caching 只会在真实上游支持且 prompt 足够长时自动生效。请尽量把静态 instructions、tools、examples、specs 放在变化的 user/task context 前面。retention 传输默认由 Pi 负责。对 OpenAI-compatible Pi API,本扩展会用 Pi session id 补齐缺失或空白的顶层 `prompt_cache_key`(与 Pi core 官方 OpenAI 行为对齐),并且不会覆盖已有非空的 `prompt_cache_key` / `promptCacheKey`。可用 `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` 或 `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` 禁用该兜底。不支持该字段的 OpenAI-compatible 代理可能拒绝请求;custom API 不会被注入。
|
|
215
213
|
- Claude:prompt caching 依赖 Anthropic `cache_control` breakpoints。本扩展不会自行注入 breakpoint;对兼容 endpoint,只在 endpoint 明确支持时配置 Pi compat,例如 `cacheControlFormat: "anthropic"`。
|
|
216
214
|
- Gemini/Vertex:implicit caching 受益于重复的大型稳定前缀。本扩展不会创建 explicit `cachedContents` resources,也不会保存 cache resource names。
|
|
217
215
|
- Proxies/aggregators:尽量固定上游 routing/provider order。如果同一个 model id/name 可能路由到不同上游,cache hit rate 会不稳定。
|
|
@@ -229,9 +227,9 @@ Pi 本身还会根据模型 compat 和 `PI_CACHE_RETENTION` 决定是否发送
|
|
|
229
227
|
|
|
230
228
|
## 本版本不包含
|
|
231
229
|
|
|
232
|
-
-
|
|
230
|
+
- 广泛/provider-agnostic 修改请求体,或做 cache-control 注入。唯一默认 request-body 兜底是 OpenAI-family 在 OpenAI-compatible API 上使用 Pi session id 的 `prompt_cache_key`,且已有有效 key 时会跳过。
|
|
233
231
|
- 注入 Anthropic `cache_control` markers。
|
|
234
|
-
-
|
|
232
|
+
- 向 custom / 非 OpenAI-compatible API 发送 OpenAI `prompt_cache_key`;该兜底同时要求 model id/name 属于 OpenAI-family,且 API 是 `openai-completions` / `openai-responses`。
|
|
235
233
|
- 在 Pi 自己的 compat 处理之外覆盖 OpenAI `prompt_cache_retention`。
|
|
236
234
|
- 创建 Gemini explicit `cachedContents` resources 或持久化 cache resource names。
|
|
237
235
|
- 对不暴露可靠 cache usage 的 provider 声称统计支持。
|
package/index.ts
CHANGED
|
@@ -1,10 +1,3 @@
|
|
|
1
|
-
import { createHash } from "node:crypto";
|
|
2
|
-
import {
|
|
3
|
-
mkdirSync,
|
|
4
|
-
readFileSync,
|
|
5
|
-
renameSync,
|
|
6
|
-
writeFileSync,
|
|
7
|
-
} from "node:fs";
|
|
8
1
|
import { mkdir, readFile, rename, unlink, writeFile } from "node:fs/promises";
|
|
9
2
|
import { homedir } from "node:os";
|
|
10
3
|
import { dirname, join } from "node:path";
|
|
@@ -16,10 +9,8 @@ import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@
|
|
|
16
9
|
* What it does:
|
|
17
10
|
* 1. Reorders Pi's system prompt so stable content is sent before dynamic context.
|
|
18
11
|
* 2. Sets PI_CACHE_RETENTION=long at extension load time.
|
|
19
|
-
* 3.
|
|
20
|
-
*
|
|
21
|
-
* 4. Warns once for provider/model cache compat gaps where the signal is conservative.
|
|
22
|
-
* 5. Shows lightweight persisted provider-specific cache stats in Pi's footer.
|
|
12
|
+
* 3. Warns once for provider/model cache compat gaps where the signal is conservative.
|
|
13
|
+
* 4. Shows lightweight persisted provider-specific cache stats in Pi's footer.
|
|
23
14
|
*
|
|
24
15
|
* Provider prompt/KV caches are provider-side and best-effort. This extension improves
|
|
25
16
|
* the odds of cache hits; it cannot guarantee hits, especially through proxies.
|
|
@@ -41,14 +32,11 @@ const STATUS_KEY = "pi-cache-stats";
|
|
|
41
32
|
const STATE_DIR = join(homedir(), ".pi", "agent");
|
|
42
33
|
const STATE_FILE_PATH = join(STATE_DIR, "pi-cache-optimizer-stats.json");
|
|
43
34
|
const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
|
|
44
|
-
const MODELS_JSON_PATH = join(STATE_DIR, "models.json");
|
|
45
|
-
|
|
46
35
|
const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
|
|
47
36
|
const OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY";
|
|
48
|
-
const
|
|
49
|
-
const
|
|
37
|
+
const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
|
|
38
|
+
const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
50
39
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
51
|
-
const DEEPSEEK_API_KEY_ENV = "DEEPSEEK_API_KEY";
|
|
52
40
|
|
|
53
41
|
// WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
|
|
54
42
|
// logic has accidentally truncated a structural marker (any XML tag or
|
|
@@ -511,12 +499,17 @@ function optimizeSystemPrompt(
|
|
|
511
499
|
};
|
|
512
500
|
}
|
|
513
501
|
|
|
514
|
-
function
|
|
515
|
-
const normalized =
|
|
502
|
+
function clampPromptCacheKey(key: string | undefined): string | undefined {
|
|
503
|
+
const normalized = key?.trim();
|
|
516
504
|
if (!normalized) return undefined;
|
|
517
505
|
|
|
518
|
-
const
|
|
519
|
-
return
|
|
506
|
+
const chars = Array.from(normalized);
|
|
507
|
+
if (chars.length <= OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH) return normalized;
|
|
508
|
+
return chars.slice(0, OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH).join("");
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
function getSessionPromptCacheKey(ctx: ExtensionContext): string | undefined {
|
|
512
|
+
return clampPromptCacheKey(ctx.sessionManager.getSessionId());
|
|
520
513
|
}
|
|
521
514
|
|
|
522
515
|
function asRecord(value: unknown): UnknownRecord | undefined {
|
|
@@ -547,8 +540,16 @@ function isEnabledEnv(value: string | undefined): boolean {
|
|
|
547
540
|
return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on";
|
|
548
541
|
}
|
|
549
542
|
|
|
550
|
-
function
|
|
551
|
-
|
|
543
|
+
function isDisabledEnv(value: string | undefined): boolean {
|
|
544
|
+
if (!value) return false;
|
|
545
|
+
const normalized = value.trim().toLowerCase();
|
|
546
|
+
return normalized === "0" || normalized === "false" || normalized === "no" || normalized === "off";
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
function shouldInjectOpenAIPromptCacheKey(): boolean {
|
|
550
|
+
if (isEnabledEnv(process.env[NO_OPENAI_CACHE_KEY_ENV])) return false;
|
|
551
|
+
if (isDisabledEnv(process.env[OPENAI_CACHE_KEY_ENV])) return false;
|
|
552
|
+
return true;
|
|
552
553
|
}
|
|
553
554
|
|
|
554
555
|
function isAssistantMessage(message: unknown): boolean {
|
|
@@ -796,13 +797,51 @@ function normalizeWithFallback(
|
|
|
796
797
|
|
|
797
798
|
function addOpenAIPromptCacheKey(payload: unknown, cacheKey: string | undefined): unknown | undefined {
|
|
798
799
|
const record = asRecord(payload);
|
|
799
|
-
|
|
800
|
+
const normalizedCacheKey = clampPromptCacheKey(cacheKey);
|
|
801
|
+
if (!record || !normalizedCacheKey) return undefined;
|
|
800
802
|
|
|
801
|
-
if (
|
|
803
|
+
if (hasEffectivePromptCacheKey(record)) {
|
|
802
804
|
return undefined;
|
|
803
805
|
}
|
|
804
806
|
|
|
805
|
-
return { ...record, prompt_cache_key:
|
|
807
|
+
return { ...record, prompt_cache_key: normalizedCacheKey };
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
function hasEffectivePromptCacheKey(record: UnknownRecord): boolean {
|
|
811
|
+
return isNonEmptyString(record.prompt_cache_key) || isNonEmptyString(record.promptCacheKey);
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
function isNonEmptyString(value: unknown): boolean {
|
|
815
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
819
|
+
const value = lower(model.baseUrl).trim();
|
|
820
|
+
if (!value) return false;
|
|
821
|
+
|
|
822
|
+
try {
|
|
823
|
+
return new URL(value).hostname === "api.openai.com";
|
|
824
|
+
} catch {
|
|
825
|
+
return value === "api.openai.com" || value.startsWith("api.openai.com/");
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
|
|
830
|
+
const compat = getCompat(model);
|
|
831
|
+
const missing: string[] = [];
|
|
832
|
+
|
|
833
|
+
if (!isOpenAIFamilyModel(model)) return missing;
|
|
834
|
+
if (model.api !== "openai-completions") return missing;
|
|
835
|
+
if (isOfficialOpenAIBaseUrl(model)) return missing;
|
|
836
|
+
|
|
837
|
+
if (compat.supportsLongCacheRetention !== true) {
|
|
838
|
+
missing.push("supportsLongCacheRetention");
|
|
839
|
+
}
|
|
840
|
+
if (compat.sendSessionAffinityHeaders !== true) {
|
|
841
|
+
missing.push("sendSessionAffinityHeaders");
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
return missing;
|
|
806
845
|
}
|
|
807
846
|
|
|
808
847
|
function describeMissingDeepSeekCompat(model: PiModel): string[] {
|
|
@@ -881,6 +920,15 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
881
920
|
normalizeUsage(message) {
|
|
882
921
|
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
883
922
|
},
|
|
923
|
+
warningText(model) {
|
|
924
|
+
const missing = describeMissingOpenAIFamilyProxyCompat(model);
|
|
925
|
+
if (missing.length === 0) return undefined;
|
|
926
|
+
|
|
927
|
+
return (
|
|
928
|
+
`💡 pi-cache-optimizer: ${modelKey(model)} looks like a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}. ` +
|
|
929
|
+
`For better cache locality, add compat: { "supportsLongCacheRetention": true, "sendSessionAffinityHeaders": true } in ~/.pi/agent/models.json when the endpoint supports these fields.`
|
|
930
|
+
);
|
|
931
|
+
},
|
|
884
932
|
},
|
|
885
933
|
{
|
|
886
934
|
id: "gemini",
|
|
@@ -1085,222 +1133,7 @@ async function writePersistedCacheStats(statsByProvider: Partial<Record<CachePro
|
|
|
1085
1133
|
await rename(tempPath, STATE_FILE_PATH);
|
|
1086
1134
|
}
|
|
1087
1135
|
|
|
1088
|
-
// ============================================================
|
|
1089
|
-
// models.json auto-config (DeepSeek seed)
|
|
1090
|
-
// ============================================================
|
|
1091
1136
|
|
|
1092
|
-
type ModelsJsonShape = {
|
|
1093
|
-
providers?: UnknownRecord;
|
|
1094
|
-
} & UnknownRecord;
|
|
1095
|
-
|
|
1096
|
-
const DEEPSEEK_SEED_PROVIDER = {
|
|
1097
|
-
baseUrl: "https://api.deepseek.com",
|
|
1098
|
-
api: "openai-completions",
|
|
1099
|
-
apiKey: "$DEEPSEEK_API_KEY",
|
|
1100
|
-
models: [
|
|
1101
|
-
{
|
|
1102
|
-
id: "deepseek-v4-pro",
|
|
1103
|
-
name: "DeepSeek V4 Pro",
|
|
1104
|
-
contextWindow: 1_000_000,
|
|
1105
|
-
maxTokens: 384_000,
|
|
1106
|
-
input: ["text"],
|
|
1107
|
-
reasoning: true,
|
|
1108
|
-
cost: { input: 1.74, output: 3.48, cacheRead: 0.145, cacheWrite: 0 },
|
|
1109
|
-
compat: {
|
|
1110
|
-
requiresReasoningContentOnAssistantMessages: true,
|
|
1111
|
-
thinkingFormat: "deepseek",
|
|
1112
|
-
supportsLongCacheRetention: true,
|
|
1113
|
-
sendSessionAffinityHeaders: true,
|
|
1114
|
-
reasoningEffortMap: {
|
|
1115
|
-
minimal: "high",
|
|
1116
|
-
low: "high",
|
|
1117
|
-
medium: "high",
|
|
1118
|
-
high: "high",
|
|
1119
|
-
xhigh: "max",
|
|
1120
|
-
},
|
|
1121
|
-
},
|
|
1122
|
-
},
|
|
1123
|
-
{
|
|
1124
|
-
id: "deepseek-v4-flash",
|
|
1125
|
-
name: "DeepSeek V4 Flash",
|
|
1126
|
-
contextWindow: 1_000_000,
|
|
1127
|
-
maxTokens: 384_000,
|
|
1128
|
-
input: ["text"],
|
|
1129
|
-
reasoning: true,
|
|
1130
|
-
cost: { input: 0.14, output: 0.28, cacheRead: 0.028, cacheWrite: 0 },
|
|
1131
|
-
compat: {
|
|
1132
|
-
requiresReasoningContentOnAssistantMessages: true,
|
|
1133
|
-
thinkingFormat: "deepseek",
|
|
1134
|
-
supportsLongCacheRetention: true,
|
|
1135
|
-
sendSessionAffinityHeaders: true,
|
|
1136
|
-
reasoningEffortMap: {
|
|
1137
|
-
minimal: "high",
|
|
1138
|
-
low: "high",
|
|
1139
|
-
medium: "high",
|
|
1140
|
-
high: "high",
|
|
1141
|
-
xhigh: "max",
|
|
1142
|
-
},
|
|
1143
|
-
},
|
|
1144
|
-
},
|
|
1145
|
-
],
|
|
1146
|
-
} as const;
|
|
1147
|
-
|
|
1148
|
-
function modelsJsonContainsDeepseek(parsed: ModelsJsonShape): boolean {
|
|
1149
|
-
const providers = asRecord(parsed.providers);
|
|
1150
|
-
if (!providers) return false;
|
|
1151
|
-
|
|
1152
|
-
// Respect user intent: a provider key literally named "deepseek" (case-insensitive)
|
|
1153
|
-
// means the user already declared their own DeepSeek block, even if its models list is empty.
|
|
1154
|
-
for (const key of Object.keys(providers)) {
|
|
1155
|
-
if (key.toLowerCase() === "deepseek") return true;
|
|
1156
|
-
}
|
|
1157
|
-
|
|
1158
|
-
for (const providerValue of Object.values(providers)) {
|
|
1159
|
-
const provider = asRecord(providerValue);
|
|
1160
|
-
if (!provider) continue;
|
|
1161
|
-
const models = provider.models;
|
|
1162
|
-
if (!Array.isArray(models)) continue;
|
|
1163
|
-
for (const model of models) {
|
|
1164
|
-
const record = asRecord(model);
|
|
1165
|
-
if (!record) continue;
|
|
1166
|
-
if (lower(record.id).includes("deepseek") || lower(record.name).includes("deepseek")) {
|
|
1167
|
-
return true;
|
|
1168
|
-
}
|
|
1169
|
-
}
|
|
1170
|
-
}
|
|
1171
|
-
|
|
1172
|
-
return false;
|
|
1173
|
-
}
|
|
1174
|
-
|
|
1175
|
-
type EnsureDeepseekResult = {
|
|
1176
|
-
// Whether some DeepSeek-like model is now present in models.json (either pre-existing or just-seeded).
|
|
1177
|
-
deepseekPresent: boolean;
|
|
1178
|
-
// Whether we just wrote the seed in this activation.
|
|
1179
|
-
seeded: boolean;
|
|
1180
|
-
// Whether auto-config was deliberately skipped (env opt-out or malformed file).
|
|
1181
|
-
skipped: boolean;
|
|
1182
|
-
};
|
|
1183
|
-
|
|
1184
|
-
function ensureDeepseekConfigured(notify?: (text: string, level: "info" | "warning") => void): EnsureDeepseekResult {
|
|
1185
|
-
const result: EnsureDeepseekResult = { deepseekPresent: false, seeded: false, skipped: false };
|
|
1186
|
-
|
|
1187
|
-
if (isEnabledEnv(process.env[NO_AUTO_CONFIG_ENV])) {
|
|
1188
|
-
result.skipped = true;
|
|
1189
|
-
// Even when opted out, callers still need to know whether DeepSeek is present so the
|
|
1190
|
-
// API-key hint can fire. Read-only inspection only; no writes.
|
|
1191
|
-
try {
|
|
1192
|
-
const raw = readFileSync(MODELS_JSON_PATH, "utf8");
|
|
1193
|
-
const parsed = JSON.parse(raw) as ModelsJsonShape;
|
|
1194
|
-
if (parsed && typeof parsed === "object") {
|
|
1195
|
-
result.deepseekPresent = modelsJsonContainsDeepseek(parsed);
|
|
1196
|
-
}
|
|
1197
|
-
} catch {
|
|
1198
|
-
// ignore: missing or unreadable file means "not present"
|
|
1199
|
-
}
|
|
1200
|
-
return result;
|
|
1201
|
-
}
|
|
1202
|
-
|
|
1203
|
-
let originalBytes: string | undefined;
|
|
1204
|
-
let parsed: ModelsJsonShape;
|
|
1205
|
-
try {
|
|
1206
|
-
originalBytes = readFileSync(MODELS_JSON_PATH, "utf8");
|
|
1207
|
-
} catch (error) {
|
|
1208
|
-
if (getErrorCode(error) !== "ENOENT") {
|
|
1209
|
-
console.warn(`${LOG_PREFIX}: failed to read models.json; skipping auto-config`, error);
|
|
1210
|
-
result.skipped = true;
|
|
1211
|
-
return result;
|
|
1212
|
-
}
|
|
1213
|
-
parsed = { providers: {} };
|
|
1214
|
-
}
|
|
1215
|
-
|
|
1216
|
-
if (originalBytes !== undefined) {
|
|
1217
|
-
try {
|
|
1218
|
-
const decoded = JSON.parse(originalBytes) as unknown;
|
|
1219
|
-
if (decoded && typeof decoded === "object" && !Array.isArray(decoded)) {
|
|
1220
|
-
parsed = decoded as ModelsJsonShape;
|
|
1221
|
-
} else {
|
|
1222
|
-
// A non-object top-level JSON (array/string/number) is unexpected; treat as malformed and abort.
|
|
1223
|
-
console.warn(`${LOG_PREFIX}: models.json top-level is not an object; aborting auto-config`);
|
|
1224
|
-
result.skipped = true;
|
|
1225
|
-
return result;
|
|
1226
|
-
}
|
|
1227
|
-
} catch (error) {
|
|
1228
|
-
// Malformed JSON: do NOT overwrite the user's file.
|
|
1229
|
-
console.warn(`${LOG_PREFIX}: models.json is not valid JSON; aborting auto-config`, error);
|
|
1230
|
-
result.skipped = true;
|
|
1231
|
-
return result;
|
|
1232
|
-
}
|
|
1233
|
-
} else {
|
|
1234
|
-
parsed = { providers: {} };
|
|
1235
|
-
}
|
|
1236
|
-
|
|
1237
|
-
if (modelsJsonContainsDeepseek(parsed)) {
|
|
1238
|
-
result.deepseekPresent = true;
|
|
1239
|
-
return result;
|
|
1240
|
-
}
|
|
1241
|
-
|
|
1242
|
-
// Decide we will seed. Snapshot the old bytes (or empty marker) into a backup before mutating.
|
|
1243
|
-
const backupPath = `${MODELS_JSON_PATH}.bak.${Date.now()}`;
|
|
1244
|
-
try {
|
|
1245
|
-
mkdirSync(STATE_DIR, { recursive: true });
|
|
1246
|
-
writeFileSync(backupPath, originalBytes ?? "", "utf8");
|
|
1247
|
-
} catch (error) {
|
|
1248
|
-
console.warn(`${LOG_PREFIX}: failed to write models.json backup; aborting auto-config`, error);
|
|
1249
|
-
result.skipped = true;
|
|
1250
|
-
return result;
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
const providersIn = asRecord(parsed.providers) ?? {};
|
|
1254
|
-
const merged: ModelsJsonShape = {
|
|
1255
|
-
...parsed,
|
|
1256
|
-
providers: { ...providersIn, deepseek: DEEPSEEK_SEED_PROVIDER },
|
|
1257
|
-
};
|
|
1258
|
-
|
|
1259
|
-
const tempPath = `${MODELS_JSON_PATH}.tmp.${process.pid}`;
|
|
1260
|
-
try {
|
|
1261
|
-
writeFileSync(tempPath, JSON.stringify(merged, null, 2) + "\n", "utf8");
|
|
1262
|
-
} catch (error) {
|
|
1263
|
-
console.warn(`${LOG_PREFIX}: failed to write models.json temp file; aborting auto-config`, error);
|
|
1264
|
-
result.skipped = true;
|
|
1265
|
-
return result;
|
|
1266
|
-
}
|
|
1267
|
-
|
|
1268
|
-
try {
|
|
1269
|
-
renameSync(tempPath, MODELS_JSON_PATH);
|
|
1270
|
-
} catch (error) {
|
|
1271
|
-
console.warn(
|
|
1272
|
-
`${LOG_PREFIX}: failed to atomically rename models.json (temp left at ${tempPath})`,
|
|
1273
|
-
error,
|
|
1274
|
-
);
|
|
1275
|
-
result.skipped = true;
|
|
1276
|
-
return result;
|
|
1277
|
-
}
|
|
1278
|
-
|
|
1279
|
-
result.seeded = true;
|
|
1280
|
-
result.deepseekPresent = true;
|
|
1281
|
-
notify?.(
|
|
1282
|
-
`${LOG_PREFIX}: seeded DeepSeek provider into ${MODELS_JSON_PATH} (backup at ${backupPath}). ` +
|
|
1283
|
-
`Set ${DEEPSEEK_API_KEY_ENV} to use it; or set ${NO_AUTO_CONFIG_ENV}=1 next time to opt out.`,
|
|
1284
|
-
"info",
|
|
1285
|
-
);
|
|
1286
|
-
return result;
|
|
1287
|
-
}
|
|
1288
|
-
|
|
1289
|
-
function emitDeepseekApiKeyHintIfNeeded(
|
|
1290
|
-
deepseekPresent: boolean,
|
|
1291
|
-
notify: (text: string, level: "info" | "warning") => void,
|
|
1292
|
-
): void {
|
|
1293
|
-
if (!deepseekPresent) return;
|
|
1294
|
-
const value = process.env[DEEPSEEK_API_KEY_ENV];
|
|
1295
|
-
if (typeof value === "string" && value.trim().length > 0) return;
|
|
1296
|
-
|
|
1297
|
-
notify(
|
|
1298
|
-
`${LOG_PREFIX}: ${DEEPSEEK_API_KEY_ENV} is not set. ` +
|
|
1299
|
-
`DeepSeek models in ${MODELS_JSON_PATH} reference $${DEEPSEEK_API_KEY_ENV}; ` +
|
|
1300
|
-
`export ${DEEPSEEK_API_KEY_ENV}=... in your shell to enable them.`,
|
|
1301
|
-
"info",
|
|
1302
|
-
);
|
|
1303
|
-
}
|
|
1304
1137
|
|
|
1305
1138
|
// Internal helpers exported only so the task verification script
|
|
1306
1139
|
// (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
|
|
@@ -1315,29 +1148,29 @@ export const __internals_for_tests = {
|
|
|
1315
1148
|
compressSkillsInSystemPrompt,
|
|
1316
1149
|
MIN_STABLE_CANDIDATE_LENGTH,
|
|
1317
1150
|
SKILL_COMPRESSION_MIN_COUNT,
|
|
1151
|
+
// OpenAI-family cache-key helpers
|
|
1152
|
+
addOpenAIPromptCacheKey,
|
|
1153
|
+
clampPromptCacheKey,
|
|
1154
|
+
hasEffectivePromptCacheKey,
|
|
1155
|
+
isNonEmptyString,
|
|
1156
|
+
shouldInjectOpenAIPromptCacheKey,
|
|
1157
|
+
isOpenAICompatibleApi,
|
|
1158
|
+
isOpenAIFamilyModel,
|
|
1159
|
+
isOpenAIFamilyAssistantMessage,
|
|
1160
|
+
isOpenAIFamilyToken,
|
|
1161
|
+
describeMissingOpenAIFamilyProxyCompat,
|
|
1162
|
+
isOfficialOpenAIBaseUrl,
|
|
1163
|
+
getModelIdNameTokenValues,
|
|
1164
|
+
getAssistantMessageModelTokenValues,
|
|
1165
|
+
getCompat,
|
|
1166
|
+
modelKey,
|
|
1318
1167
|
};
|
|
1319
1168
|
|
|
1320
1169
|
export default function (pi: ExtensionAPI) {
|
|
1321
1170
|
const warnedModels = new Set<string>();
|
|
1322
1171
|
let cacheStatsByProvider: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
|
|
1323
1172
|
let lastStatusText: string | undefined;
|
|
1324
|
-
let latestPromptCacheKey: string | undefined;
|
|
1325
1173
|
let persistenceWarningShown = false;
|
|
1326
|
-
let apiKeyHintShown = false;
|
|
1327
|
-
|
|
1328
|
-
// Auto-config runs once at extension activation (idempotent: skips if DeepSeek already configured).
|
|
1329
|
-
// Pi's UI logger is not yet bound here, so seed-time notifications go through console.warn / console.info.
|
|
1330
|
-
// Per-session UI notification is emitted from the session_start hook below.
|
|
1331
|
-
let autoConfig: EnsureDeepseekResult;
|
|
1332
|
-
try {
|
|
1333
|
-
autoConfig = ensureDeepseekConfigured((text, level) => {
|
|
1334
|
-
if (level === "warning") console.warn(text);
|
|
1335
|
-
else console.info(text);
|
|
1336
|
-
});
|
|
1337
|
-
} catch (error) {
|
|
1338
|
-
console.warn(`${LOG_PREFIX}: ensureDeepseekConfigured threw; continuing without auto-config`, error);
|
|
1339
|
-
autoConfig = { deepseekPresent: false, seeded: false, skipped: true };
|
|
1340
|
-
}
|
|
1341
1174
|
|
|
1342
1175
|
function getStatsForAdapter(adapter: CacheProviderAdapter): CacheStats {
|
|
1343
1176
|
const existing = cacheStatsByProvider[adapter.id];
|
|
@@ -1418,12 +1251,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1418
1251
|
pi.on("session_start", async (event, ctx) => {
|
|
1419
1252
|
await restoreCacheStats(event.reason, ctx);
|
|
1420
1253
|
notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
|
|
1421
|
-
if (!apiKeyHintShown) {
|
|
1422
|
-
apiKeyHintShown = true;
|
|
1423
|
-
emitDeepseekApiKeyHintIfNeeded(autoConfig.deepseekPresent, (text, level) => {
|
|
1424
|
-
ctx.ui.notify(text, level);
|
|
1425
|
-
});
|
|
1426
|
-
}
|
|
1427
1254
|
await publishStatus(ctx);
|
|
1428
1255
|
});
|
|
1429
1256
|
|
|
@@ -1489,7 +1316,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1489
1316
|
// cache key derived from `stablePrefix` reflects what actually
|
|
1490
1317
|
// ships to the provider.
|
|
1491
1318
|
const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
|
|
1492
|
-
latestPromptCacheKey = buildPromptCacheKey(optimized.stablePrefix);
|
|
1493
1319
|
|
|
1494
1320
|
if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
|
|
1495
1321
|
return { systemPrompt: optimized.systemPrompt };
|
|
@@ -1510,10 +1336,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
1510
1336
|
});
|
|
1511
1337
|
|
|
1512
1338
|
pi.on("before_provider_request", (event, ctx) => {
|
|
1513
|
-
if (!
|
|
1339
|
+
if (!shouldInjectOpenAIPromptCacheKey()) return undefined;
|
|
1514
1340
|
if (!isOpenAIFamilyModel(ctx.model)) return undefined;
|
|
1341
|
+
if (!isOpenAICompatibleApi(ctx.model?.api)) return undefined;
|
|
1515
1342
|
|
|
1516
|
-
return addOpenAIPromptCacheKey(event.payload, latestPromptCacheKey);
|
|
1343
|
+
return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
|
|
1517
1344
|
});
|
|
1518
1345
|
|
|
1519
1346
|
pi.on("message_end", async (event, ctx) => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.3.0",
|
|
3
|
+
"version": "2.4.0",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|