pi-cache-optimizer 2.4.3 → 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/README.zh-CN.md +11 -0
- package/index.ts +173 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -35,6 +35,17 @@ This release keeps the original DeepSeek behavior and adds read-only stats adapt
|
|
|
35
35
|
|---|---|---|---|
|
|
36
36
|
| DeepSeek | Model id/name contains `deepseek` | `DS cache` | Pi `usage.cacheRead`/`usage.input`, or raw `prompt_cache_hit_tokens`, `prompt_cache_miss_tokens`, `prompt_tokens` when visible |
|
|
37
37
|
| OpenAI-family | Model id/name contains conservative OpenAI-family tokens such as `gpt-`, `chatgpt`, `o1`, `o3`, `o4`, or `o5` | `OpenAI cache` | Pi-normalized usage, or raw `prompt_tokens_details.cached_tokens` / `input_tokens_details.cached_tokens` with prompt/input totals |
|
|
38
|
+
| Kimi / Moonshot | Model id/name contains `kimi` | `Kimi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
39
|
+
| Qwen / Alibaba | Model id/name contains `qwen` | `Qwen cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
40
|
+
| GLM / Zhipu | Model id/name contains `glm` | `GLM cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
41
|
+
| MiniMax | Model id/name contains `minimax` | `MiniMax cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
42
|
+
| Hunyuan / Tencent | Model id/name contains `hunyuan` | `Hunyuan cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
43
|
+
| Mistral | Model id/name contains `mistral`, `mixtral`, or `codestral` | `Mistral cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
44
|
+
| xAI / Grok | Model id/name contains `grok`, or pattern `xai` with safe boundaries | `Grok cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
45
|
+
| Meta / Llama | Model id/name contains `llama` | `Llama cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
46
|
+
| NVIDIA Nemotron | Model id/name contains `nemotron` | `Nemotron cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
47
|
+
| Cohere / Command | Model id/name contains `cohere` or `command-r` | `Cohere cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
48
|
+
| Yi / 零一万物 | Model id/name contains `yi-`, `01-ai`, `zero-one`, or pattern `yi` with safe boundaries | `Yi cache` | Pi-normalized usage, or raw OpenAI-shaped fields when visible |
|
|
38
49
|
| Anthropic / Claude | Model id/name contains `anthropic` or `claude` | `Claude cache` | Pi-normalized usage, or raw `cache_read_input_tokens`, `cache_creation_input_tokens`, `input_tokens` |
|
|
39
50
|
| Gemini / Vertex | Model id/name contains `gemini` or `vertex` | `Gemini cache` | Pi-normalized usage, or raw Gemini/Vertex cached-content token metadata when visible |
|
|
40
51
|
|
package/README.zh-CN.md
CHANGED
|
@@ -38,6 +38,17 @@
|
|
|
38
38
|
|---|---|---|---|
|
|
39
39
|
| DeepSeek | model id/name 包含 `deepseek` | `DS cache` | Pi `usage.cacheRead`/`usage.input`,或可见 raw 字段 `prompt_cache_hit_tokens`、`prompt_cache_miss_tokens`、`prompt_tokens` |
|
|
40
40
|
| OpenAI-family | model id/name 包含保守 OpenAI-family token,例如 `gpt-`、`chatgpt`、`o1`、`o3`、`o4` 或 `o5` | `OpenAI cache` | Pi 归一化 usage,或可见 raw 字段 `prompt_tokens_details.cached_tokens` / `input_tokens_details.cached_tokens` 及 prompt/input total |
|
|
41
|
+
| Kimi / Moonshot | model id/name 包含 `kimi` | `Kimi cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
42
|
+
| Qwen / Alibaba | model id/name 包含 `qwen` | `Qwen cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
43
|
+
| GLM / Zhipu | model id/name 包含 `glm` | `GLM cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
44
|
+
| MiniMax | model id/name 包含 `minimax` | `MiniMax cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
45
|
+
| Hunyuan / Tencent | model id/name 包含 `hunyuan` | `Hunyuan cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
46
|
+
| Mistral | model id/name 包含 `mistral`、`mixtral` 或 `codestral` | `Mistral cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
47
|
+
| xAI / Grok | model id/name 包含 `grok`,或安全边界内 `xai` 模式 | `Grok cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
48
|
+
| Meta / Llama | model id/name 包含 `llama` | `Llama cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
49
|
+
| NVIDIA Nemotron | model id/name 包含 `nemotron` | `Nemotron cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
50
|
+
| Cohere / Command | model id/name 包含 `cohere` 或 `command-r` | `Cohere cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
51
|
+
| Yi / 零一万物 | model id/name 包含 `yi-`、`01-ai`、`zero-one`,或安全边界内 `yi` 模式 | `Yi cache` | Pi 归一化 usage,或可见 OpenAI 形状字段 |
|
|
41
52
|
| Anthropic / Claude | model id/name 包含 `anthropic` 或 `claude` | `Claude cache` | Pi 归一化 usage,或可见 raw 字段 `cache_read_input_tokens`、`cache_creation_input_tokens`、`input_tokens` |
|
|
42
53
|
| Gemini / Vertex | model id/name 包含 `gemini` 或 `vertex` | `Gemini cache` | Pi 归一化 usage,或可见 Gemini/Vertex cached-content token metadata |
|
|
43
54
|
|
package/index.ts
CHANGED
|
@@ -80,6 +80,7 @@ const MIN_STABLE_CANDIDATE_LENGTH = 8;
|
|
|
80
80
|
|
|
81
81
|
const ASSISTANT_MESSAGE_MODEL_TOKEN_KEYS = ["model", "name"];
|
|
82
82
|
const OPENAI_REASONING_MODEL_PATTERN = /(^|[/\s:_-])o[1345]($|[-_.:/\s])/;
|
|
83
|
+
const XAI_MODEL_PATTERN = /(^|[/\s:_-])xai($|[-_.:/\s])/;
|
|
83
84
|
|
|
84
85
|
type CacheCompat = {
|
|
85
86
|
sendSessionAffinityHeaders?: boolean;
|
|
@@ -672,6 +673,62 @@ function isHunyuanLikeAssistantMessage(message: unknown, model: PiModel | undefi
|
|
|
672
673
|
return modelOrAssistantMessageHas(message, model, ["hunyuan"]);
|
|
673
674
|
}
|
|
674
675
|
|
|
676
|
+
// ── Additional OpenAI-compatible model detection ──────────────────
|
|
677
|
+
|
|
678
|
+
function isMistralLikeModel(model: PiModel | undefined): boolean {
|
|
679
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["mistral", "mixtral", "codestral"]);
|
|
680
|
+
}
|
|
681
|
+
function isMistralLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
682
|
+
return modelOrAssistantMessageHas(message, model, ["mistral", "mixtral", "codestral"]);
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
function isGrokLikeModel(model: PiModel | undefined): boolean {
|
|
686
|
+
const tokens = getModelIdNameTokenValues(model);
|
|
687
|
+
return hasAnyTokenContaining(tokens, ["grok"]) || tokens.some((t) => XAI_MODEL_PATTERN.test(t));
|
|
688
|
+
}
|
|
689
|
+
function isGrokLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
690
|
+
const allTokens = [
|
|
691
|
+
...getModelIdNameTokenValues(model),
|
|
692
|
+
...getAssistantMessageModelTokenValues(message),
|
|
693
|
+
];
|
|
694
|
+
return hasAnyTokenContaining(allTokens, ["grok"]) || allTokens.some((t) => XAI_MODEL_PATTERN.test(t));
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
function isLlamaLikeModel(model: PiModel | undefined): boolean {
|
|
698
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["llama"]);
|
|
699
|
+
}
|
|
700
|
+
function isLlamaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
701
|
+
return modelOrAssistantMessageHas(message, model, ["llama"]);
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
function isNemotronLikeModel(model: PiModel | undefined): boolean {
|
|
705
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["nemotron"]);
|
|
706
|
+
}
|
|
707
|
+
function isNemotronLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
708
|
+
return modelOrAssistantMessageHas(message, model, ["nemotron"]);
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
function isCohereLikeModel(model: PiModel | undefined): boolean {
|
|
712
|
+
return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["cohere", "command-r"]);
|
|
713
|
+
}
|
|
714
|
+
function isCohereLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
715
|
+
return modelOrAssistantMessageHas(message, model, ["cohere", "command-r"]);
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
const YI_MODEL_PATTERN = /(^|[\/\s:_-])yi($|[\-_.:\/\s])/;
|
|
719
|
+
|
|
720
|
+
function isYiLikeModel(model: PiModel | undefined): boolean {
|
|
721
|
+
const tokens = getModelIdNameTokenValues(model);
|
|
722
|
+
return hasAnyTokenContaining(tokens, ["yi-", "01-ai", "zero-one"]) || tokens.some((t) => YI_MODEL_PATTERN.test(t));
|
|
723
|
+
}
|
|
724
|
+
function isYiLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
|
|
725
|
+
const allTokens = [
|
|
726
|
+
...getModelIdNameTokenValues(model),
|
|
727
|
+
...getAssistantMessageModelTokenValues(message),
|
|
728
|
+
];
|
|
729
|
+
return hasAnyTokenContaining(allTokens, ["yi-", "01-ai", "zero-one"]) || allTokens.some((t) => YI_MODEL_PATTERN.test(t));
|
|
730
|
+
}
|
|
731
|
+
|
|
675
732
|
// ── Model key ──────────────────────────────────────────────────────
|
|
676
733
|
|
|
677
734
|
function modelKey(model: PiModel): string {
|
|
@@ -1136,6 +1193,109 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
1136
1193
|
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1137
1194
|
},
|
|
1138
1195
|
},
|
|
1196
|
+
// ── More OpenAI-compatible adapters ──────────────────────────
|
|
1197
|
+
{
|
|
1198
|
+
id: "openai" as CacheProviderId,
|
|
1199
|
+
label: "Mistral cache",
|
|
1200
|
+
matchesModel: isMistralLikeModel,
|
|
1201
|
+
matchesAssistantMessage(message, model) {
|
|
1202
|
+
if (!isAssistantMessage(message)) return false;
|
|
1203
|
+
return isMistralLikeAssistantMessage(message, model);
|
|
1204
|
+
},
|
|
1205
|
+
normalizeUsage(message) {
|
|
1206
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1207
|
+
},
|
|
1208
|
+
warningText(model) {
|
|
1209
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1210
|
+
if (missing.length === 0) return undefined;
|
|
1211
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1212
|
+
},
|
|
1213
|
+
},
|
|
1214
|
+
{
|
|
1215
|
+
id: "openai" as CacheProviderId,
|
|
1216
|
+
label: "Grok cache",
|
|
1217
|
+
matchesModel: isGrokLikeModel,
|
|
1218
|
+
matchesAssistantMessage(message, model) {
|
|
1219
|
+
if (!isAssistantMessage(message)) return false;
|
|
1220
|
+
return isGrokLikeAssistantMessage(message, model);
|
|
1221
|
+
},
|
|
1222
|
+
normalizeUsage(message) {
|
|
1223
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1224
|
+
},
|
|
1225
|
+
warningText(model) {
|
|
1226
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1227
|
+
if (missing.length === 0) return undefined;
|
|
1228
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1229
|
+
},
|
|
1230
|
+
},
|
|
1231
|
+
{
|
|
1232
|
+
id: "openai" as CacheProviderId,
|
|
1233
|
+
label: "Llama cache",
|
|
1234
|
+
matchesModel: isLlamaLikeModel,
|
|
1235
|
+
matchesAssistantMessage(message, model) {
|
|
1236
|
+
if (!isAssistantMessage(message)) return false;
|
|
1237
|
+
return isLlamaLikeAssistantMessage(message, model);
|
|
1238
|
+
},
|
|
1239
|
+
normalizeUsage(message) {
|
|
1240
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1241
|
+
},
|
|
1242
|
+
warningText(model) {
|
|
1243
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1244
|
+
if (missing.length === 0) return undefined;
|
|
1245
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1246
|
+
},
|
|
1247
|
+
},
|
|
1248
|
+
{
|
|
1249
|
+
id: "openai" as CacheProviderId,
|
|
1250
|
+
label: "Nemotron cache",
|
|
1251
|
+
matchesModel: isNemotronLikeModel,
|
|
1252
|
+
matchesAssistantMessage(message, model) {
|
|
1253
|
+
if (!isAssistantMessage(message)) return false;
|
|
1254
|
+
return isNemotronLikeAssistantMessage(message, model);
|
|
1255
|
+
},
|
|
1256
|
+
normalizeUsage(message) {
|
|
1257
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1258
|
+
},
|
|
1259
|
+
warningText(model) {
|
|
1260
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1261
|
+
if (missing.length === 0) return undefined;
|
|
1262
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1263
|
+
},
|
|
1264
|
+
},
|
|
1265
|
+
{
|
|
1266
|
+
id: "openai" as CacheProviderId,
|
|
1267
|
+
label: "Cohere cache",
|
|
1268
|
+
matchesModel: isCohereLikeModel,
|
|
1269
|
+
matchesAssistantMessage(message, model) {
|
|
1270
|
+
if (!isAssistantMessage(message)) return false;
|
|
1271
|
+
return isCohereLikeAssistantMessage(message, model);
|
|
1272
|
+
},
|
|
1273
|
+
normalizeUsage(message) {
|
|
1274
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1275
|
+
},
|
|
1276
|
+
warningText(model) {
|
|
1277
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1278
|
+
if (missing.length === 0) return undefined;
|
|
1279
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1280
|
+
},
|
|
1281
|
+
},
|
|
1282
|
+
{
|
|
1283
|
+
id: "openai" as CacheProviderId,
|
|
1284
|
+
label: "Yi cache",
|
|
1285
|
+
matchesModel: isYiLikeModel,
|
|
1286
|
+
matchesAssistantMessage(message, model) {
|
|
1287
|
+
if (!isAssistantMessage(message)) return false;
|
|
1288
|
+
return isYiLikeAssistantMessage(message, model);
|
|
1289
|
+
},
|
|
1290
|
+
normalizeUsage(message) {
|
|
1291
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1292
|
+
},
|
|
1293
|
+
warningText(model) {
|
|
1294
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1295
|
+
if (missing.length === 0) return undefined;
|
|
1296
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1297
|
+
},
|
|
1298
|
+
},
|
|
1139
1299
|
];
|
|
1140
1300
|
|
|
1141
1301
|
function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
|
|
@@ -1398,6 +1558,19 @@ export const __internals_for_tests = {
|
|
|
1398
1558
|
isMiniMaxLikeAssistantMessage,
|
|
1399
1559
|
isHunyuanLikeModel,
|
|
1400
1560
|
isHunyuanLikeAssistantMessage,
|
|
1561
|
+
// Additional OpenAI-compatible model detection
|
|
1562
|
+
isMistralLikeModel,
|
|
1563
|
+
isMistralLikeAssistantMessage,
|
|
1564
|
+
isGrokLikeModel,
|
|
1565
|
+
isGrokLikeAssistantMessage,
|
|
1566
|
+
isLlamaLikeModel,
|
|
1567
|
+
isLlamaLikeAssistantMessage,
|
|
1568
|
+
isNemotronLikeModel,
|
|
1569
|
+
isNemotronLikeAssistantMessage,
|
|
1570
|
+
isCohereLikeModel,
|
|
1571
|
+
isCohereLikeAssistantMessage,
|
|
1572
|
+
isYiLikeModel,
|
|
1573
|
+
isYiLikeAssistantMessage,
|
|
1401
1574
|
buildOpenAIProxyCompatWarningText,
|
|
1402
1575
|
getModelIdNameTokenValues,
|
|
1403
1576
|
getAssistantMessageModelTokenValues,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.4.3",
|
|
3
|
+
"version": "2.4.4",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|