pi-cache-optimizer 2.5.1 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -495
- package/README.zh-CN.md +77 -357
- package/index.ts +223 -43
- package/package.json +2 -2
package/index.ts
CHANGED
|
@@ -4,6 +4,39 @@ import { homedir } from "node:os";
|
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
6
6
|
|
|
7
|
+
/** Minimal mutable view of an environment map such as `process.env`. */
type MutableEnv = Record<string, string | undefined>;

/** Point-in-time record of the PI_CACHE_RETENTION variable. */
type CacheRetentionEnvSnapshot = {
  /** True when the variable existed as an own property of the env map at capture time. */
  wasSet: boolean;
  /** Value read from the env map at capture time (undefined when absent). */
  value?: string;
};

const PI_CACHE_RETENTION_ENV = "PI_CACHE_RETENTION";
const LONG_CACHE_RETENTION_VALUE = "long";

/**
 * Record whether PI_CACHE_RETENTION is present in `env` and what it holds,
 * so the value can be put back later with restoreCacheRetentionEnv.
 *
 * @param env Environment map to inspect; defaults to `process.env`.
 * @returns Snapshot describing the variable's presence and value.
 */
function captureCacheRetentionEnv(env: MutableEnv = process.env): CacheRetentionEnvSnapshot {
  return {
    wasSet: Object.prototype.hasOwnProperty.call(env, PI_CACHE_RETENTION_ENV),
    value: env[PI_CACHE_RETENTION_ENV],
  };
}

/**
 * Set PI_CACHE_RETENTION to "long" in `env` unless it already has that value.
 *
 * @param env Environment map to mutate; defaults to `process.env`.
 */
function requestLongCacheRetention(env: MutableEnv = process.env): void {
  // An unset or empty value is also "not long", so a single comparison is enough.
  if (env[PI_CACHE_RETENTION_ENV] !== LONG_CACHE_RETENTION_VALUE) {
    env[PI_CACHE_RETENTION_ENV] = LONG_CACHE_RETENTION_VALUE;
  }
}

/**
 * Put PI_CACHE_RETENTION back to the state described by `snapshot`.
 *
 * @param snapshot State previously produced by captureCacheRetentionEnv.
 * @param env Environment map to mutate; defaults to `process.env`.
 */
function restoreCacheRetentionEnv(snapshot: CacheRetentionEnvSnapshot, env: MutableEnv = process.env): void {
  // Only write back a real string. Assigning `undefined` to `process.env`
  // stores the literal text "undefined", so a snapshot without a value is
  // restored by removing the variable instead.
  if (snapshot.wasSet && snapshot.value !== undefined) {
    env[PI_CACHE_RETENTION_ENV] = snapshot.value;
  } else {
    delete env[PI_CACHE_RETENTION_ENV];
  }
}

// Captured at module load, before this module requests long retention.
const STARTUP_CACHE_RETENTION_ENV = captureCacheRetentionEnv();
|
|
39
|
+
|
|
7
40
|
/**
|
|
8
41
|
* Pi Cache Optimizer (formerly pi-deepseek-cache-optimizer)
|
|
9
42
|
*
|
|
@@ -19,10 +52,9 @@ import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@
|
|
|
19
52
|
|
|
20
53
|
// ============================================================
|
|
21
54
|
// Automatically request long prompt-cache retention when Pi supports it.
|
|
55
|
+
// /cache-optimizer disable restores the startup value for this Pi process.
|
|
22
56
|
// ============================================================
|
|
23
|
-
|
|
24
|
-
process.env.PI_CACHE_RETENTION = "long";
|
|
25
|
-
}
|
|
57
|
+
requestLongCacheRetention();
|
|
26
58
|
|
|
27
59
|
type PiModel = NonNullable<ExtensionContext["model"]>;
|
|
28
60
|
type UnknownRecord = Record<string, unknown>;
|
|
@@ -40,6 +72,8 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
|
40
72
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
41
73
|
const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
|
|
42
74
|
|
|
75
|
+
// Module-level runtime switch for the optimizer. Starts enabled; it is
// written by setRuntimeOptimizerEnabled and read wherever optimizer behavior
// is gated at runtime.
let runtimeOptimizerEnabled = true;
|
|
76
|
+
|
|
43
77
|
// WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
|
|
44
78
|
// logic has accidentally truncated a structural marker (any XML tag or
|
|
45
79
|
// HTML comment boundary marker present in the original prompt), we flip
|
|
@@ -657,11 +691,46 @@ function isDisabledEnv(value: string | undefined): boolean {
|
|
|
657
691
|
}
|
|
658
692
|
|
|
659
693
|
/**
 * Report whether the OpenAI prompt_cache_key fallback should be applied.
 *
 * It is applied only when the runtime switch is on, the opt-out variable is
 * not enabled, and the cache-key variable is not explicitly disabled.
 */
function shouldInjectOpenAIPromptCacheKey(): boolean {
  return (
    runtimeOptimizerEnabled &&
    !isEnabledEnv(process.env[NO_OPENAI_CACHE_KEY_ENV]) &&
    !isDisabledEnv(process.env[OPENAI_CACHE_KEY_ENV])
  );
}
|
|
664
699
|
|
|
700
|
+
/**
 * Flip the runtime switch and bring PI_CACHE_RETENTION in line with it.
 *
 * Enabling requests long retention; disabling restores the value captured
 * at startup.
 *
 * @param enabled New state of the runtime switch.
 * @param env Environment map to mutate; defaults to `process.env`.
 */
function setRuntimeOptimizerEnabled(enabled: boolean, env: MutableEnv = process.env): void {
  runtimeOptimizerEnabled = enabled;

  if (!enabled) {
    restoreCacheRetentionEnv(STARTUP_CACHE_RETENTION_ENV, env);
    return;
  }

  requestLongCacheRetention(env);
}
|
|
708
|
+
|
|
709
|
+
/** Read-only accessor for the current state of the runtime switch. */
function isRuntimeOptimizerEnabled(): boolean {
  return runtimeOptimizerEnabled;
}
|
|
712
|
+
|
|
713
|
+
/**
 * Describe the optimizer's current runtime mode as display lines.
 *
 * The first line gives the overall state, followed by one bullet per feature
 * and the current PI_CACHE_RETENTION value. A trailing note is added when the
 * optimizer is disabled, or when it is enabled but an environment variable
 * still turns a feature off.
 *
 * @returns The lines in display order.
 */
function getOptimizerRuntimeModeLines(): string[] {
  const enabled = runtimeOptimizerEnabled;
  const rewriteBlockedByEnv = isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]);
  const cacheKeyActive = shouldInjectOpenAIPromptCacheKey();
  const onOff = (flag: boolean): string => (flag ? "on" : "off");

  const lines: string[] = [
    `Runtime state: ${enabled ? "enabled" : "disabled"}`,
    `• Prompt rewrite: ${onOff(enabled && !rewriteBlockedByEnv)}`,
    `• OpenAI prompt_cache_key fallback: ${onOff(cacheKeyActive)}`,
    `• Footer cache stats: on${enabled ? "" : " (comparison mode)"}`,
    `• Compat warnings: ${onOff(enabled)}`,
    `• ${PI_CACHE_RETENTION_ENV}: ${process.env[PI_CACHE_RETENTION_ENV] ?? "(unset)"}`,
  ];

  if (!enabled) {
    lines.push("This is a current-process switch. Run /reload or restart Pi to return to startup behavior.");
  } else if (rewriteBlockedByEnv || !cacheKeyActive) {
    lines.push("Some features are still disabled by environment variables.");
  }

  return lines;
}
|
|
729
|
+
|
|
730
|
+
/** Render the runtime-mode lines as a single newline-separated string. */
function formatOptimizerRuntimeMode(): string {
  const modeLines = getOptimizerRuntimeModeLines();
  return modeLines.join("\n");
}
|
|
733
|
+
|
|
665
734
|
/** True when `message`, viewed through asRecord, has `role === "assistant"`. */
function isAssistantMessage(message: unknown): boolean {
  const record = asRecord(message);
  return record?.role === "assistant";
}
|
|
@@ -1443,6 +1512,40 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
|
|
|
1443
1512
|
return missing;
|
|
1444
1513
|
}
|
|
1445
1514
|
|
|
1515
|
+
/**
 * Build the compat JSON fragment that is safe to suggest automatically.
 *
 * Only `sendSessionAffinityHeaders` is ever included; any other missing flag
 * (for example `supportsLongCacheRetention`) is deliberately left out.
 *
 * @param missing Names of the compat flags reported as missing. The list is
 *   only read, so readonly and frozen arrays are accepted.
 * @returns An object containing the safe flags set to `true`; empty when none apply.
 */
function buildSafeOpenAIProxyCompatSuggestion(missing: readonly string[]): Record<string, boolean> {
  const suggestion: Record<string, boolean> = {};
  if (missing.includes("sendSessionAffinityHeaders")) {
    suggestion.sendSessionAffinityHeaders = true;
  }
  return suggestion;
}
|
|
1522
|
+
|
|
1523
|
+
/**
 * Fixed hint shown when a channel may be rejecting `prompt_cache_retention`.
 *
 * @returns The hint text, without any leading bullet or indentation.
 */
function getPromptCacheRetentionUnsupportedHint(): string {
  const hint =
    "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongCacheRetention`; this extension does not write that field directly, but Pi may send it when long retention is requested and compat says the proxy supports it.";
  return hint;
}
|
|
1526
|
+
|
|
1527
|
+
/**
 * Append compat advice for the missing flags to `lines` (mutated in place).
 *
 * First comes either the safe JSON suggestion (optionally preceded by an
 * intro line) or, when no safe suggestion exists and long retention is
 * missing, a note that no automatic change is recommended. Per-flag
 * explanations follow.
 *
 * @param lines Output buffer that receives the advice lines.
 * @param missing Names of the compat flags reported as missing.
 * @param options `includeJsonIntro: false` suppresses the intro line before the JSON.
 */
function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
  const lacksAffinity = missing.includes("sendSessionAffinityHeaders");
  const lacksLongRetention = missing.includes("supportsLongCacheRetention");
  const suggestion = buildSafeOpenAIProxyCompatSuggestion(missing);

  if (Object.keys(suggestion).length === 0) {
    if (lacksLongRetention) {
      lines.push("No safe automatic JSON change is recommended for `supportsLongCacheRetention`.");
    }
  } else {
    if (options.includeJsonIntro !== false) {
      lines.push("Safe default suggestion:");
    }
    lines.push(JSON.stringify(suggestion, null, 2));
  }

  if (lacksAffinity) {
    lines.push("- sendSessionAffinityHeaders: recommended for third-party proxies when supported; it helps keep one Pi session on the same upstream/backend.");
  }

  if (lacksLongRetention) {
    lines.push(
      "- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.",
      `- ${getPromptCacheRetentionUnsupportedHint()}`,
    );
  }
}
|
|
1548
|
+
|
|
1446
1549
|
/**
|
|
1447
1550
|
* Build the warning text displayed to users when an OpenAI-family third-party
|
|
1448
1551
|
* proxy is missing one or more cache/session-affinity compat flags.
|
|
@@ -1456,11 +1559,6 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
|
|
|
1456
1559
|
* exercise it via __internals_for_tests.
|
|
1457
1560
|
*/
|
|
1458
1561
|
function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
|
|
1459
|
-
const suggestion: Record<string, boolean> = {};
|
|
1460
|
-
for (const flag of missing) {
|
|
1461
|
-
suggestion[flag] = true;
|
|
1462
|
-
}
|
|
1463
|
-
|
|
1464
1562
|
// Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
|
|
1465
1563
|
// If no slash is found, fall back to the key itself.
|
|
1466
1564
|
const slashIdx = key.indexOf("/");
|
|
@@ -1469,19 +1567,11 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
|
|
|
1469
1567
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1470
1568
|
const lines: string[] = [
|
|
1471
1569
|
`💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
|
|
1472
|
-
`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models)
|
|
1473
|
-
``,
|
|
1474
|
-
JSON.stringify(suggestion, null, 2),
|
|
1570
|
+
`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
|
|
1475
1571
|
``,
|
|
1476
1572
|
];
|
|
1477
1573
|
|
|
1478
|
-
|
|
1479
|
-
if (flag === "supportsLongCacheRetention") {
|
|
1480
|
-
lines.push("- supportsLongCacheRetention: confirm your endpoint or proxy supports long prompt cache retention.");
|
|
1481
|
-
} else if (flag === "sendSessionAffinityHeaders") {
|
|
1482
|
-
lines.push("- sendSessionAffinityHeaders: keeps requests on the same backend for proxy cache locality (session affinity).");
|
|
1483
|
-
}
|
|
1484
|
-
}
|
|
1574
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
1485
1575
|
|
|
1486
1576
|
return lines.join("\n");
|
|
1487
1577
|
}
|
|
@@ -3070,7 +3160,10 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3070
3160
|
" • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
|
|
3071
3161
|
);
|
|
3072
3162
|
notes.push(
|
|
3073
|
-
`
|
|
3163
|
+
` Safe compat default: { "sendSessionAffinityHeaders": true }`,
|
|
3164
|
+
);
|
|
3165
|
+
notes.push(
|
|
3166
|
+
` Add supportsLongCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
|
|
3074
3167
|
);
|
|
3075
3168
|
|
|
3076
3169
|
return notes;
|
|
@@ -3103,7 +3196,7 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3103
3196
|
return notes;
|
|
3104
3197
|
}
|
|
3105
3198
|
|
|
3106
|
-
function buildDoctorDiagnosis(model: PiModel): string {
|
|
3199
|
+
function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
|
|
3107
3200
|
const lines: string[] = [];
|
|
3108
3201
|
lines.push(`Provider: ${model.provider}`);
|
|
3109
3202
|
lines.push(`Model: ${model.id}`);
|
|
@@ -3120,16 +3213,25 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
3120
3213
|
const key = modelKey(model);
|
|
3121
3214
|
const slashIdx = key.indexOf("/");
|
|
3122
3215
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
3123
|
-
const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
|
|
3124
3216
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
3125
|
-
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models)
|
|
3126
|
-
lines
|
|
3217
|
+
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
|
|
3218
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3127
3219
|
} else if (isCompatCheckApplicable(model)) {
|
|
3128
3220
|
lines.push("✅ Compat fully configured.");
|
|
3129
3221
|
} else {
|
|
3130
3222
|
lines.push("ℹ️ Compat check not applicable for this model.");
|
|
3131
3223
|
}
|
|
3132
3224
|
|
|
3225
|
+
if (isCompatCheckApplicable(model) && compat.supportsLongCacheRetention === true) {
|
|
3226
|
+
lines.push("");
|
|
3227
|
+
if (options.promptCacheRetention400) {
|
|
3228
|
+
lines.push("⚠️ A 400 response was observed while supportsLongCacheRetention is enabled.");
|
|
3229
|
+
lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
3230
|
+
} else {
|
|
3231
|
+
lines.push(`ℹ️ Long retention is enabled. ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
3232
|
+
}
|
|
3233
|
+
}
|
|
3234
|
+
|
|
3133
3235
|
// ── Router/channel diagnostics ──
|
|
3134
3236
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
3135
3237
|
if (routerNotes.length > 0) {
|
|
@@ -3280,22 +3382,22 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
3280
3382
|
if (missing.length > 0) {
|
|
3281
3383
|
const slashIdx = key.indexOf("/");
|
|
3282
3384
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
3283
|
-
const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
|
|
3284
3385
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
3285
3386
|
lines.push(`Active model: ${key}`);
|
|
3286
3387
|
lines.push(`Missing: ${missing.join(", ")}`);
|
|
3287
3388
|
lines.push("");
|
|
3288
3389
|
lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
3289
|
-
lines.push(`(at the same level as baseUrl/api/apiKey/models)
|
|
3290
|
-
lines
|
|
3291
|
-
lines.push("");
|
|
3292
|
-
lines.push(`Only enable if your endpoint supports them.`);
|
|
3390
|
+
lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
|
|
3391
|
+
appendOpenAIProxyCompatAdviceLines(lines, missing);
|
|
3293
3392
|
}
|
|
3294
3393
|
|
|
3295
3394
|
// When compat is fully configured but router notes exist, prefix the status.
|
|
3296
3395
|
if (routerNotes.length > 0 && missing.length === 0) {
|
|
3297
3396
|
if (isCompatCheckApplicable(model)) {
|
|
3298
3397
|
lines.push("✅ Compat fully configured.");
|
|
3398
|
+
if (getCompat(model).supportsLongCacheRetention === true) {
|
|
3399
|
+
lines.push(getPromptCacheRetentionUnsupportedHint());
|
|
3400
|
+
}
|
|
3299
3401
|
} else {
|
|
3300
3402
|
lines.push("ℹ️ Compat check not applicable for this model.");
|
|
3301
3403
|
}
|
|
@@ -3339,6 +3441,8 @@ export const __internals_for_tests = {
|
|
|
3339
3441
|
isOpenAIFamilyToken,
|
|
3340
3442
|
describeMissingOpenAIFamilyProxyCompat,
|
|
3341
3443
|
describeMissingOpenAICompatibleProxyCompat,
|
|
3444
|
+
buildSafeOpenAIProxyCompatSuggestion,
|
|
3445
|
+
getPromptCacheRetentionUnsupportedHint,
|
|
3342
3446
|
isOfficialOpenAIBaseUrl,
|
|
3343
3447
|
// Non-GPT OpenAI-compatible model detection
|
|
3344
3448
|
isKimiLikeModel,
|
|
@@ -3454,6 +3558,15 @@ export const __internals_for_tests = {
|
|
|
3454
3558
|
modelKey,
|
|
3455
3559
|
// Platform-friendly path helper
|
|
3456
3560
|
getModelsJsonDisplayPath,
|
|
3561
|
+
captureCacheRetentionEnv,
|
|
3562
|
+
requestLongCacheRetention,
|
|
3563
|
+
restoreCacheRetentionEnv,
|
|
3564
|
+
setRuntimeOptimizerEnabled,
|
|
3565
|
+
isRuntimeOptimizerEnabled,
|
|
3566
|
+
getOptimizerRuntimeModeLines,
|
|
3567
|
+
formatOptimizerRuntimeMode,
|
|
3568
|
+
PI_CACHE_RETENTION_ENV,
|
|
3569
|
+
LONG_CACHE_RETENTION_VALUE,
|
|
3457
3570
|
// Integrity diagnostics
|
|
3458
3571
|
getLastPromptIntegrityWarningAt,
|
|
3459
3572
|
// Diagnostic command helpers
|
|
@@ -3492,6 +3605,8 @@ export const __internals_for_tests = {
|
|
|
3492
3605
|
|
|
3493
3606
|
export default function (pi: ExtensionAPI) {
|
|
3494
3607
|
const warnedModels = new Set<string>();
|
|
3608
|
+
const promptCacheRetention400Models = new Set<string>();
|
|
3609
|
+
const warnedPromptCacheRetention400Models = new Set<string>();
|
|
3495
3610
|
let cacheStatsByModel: Record<string, CacheStats> = {};
|
|
3496
3611
|
let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
|
|
3497
3612
|
let lastStatusText: string | undefined;
|
|
@@ -3582,6 +3697,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
3582
3697
|
return created;
|
|
3583
3698
|
}
|
|
3584
3699
|
|
|
3700
|
+
/**
 * Drop the stats buckets and recent samples whose keys start with the
 * current session's prefix, and clear the cached status text.
 */
function resetCurrentSessionStats(): void {
  const sessionPrefix = `${currentSessionHash || "_nosession"}:`;
  const belongsToSession = (key: string): boolean => key.startsWith(sessionPrefix);

  Object.keys(cacheStatsByModel)
    .filter(belongsToSession)
    .forEach((key) => {
      delete cacheStatsByModel[key];
    });

  // Copy the keys first so entries can be deleted while walking them.
  Array.from(recentSamplesByModelKey.keys())
    .filter(belongsToSession)
    .forEach((key) => {
      recentSamplesByModelKey.delete(key);
    });

  lastStatusText = undefined;
}
|
|
3710
|
+
|
|
3585
3711
|
async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
|
|
3586
3712
|
try {
|
|
3587
3713
|
await writePersistedCacheStats(getCacheStatsState(), currentSessionHashSet ? currentSessionHash : undefined);
|
|
@@ -3745,7 +3871,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
3745
3871
|
// cacheStatsByModel[sessionModelKey(model)] on first use.
|
|
3746
3872
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
3747
3873
|
const stats = sk ? cacheStatsByModel[sk] : undefined;
|
|
3748
|
-
|
|
3874
|
+
const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
3875
|
+
statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
|
|
3749
3876
|
}
|
|
3750
3877
|
|
|
3751
3878
|
// If optimizeSystemPrompt detected structural truncation on this or
|
|
@@ -3779,7 +3906,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
3779
3906
|
// update so the marker persists through stats changes and day
|
|
3780
3907
|
// rollovers. Redundant setStatus calls are blocked by the
|
|
3781
3908
|
// `lastStatusText` early return above.
|
|
3782
|
-
if (statusText !== undefined && model) {
|
|
3909
|
+
if (runtimeOptimizerEnabled && statusText !== undefined && model) {
|
|
3783
3910
|
const compatMissing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
3784
3911
|
if (compatMissing.length > 0) {
|
|
3785
3912
|
statusText = statusText + " ⚠️ compat";
|
|
@@ -3794,12 +3921,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
3794
3921
|
|
|
3795
3922
|
pi.on("session_start", async (event, ctx) => {
|
|
3796
3923
|
await restoreCacheStats(event.reason, ctx);
|
|
3797
|
-
notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
|
|
3924
|
+
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
|
|
3798
3925
|
await publishStatus(ctx);
|
|
3799
3926
|
});
|
|
3800
3927
|
|
|
3801
3928
|
pi.on("model_select", async (event, ctx) => {
|
|
3802
|
-
notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
|
|
3929
|
+
if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
|
|
3803
3930
|
await publishStatus(ctx, event.model);
|
|
3804
3931
|
});
|
|
3805
3932
|
|
|
@@ -3838,6 +3965,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
3838
3965
|
}
|
|
3839
3966
|
}
|
|
3840
3967
|
|
|
3968
|
+
if (!runtimeOptimizerEnabled) return {};
|
|
3969
|
+
|
|
3841
3970
|
// Global opt-out: PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 bypasses all
|
|
3842
3971
|
// prompt mutations below (session-overview churn strip, skill compression,
|
|
3843
3972
|
// and stable-prefix reordering). Footer stats and the OpenAI
|
|
@@ -3894,6 +4023,25 @@ export default function (pi: ExtensionAPI) {
|
|
|
3894
4023
|
return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
|
|
3895
4024
|
});
|
|
3896
4025
|
|
|
4026
|
+
// Remember models that answered HTTP 400 while supportsLongCacheRetention is
// enabled in their compat, and warn once per model key.
pi.on("after_provider_response", (event, ctx) => {
  const activeModel = ctx.model;
  if (!runtimeOptimizerEnabled || !activeModel || event.status !== 400) return;
  if (!isCompatCheckApplicable(activeModel) || getCompat(activeModel).supportsLongCacheRetention !== true) return;

  const key = modelKey(activeModel);
  // Always record the observation, even when the warning was already shown.
  promptCacheRetention400Models.add(key);

  const alreadyWarned = warnedPromptCacheRetention400Models.has(key);
  if (alreadyWarned) return;
  warnedPromptCacheRetention400Models.add(key);

  const message =
    `⚠️ ${LOG_PREFIX}: ${key} returned HTTP 400 while supportsLongCacheRetention is enabled. ` +
    getPromptCacheRetentionUnsupportedHint() +
    ` Run /cache-optimizer doctor for the exact edit location.`;
  ctx.ui.notify(message, "warning");
});
|
|
4044
|
+
|
|
3897
4045
|
pi.on("message_end", async (event, ctx) => {
|
|
3898
4046
|
const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
|
|
3899
4047
|
if (!adapter) return;
|
|
@@ -3929,6 +4077,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
3929
4077
|
// ────────────────────────────────────────────────────────────────
|
|
3930
4078
|
// Register /cache-optimizer command
|
|
3931
4079
|
// Subcommands:
|
|
4080
|
+
// enable — enable runtime prompt/cache optimizations for this process
|
|
4081
|
+
// disable — disable runtime prompt/cache optimizations for this process
|
|
3932
4082
|
// doctor — show current model/provider/api/baseUrl/compat status
|
|
3933
4083
|
// with low-hit diagnosis
|
|
3934
4084
|
// stats — show active model stats bucket, recent trend, usage
|
|
@@ -3942,12 +4092,24 @@ export default function (pi: ExtensionAPI) {
|
|
|
3942
4092
|
const model = cmdCtx.model;
|
|
3943
4093
|
const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
|
|
3944
4094
|
|
|
3945
|
-
if (subcommand === "
|
|
4095
|
+
if (subcommand === "enable") {
|
|
4096
|
+
setRuntimeOptimizerEnabled(true);
|
|
4097
|
+
resetCurrentSessionStats();
|
|
4098
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4099
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4100
|
+
cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
4101
|
+
} else if (subcommand === "disable") {
|
|
4102
|
+
setRuntimeOptimizerEnabled(false);
|
|
4103
|
+
resetCurrentSessionStats();
|
|
4104
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4105
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4106
|
+
cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
4107
|
+
} else if (subcommand === "doctor") {
|
|
3946
4108
|
if (!model) {
|
|
3947
4109
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
3948
4110
|
return;
|
|
3949
4111
|
}
|
|
3950
|
-
const diagnosis = buildDoctorDiagnosis(model);
|
|
4112
|
+
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
3951
4113
|
const adapter = selectAdapterForModel(model);
|
|
3952
4114
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
3953
4115
|
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
@@ -4020,18 +4182,32 @@ export default function (pi: ExtensionAPI) {
|
|
|
4020
4182
|
// Try interactive selection menu when UI supports it
|
|
4021
4183
|
if (cmdCtx.hasUI) {
|
|
4022
4184
|
const menuOptions = [
|
|
4023
|
-
"
|
|
4024
|
-
"
|
|
4025
|
-
"
|
|
4026
|
-
"
|
|
4027
|
-
"
|
|
4185
|
+
"Enable — Turn on runtime optimizations",
|
|
4186
|
+
"Disable — Turn off runtime optimizations",
|
|
4187
|
+
"Doctor — Show cache configuration",
|
|
4188
|
+
"Stats — Show cache stats and trend",
|
|
4189
|
+
"Compat — Show compat suggestion",
|
|
4190
|
+
"Reset — Reset local session stats",
|
|
4191
|
+
"Cancel",
|
|
4028
4192
|
];
|
|
4029
4193
|
const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
|
|
4030
4194
|
if (choice === menuOptions[0]) {
|
|
4195
|
+
setRuntimeOptimizerEnabled(true);
|
|
4196
|
+
resetCurrentSessionStats();
|
|
4197
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4198
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4199
|
+
cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
4200
|
+
} else if (choice === menuOptions[1]) {
|
|
4201
|
+
setRuntimeOptimizerEnabled(false);
|
|
4202
|
+
resetCurrentSessionStats();
|
|
4203
|
+
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
4204
|
+
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
4205
|
+
cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
4206
|
+
} else if (choice === menuOptions[2]) {
|
|
4031
4207
|
if (!model) {
|
|
4032
4208
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4033
4209
|
} else {
|
|
4034
|
-
const diagnosis = buildDoctorDiagnosis(model);
|
|
4210
|
+
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
4035
4211
|
const adapter = selectAdapterForModel(model);
|
|
4036
4212
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
4037
4213
|
const statsState = sk ? cacheStatsByModel[sk] : undefined;
|
|
@@ -4042,7 +4218,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4042
4218
|
: diagnosis;
|
|
4043
4219
|
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
4044
4220
|
}
|
|
4045
|
-
} else if (choice === menuOptions[
|
|
4221
|
+
} else if (choice === menuOptions[3]) {
|
|
4046
4222
|
if (!model) {
|
|
4047
4223
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4048
4224
|
} else {
|
|
@@ -4053,7 +4229,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4053
4229
|
const output = buildStatsOutput(model, adapter, statsState, samples);
|
|
4054
4230
|
cmdCtx.ui.notify(output, "info");
|
|
4055
4231
|
}
|
|
4056
|
-
} else if (choice === menuOptions[
|
|
4232
|
+
} else if (choice === menuOptions[4]) {
|
|
4057
4233
|
if (!model) {
|
|
4058
4234
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4059
4235
|
} else {
|
|
@@ -4069,7 +4245,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
4069
4245
|
);
|
|
4070
4246
|
}
|
|
4071
4247
|
}
|
|
4072
|
-
} else if (choice === menuOptions[
|
|
4248
|
+
} else if (choice === menuOptions[5]) {
|
|
4073
4249
|
if (!model) {
|
|
4074
4250
|
cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
|
|
4075
4251
|
} else {
|
|
@@ -4098,11 +4274,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
4098
4274
|
// Fallback: text help when no interactive UI
|
|
4099
4275
|
const diagnosis: string[] = [];
|
|
4100
4276
|
diagnosis.push("📋 /cache-optimizer commands:");
|
|
4277
|
+
diagnosis.push(" enable — Enable prompt/cache optimizations for this Pi process");
|
|
4278
|
+
diagnosis.push(" disable — Disable prompt/cache optimizations for this Pi process");
|
|
4101
4279
|
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
|
|
4102
4280
|
diagnosis.push(" stats — Show active model stats bucket and recent trend");
|
|
4103
4281
|
diagnosis.push(" compat — Show compat suggestion with edit location");
|
|
4104
4282
|
diagnosis.push(" reset — Reset local session stats for current model (does not affect upstream)");
|
|
4105
4283
|
diagnosis.push("");
|
|
4284
|
+
diagnosis.push(formatOptimizerRuntimeMode());
|
|
4285
|
+
diagnosis.push("");
|
|
4106
4286
|
if (model) {
|
|
4107
4287
|
const displayKey = modelKey(model);
|
|
4108
4288
|
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.5.
|
|
4
|
-
"description": "Pi
|
|
3
|
+
"version": "2.5.2",
|
|
4
|
+
"description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
7
7
|
"deepseek",
|