pi-cache-optimizer 2.5.1 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +80 -493
  2. package/README.zh-CN.md +83 -355
  3. package/index.ts +223 -43
  4. package/package.json +2 -2
package/index.ts CHANGED
@@ -4,6 +4,39 @@ import { homedir } from "node:os";
4
4
  import { dirname, join } from "node:path";
5
5
  import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
6
6
 
7
/** Environment-variable bag that may be mutated in place; `process.env` has this shape. */
type MutableEnv = Record<string, string | undefined>;

/** Point-in-time record of the PI_CACHE_RETENTION variable, used to undo a later override. */
type CacheRetentionEnvSnapshot = {
  /** True when the variable existed as an own property of the env object at capture time. */
  wasSet: boolean;
  /** Value read at capture time; undefined when the variable was absent. */
  value?: string;
};

/** Name of the environment variable these helpers read and write. */
const PI_CACHE_RETENTION_ENV = "PI_CACHE_RETENTION";
/** Value written to PI_CACHE_RETENTION_ENV to request long retention. */
const LONG_CACHE_RETENTION_VALUE = "long";

/**
 * Record whether PI_CACHE_RETENTION is currently set, and to what.
 *
 * @param env Environment object to inspect; defaults to `process.env`.
 * @returns A snapshot suitable for passing to `restoreCacheRetentionEnv`.
 */
function captureCacheRetentionEnv(env: MutableEnv = process.env): CacheRetentionEnvSnapshot {
  return {
    wasSet: Object.prototype.hasOwnProperty.call(env, PI_CACHE_RETENTION_ENV),
    value: env[PI_CACHE_RETENTION_ENV],
  };
}

/**
 * Set PI_CACHE_RETENTION to "long" unless it already holds that value.
 *
 * @param env Environment object to mutate; defaults to `process.env`.
 */
function requestLongCacheRetention(env: MutableEnv = process.env): void {
  // A missing or empty value is also `!== "long"`, so one comparison covers every case.
  if (env[PI_CACHE_RETENTION_ENV] !== LONG_CACHE_RETENTION_VALUE) {
    env[PI_CACHE_RETENTION_ENV] = LONG_CACHE_RETENTION_VALUE;
  }
}

/**
 * Put PI_CACHE_RETENTION back to the state described by `snapshot`.
 *
 * @param snapshot State previously produced by `captureCacheRetentionEnv`.
 * @param env Environment object to mutate; defaults to `process.env`.
 */
function restoreCacheRetentionEnv(snapshot: CacheRetentionEnvSnapshot, env: MutableEnv = process.env): void {
  // Only write back a real string. Assigning `undefined` to `process.env` would be
  // coerced to the literal text "undefined", so a value-less snapshot is restored
  // by removing the variable instead.
  if (snapshot.wasSet && snapshot.value !== undefined) {
    env[PI_CACHE_RETENTION_ENV] = snapshot.value;
  } else {
    delete env[PI_CACHE_RETENTION_ENV];
  }
}

/** State of PI_CACHE_RETENTION in `process.env` at the moment this module was evaluated. */
const STARTUP_CACHE_RETENTION_ENV = captureCacheRetentionEnv();
39
+
7
40
  /**
8
41
  * Pi Cache Optimizer (formerly pi-deepseek-cache-optimizer)
9
42
  *
@@ -19,10 +52,9 @@ import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@
19
52
 
20
53
  // ============================================================
21
54
  // Automatically request long prompt-cache retention when Pi supports it.
55
+ // /cache-optimizer disable restores the startup value for this Pi process.
22
56
  // ============================================================
23
- if (!process.env.PI_CACHE_RETENTION || process.env.PI_CACHE_RETENTION !== "long") {
24
- process.env.PI_CACHE_RETENTION = "long";
25
- }
57
+ requestLongCacheRetention();
26
58
 
27
59
  type PiModel = NonNullable<ExtensionContext["model"]>;
28
60
  type UnknownRecord = Record<string, unknown>;
@@ -40,6 +72,8 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
40
72
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
41
73
  const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
42
74
 
75
+ let runtimeOptimizerEnabled = true;
76
+
43
77
  // WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
44
78
  // logic has accidentally truncated a structural marker (any XML tag or
45
79
  // HTML comment boundary marker present in the original prompt), we flip
@@ -657,11 +691,46 @@ function isDisabledEnv(value: string | undefined): boolean {
657
691
  }
658
692
 
659
693
  function shouldInjectOpenAIPromptCacheKey(): boolean {
694
+ if (!runtimeOptimizerEnabled) return false;
660
695
  if (isEnabledEnv(process.env[NO_OPENAI_CACHE_KEY_ENV])) return false;
661
696
  if (isDisabledEnv(process.env[OPENAI_CACHE_KEY_ENV])) return false;
662
697
  return true;
663
698
  }
664
699
 
700
/**
 * Flip the process-wide runtime switch and bring PI_CACHE_RETENTION in line with it.
 *
 * @param enabled New state of the runtime switch.
 * @param env Environment object to mutate; defaults to `process.env`.
 */
function setRuntimeOptimizerEnabled(enabled: boolean, env: MutableEnv = process.env): void {
  runtimeOptimizerEnabled = enabled;

  if (!enabled) {
    // Turning off: hand the variable back in the state captured in STARTUP_CACHE_RETENTION_ENV.
    restoreCacheRetentionEnv(STARTUP_CACHE_RETENTION_ENV, env);
    return;
  }

  // Turning on: ask for long retention again.
  requestLongCacheRetention(env);
}
708
+
709
/**
 * Read-only accessor for the module-level runtime switch.
 *
 * @returns The current value of `runtimeOptimizerEnabled`.
 */
function isRuntimeOptimizerEnabled(): boolean {
  return runtimeOptimizerEnabled;
}
712
+
713
/**
 * Build the human-readable runtime status report, one entry per output line.
 *
 * The report lists the runtime state, the effective on/off status of each feature,
 * the current PI_CACHE_RETENTION value, and a trailing note when the optimizer is
 * disabled or when some feature is off while the optimizer is enabled.
 *
 * @returns The report lines, in display order.
 */
function getOptimizerRuntimeModeLines(): string[] {
  const active = runtimeOptimizerEnabled;
  const rewriteBlockedByEnv = isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]);
  const cacheKeyFallback = shouldInjectOpenAIPromptCacheKey();
  const onOff = (flag: boolean): string => (flag ? "on" : "off");

  const report: string[] = [
    `Runtime state: ${active ? "enabled" : "disabled"}`,
    `• Prompt rewrite: ${onOff(active && !rewriteBlockedByEnv)}`,
    `• OpenAI prompt_cache_key fallback: ${onOff(cacheKeyFallback)}`,
    `• Footer cache stats: on${active ? "" : " (comparison mode)"}`,
    `• Compat warnings: ${onOff(active)}`,
    `• ${PI_CACHE_RETENTION_ENV}: ${process.env[PI_CACHE_RETENTION_ENV] ?? "(unset)"}`,
  ];

  if (!active) {
    report.push("This is a current-process switch. Run /reload or restart Pi to return to startup behavior.");
  } else if (rewriteBlockedByEnv || !cacheKeyFallback) {
    // Enabled overall, but at least one feature is still switched off.
    report.push("Some features are still disabled by environment variables.");
  }

  return report;
}
729
+
730
/**
 * Render the runtime status report as a single newline-separated string.
 *
 * @returns The lines from `getOptimizerRuntimeModeLines`, joined with "\n".
 */
function formatOptimizerRuntimeMode(): string {
  const reportLines = getOptimizerRuntimeModeLines();
  return reportLines.join("\n");
}
733
+
665
734
  function isAssistantMessage(message: unknown): boolean {
666
735
  return asRecord(message)?.role === "assistant";
667
736
  }
@@ -1443,6 +1512,40 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
1443
1512
  return missing;
1444
1513
  }
1445
1514
 
1515
/**
 * Build the compat snippet that is suggested to users as a safe default.
 *
 * Only `sendSessionAffinityHeaders` is ever included; every other missing flag,
 * including `supportsLongCacheRetention`, is left out of the suggestion.
 *
 * @param missing Names of compat flags reported as missing.
 * @returns `{ sendSessionAffinityHeaders: true }` when that flag is missing, otherwise `{}`.
 */
function buildSafeOpenAIProxyCompatSuggestion(missing: string[]): Record<string, boolean> {
  return missing.includes("sendSessionAffinityHeaders") ? { sendSessionAffinityHeaders: true } : {};
}
1522
+
1523
/**
 * Fixed advice text for endpoints that reject the `prompt_cache_retention` parameter.
 *
 * @returns The hint sentence, without any leading bullet or trailing newline.
 */
function getPromptCacheRetentionUnsupportedHint(): string {
  const fragments = [
    "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, ",
    "remove/avoid `supportsLongCacheRetention`; ",
    "this extension does not write that field directly, ",
    "but Pi may send it when long retention is requested and compat says the proxy supports it.",
  ];
  return fragments.join("");
}
1526
+
1527
/**
 * Append compat advice for the given missing flags to `lines` (mutated in place).
 *
 * First comes either the safe JSON suggestion (optionally preceded by an intro line)
 * or, when nothing safe can be suggested and `supportsLongCacheRetention` is missing,
 * a note saying so. After that, one explanatory bullet group is added per missing flag.
 *
 * @param lines Output buffer that receives the advice lines.
 * @param missing Names of compat flags reported as missing.
 * @param options `includeJsonIntro: false` suppresses the "Safe default suggestion:" line.
 */
function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[], options: { includeJsonIntro?: boolean } = {}): void {
  const lacksAffinity = missing.includes("sendSessionAffinityHeaders");
  const lacksLongRetention = missing.includes("supportsLongCacheRetention");
  const safeFlags = buildSafeOpenAIProxyCompatSuggestion(missing);

  if (Object.keys(safeFlags).length > 0) {
    // The intro is shown unless the caller explicitly passed `false`.
    if (options.includeJsonIntro !== false) {
      lines.push("Safe default suggestion:");
    }
    lines.push(JSON.stringify(safeFlags, null, 2));
  } else if (lacksLongRetention) {
    lines.push("No safe automatic JSON change is recommended for `supportsLongCacheRetention`.");
  }

  if (lacksAffinity) {
    lines.push("- sendSessionAffinityHeaders: recommended for third-party proxies when supported; it helps keep one Pi session on the same upstream/backend.");
  }

  if (lacksLongRetention) {
    lines.push(
      "- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.",
      `- ${getPromptCacheRetentionUnsupportedHint()}`,
    );
  }
}
1548
+
1446
1549
  /**
1447
1550
  * Build the warning text displayed to users when an OpenAI-family third-party
1448
1551
  * proxy is missing one or more cache/session-affinity compat flags.
@@ -1456,11 +1559,6 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
1456
1559
  * exercise it via __internals_for_tests.
1457
1560
  */
1458
1561
  function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
1459
- const suggestion: Record<string, boolean> = {};
1460
- for (const flag of missing) {
1461
- suggestion[flag] = true;
1462
- }
1463
-
1464
1562
  // Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
1465
1563
  // If no slash is found, fall back to the key itself.
1466
1564
  const slashIdx = key.indexOf("/");
@@ -1469,19 +1567,11 @@ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): stri
1469
1567
  const modelsJsonPath = getModelsJsonDisplayPath();
1470
1568
  const lines: string[] = [
1471
1569
  `💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
1472
- `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models):`,
1473
- ``,
1474
- JSON.stringify(suggestion, null, 2),
1570
+ `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
1475
1571
  ``,
1476
1572
  ];
1477
1573
 
1478
- for (const flag of missing) {
1479
- if (flag === "supportsLongCacheRetention") {
1480
- lines.push("- supportsLongCacheRetention: confirm your endpoint or proxy supports long prompt cache retention.");
1481
- } else if (flag === "sendSessionAffinityHeaders") {
1482
- lines.push("- sendSessionAffinityHeaders: keeps requests on the same backend for proxy cache locality (session affinity).");
1483
- }
1484
- }
1574
+ appendOpenAIProxyCompatAdviceLines(lines, missing);
1485
1575
 
1486
1576
  return lines.join("\n");
1487
1577
  }
@@ -3070,7 +3160,10 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3070
3160
  " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
3071
3161
  );
3072
3162
  notes.push(
3073
- ` Example compat: { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true }`,
3163
+ ` Safe compat default: { "sendSessionAffinityHeaders": true }`,
3164
+ );
3165
+ notes.push(
3166
+ ` Add supportsLongCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
3074
3167
  );
3075
3168
 
3076
3169
  return notes;
@@ -3103,7 +3196,7 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3103
3196
  return notes;
3104
3197
  }
3105
3198
 
3106
- function buildDoctorDiagnosis(model: PiModel): string {
3199
+ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
3107
3200
  const lines: string[] = [];
3108
3201
  lines.push(`Provider: ${model.provider}`);
3109
3202
  lines.push(`Model: ${model.id}`);
@@ -3120,16 +3213,25 @@ function buildDoctorDiagnosis(model: PiModel): string {
3120
3213
  const key = modelKey(model);
3121
3214
  const slashIdx = key.indexOf("/");
3122
3215
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
3123
- const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
3124
3216
  const modelsJsonPath = getModelsJsonDisplayPath();
3125
- lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models):`);
3126
- lines.push(JSON.stringify(suggestion, null, 2));
3217
+ lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
3218
+ appendOpenAIProxyCompatAdviceLines(lines, missing);
3127
3219
  } else if (isCompatCheckApplicable(model)) {
3128
3220
  lines.push("✅ Compat fully configured.");
3129
3221
  } else {
3130
3222
  lines.push("ℹ️ Compat check not applicable for this model.");
3131
3223
  }
3132
3224
 
3225
+ if (isCompatCheckApplicable(model) && compat.supportsLongCacheRetention === true) {
3226
+ lines.push("");
3227
+ if (options.promptCacheRetention400) {
3228
+ lines.push("⚠️ A 400 response was observed while supportsLongCacheRetention is enabled.");
3229
+ lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
3230
+ } else {
3231
+ lines.push(`ℹ️ Long retention is enabled. ${getPromptCacheRetentionUnsupportedHint()}`);
3232
+ }
3233
+ }
3234
+
3133
3235
  // ── Router/channel diagnostics ──
3134
3236
  const routerNotes = describeRouterChannelDiagnostics(model);
3135
3237
  if (routerNotes.length > 0) {
@@ -3280,22 +3382,22 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3280
3382
  if (missing.length > 0) {
3281
3383
  const slashIdx = key.indexOf("/");
3282
3384
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
3283
- const suggestion = Object.fromEntries(missing.map((f) => [f, true]));
3284
3385
  const modelsJsonPath = getModelsJsonDisplayPath();
3285
3386
  lines.push(`Active model: ${key}`);
3286
3387
  lines.push(`Missing: ${missing.join(", ")}`);
3287
3388
  lines.push("");
3288
3389
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
3289
- lines.push(`(at the same level as baseUrl/api/apiKey/models) and add:`);
3290
- lines.push(JSON.stringify(suggestion, null, 2));
3291
- lines.push("");
3292
- lines.push(`Only enable if your endpoint supports them.`);
3390
+ lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
3391
+ appendOpenAIProxyCompatAdviceLines(lines, missing);
3293
3392
  }
3294
3393
 
3295
3394
  // When compat is fully configured but router notes exist, prefix the status.
3296
3395
  if (routerNotes.length > 0 && missing.length === 0) {
3297
3396
  if (isCompatCheckApplicable(model)) {
3298
3397
  lines.push("✅ Compat fully configured.");
3398
+ if (getCompat(model).supportsLongCacheRetention === true) {
3399
+ lines.push(getPromptCacheRetentionUnsupportedHint());
3400
+ }
3299
3401
  } else {
3300
3402
  lines.push("ℹ️ Compat check not applicable for this model.");
3301
3403
  }
@@ -3339,6 +3441,8 @@ export const __internals_for_tests = {
3339
3441
  isOpenAIFamilyToken,
3340
3442
  describeMissingOpenAIFamilyProxyCompat,
3341
3443
  describeMissingOpenAICompatibleProxyCompat,
3444
+ buildSafeOpenAIProxyCompatSuggestion,
3445
+ getPromptCacheRetentionUnsupportedHint,
3342
3446
  isOfficialOpenAIBaseUrl,
3343
3447
  // Non-GPT OpenAI-compatible model detection
3344
3448
  isKimiLikeModel,
@@ -3454,6 +3558,15 @@ export const __internals_for_tests = {
3454
3558
  modelKey,
3455
3559
  // Platform-friendly path helper
3456
3560
  getModelsJsonDisplayPath,
3561
+ captureCacheRetentionEnv,
3562
+ requestLongCacheRetention,
3563
+ restoreCacheRetentionEnv,
3564
+ setRuntimeOptimizerEnabled,
3565
+ isRuntimeOptimizerEnabled,
3566
+ getOptimizerRuntimeModeLines,
3567
+ formatOptimizerRuntimeMode,
3568
+ PI_CACHE_RETENTION_ENV,
3569
+ LONG_CACHE_RETENTION_VALUE,
3457
3570
  // Integrity diagnostics
3458
3571
  getLastPromptIntegrityWarningAt,
3459
3572
  // Diagnostic command helpers
@@ -3492,6 +3605,8 @@ export const __internals_for_tests = {
3492
3605
 
3493
3606
  export default function (pi: ExtensionAPI) {
3494
3607
  const warnedModels = new Set<string>();
3608
+ const promptCacheRetention400Models = new Set<string>();
3609
+ const warnedPromptCacheRetention400Models = new Set<string>();
3495
3610
  let cacheStatsByModel: Record<string, CacheStats> = {};
3496
3611
  let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
3497
3612
  let lastStatusText: string | undefined;
@@ -3582,6 +3697,17 @@ export default function (pi: ExtensionAPI) {
3582
3697
  return created;
3583
3698
  }
3584
3699
 
3700
/**
 * Drop every stats bucket and recent-sample entry whose key starts with the current
 * session prefix, then clear the remembered status text.
 *
 * The prefix is `<currentSessionHash>:`, or `_nosession:` when the hash is empty/falsy.
 */
function resetCurrentSessionStats(): void {
  const sessionPrefix = `${currentSessionHash || "_nosession"}:`;
  const belongsToSession = (key: string): boolean => key.startsWith(sessionPrefix);

  // Both key lists are copied before deletion, so removing entries while walking is safe.
  Object.keys(cacheStatsByModel)
    .filter(belongsToSession)
    .forEach((key) => {
      delete cacheStatsByModel[key];
    });

  Array.from(recentSamplesByModelKey.keys())
    .filter(belongsToSession)
    .forEach((key) => {
      recentSamplesByModelKey.delete(key);
    });

  lastStatusText = undefined;
}
3710
+
3585
3711
  async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
3586
3712
  try {
3587
3713
  await writePersistedCacheStats(getCacheStatsState(), currentSessionHashSet ? currentSessionHash : undefined);
@@ -3745,7 +3871,8 @@ export default function (pi: ExtensionAPI) {
3745
3871
  // cacheStatsByModel[sessionModelKey(model)] on first use.
3746
3872
  const sk = model ? sessionModelKey(model) : undefined;
3747
3873
  const stats = sk ? cacheStatsByModel[sk] : undefined;
3748
- statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
3874
+ const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
3875
+ statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
3749
3876
  }
3750
3877
 
3751
3878
  // If optimizeSystemPrompt detected structural truncation on this or
@@ -3779,7 +3906,7 @@ export default function (pi: ExtensionAPI) {
3779
3906
  // update so the marker persists through stats changes and day
3780
3907
  // rollovers. Redundant setStatus calls are blocked by the
3781
3908
  // `lastStatusText` early return above.
3782
- if (statusText !== undefined && model) {
3909
+ if (runtimeOptimizerEnabled && statusText !== undefined && model) {
3783
3910
  const compatMissing = describeMissingOpenAICompatibleProxyCompat(model);
3784
3911
  if (compatMissing.length > 0) {
3785
3912
  statusText = statusText + " ⚠️ compat";
@@ -3794,12 +3921,12 @@ export default function (pi: ExtensionAPI) {
3794
3921
 
3795
3922
  pi.on("session_start", async (event, ctx) => {
3796
3923
  await restoreCacheStats(event.reason, ctx);
3797
- notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
3924
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
3798
3925
  await publishStatus(ctx);
3799
3926
  });
3800
3927
 
3801
3928
  pi.on("model_select", async (event, ctx) => {
3802
- notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
3929
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
3803
3930
  await publishStatus(ctx, event.model);
3804
3931
  });
3805
3932
 
@@ -3838,6 +3965,8 @@ export default function (pi: ExtensionAPI) {
3838
3965
  }
3839
3966
  }
3840
3967
 
3968
+ if (!runtimeOptimizerEnabled) return {};
3969
+
3841
3970
  // Global opt-out: PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 bypasses all
3842
3971
  // prompt mutations below (session-overview churn strip, skill compression,
3843
3972
  // and stable-prefix reordering). Footer stats and the OpenAI
@@ -3894,6 +4023,25 @@ export default function (pi: ExtensionAPI) {
3894
4023
  return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
3895
4024
  });
3896
4025
 
4026
+ pi.on("after_provider_response", (event, ctx) => {
4027
+ const model = ctx.model;
4028
+ if (!runtimeOptimizerEnabled || !model) return;
4029
+ if (event.status !== 400) return;
4030
+ if (!isCompatCheckApplicable(model)) return;
4031
+ if (getCompat(model).supportsLongCacheRetention !== true) return;
4032
+
4033
+ const key = modelKey(model);
4034
+ promptCacheRetention400Models.add(key);
4035
+ if (warnedPromptCacheRetention400Models.has(key)) return;
4036
+ warnedPromptCacheRetention400Models.add(key);
4037
+ ctx.ui.notify(
4038
+ `⚠️ ${LOG_PREFIX}: ${key} returned HTTP 400 while supportsLongCacheRetention is enabled. ` +
4039
+ getPromptCacheRetentionUnsupportedHint() +
4040
+ ` Run /cache-optimizer doctor for the exact edit location.`,
4041
+ "warning",
4042
+ );
4043
+ });
4044
+
3897
4045
  pi.on("message_end", async (event, ctx) => {
3898
4046
  const adapter = selectAdapterForAssistantMessage(event.message, ctx.model);
3899
4047
  if (!adapter) return;
@@ -3929,6 +4077,8 @@ export default function (pi: ExtensionAPI) {
3929
4077
  // ────────────────────────────────────────────────────────────────
3930
4078
  // Register /cache-optimizer command
3931
4079
  // Subcommands:
4080
+ // enable — enable runtime prompt/cache optimizations for this process
4081
+ // disable — disable runtime prompt/cache optimizations for this process
3932
4082
  // doctor — show current model/provider/api/baseUrl/compat status
3933
4083
  // with low-hit diagnosis
3934
4084
  // stats — show active model stats bucket, recent trend, usage
@@ -3942,12 +4092,24 @@ export default function (pi: ExtensionAPI) {
3942
4092
  const model = cmdCtx.model;
3943
4093
  const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
3944
4094
 
3945
- if (subcommand === "doctor") {
4095
+ if (subcommand === "enable") {
4096
+ setRuntimeOptimizerEnabled(true);
4097
+ resetCurrentSessionStats();
4098
+ await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
4099
+ await publishStatus(cmdCtx as unknown as ExtensionContext, model);
4100
+ cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
4101
+ } else if (subcommand === "disable") {
4102
+ setRuntimeOptimizerEnabled(false);
4103
+ resetCurrentSessionStats();
4104
+ await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
4105
+ await publishStatus(cmdCtx as unknown as ExtensionContext, model);
4106
+ cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
4107
+ } else if (subcommand === "doctor") {
3946
4108
  if (!model) {
3947
4109
  cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
3948
4110
  return;
3949
4111
  }
3950
- const diagnosis = buildDoctorDiagnosis(model);
4112
+ const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
3951
4113
  const adapter = selectAdapterForModel(model);
3952
4114
  const sk = model ? sessionModelKey(model) : undefined;
3953
4115
  const statsState = sk ? cacheStatsByModel[sk] : undefined;
@@ -4020,18 +4182,32 @@ export default function (pi: ExtensionAPI) {
4020
4182
  // Try interactive selection menu when UI supports it
4021
4183
  if (cmdCtx.hasUI) {
4022
4184
  const menuOptions = [
4023
- "🩺 Doctor Show current model cache configuration",
4024
- "📊 Stats Show active model stats bucket and trend",
4025
- "⚙️ Compat — Show compat suggestion with edit instructions",
4026
- "🔄 Reset Reset local session stats for current model",
4027
- " Cancel",
4185
+ "EnableTurn on runtime optimizations",
4186
+ "DisableTurn off runtime optimizations",
4187
+ "Doctor — Show cache configuration",
4188
+ "StatsShow cache stats and trend",
4189
+ "Compat — Show compat suggestion",
4190
+ "Reset — Reset local session stats",
4191
+ "Cancel",
4028
4192
  ];
4029
4193
  const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
4030
4194
  if (choice === menuOptions[0]) {
4195
+ setRuntimeOptimizerEnabled(true);
4196
+ resetCurrentSessionStats();
4197
+ await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
4198
+ await publishStatus(cmdCtx as unknown as ExtensionContext, model);
4199
+ cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
4200
+ } else if (choice === menuOptions[1]) {
4201
+ setRuntimeOptimizerEnabled(false);
4202
+ resetCurrentSessionStats();
4203
+ await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
4204
+ await publishStatus(cmdCtx as unknown as ExtensionContext, model);
4205
+ cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
4206
+ } else if (choice === menuOptions[2]) {
4031
4207
  if (!model) {
4032
4208
  cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
4033
4209
  } else {
4034
- const diagnosis = buildDoctorDiagnosis(model);
4210
+ const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
4035
4211
  const adapter = selectAdapterForModel(model);
4036
4212
  const sk = model ? sessionModelKey(model) : undefined;
4037
4213
  const statsState = sk ? cacheStatsByModel[sk] : undefined;
@@ -4042,7 +4218,7 @@ export default function (pi: ExtensionAPI) {
4042
4218
  : diagnosis;
4043
4219
  cmdCtx.ui.notify(fullDiagnosis, "info");
4044
4220
  }
4045
- } else if (choice === menuOptions[1]) {
4221
+ } else if (choice === menuOptions[3]) {
4046
4222
  if (!model) {
4047
4223
  cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
4048
4224
  } else {
@@ -4053,7 +4229,7 @@ export default function (pi: ExtensionAPI) {
4053
4229
  const output = buildStatsOutput(model, adapter, statsState, samples);
4054
4230
  cmdCtx.ui.notify(output, "info");
4055
4231
  }
4056
- } else if (choice === menuOptions[2]) {
4232
+ } else if (choice === menuOptions[4]) {
4057
4233
  if (!model) {
4058
4234
  cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
4059
4235
  } else {
@@ -4069,7 +4245,7 @@ export default function (pi: ExtensionAPI) {
4069
4245
  );
4070
4246
  }
4071
4247
  }
4072
- } else if (choice === menuOptions[3]) {
4248
+ } else if (choice === menuOptions[5]) {
4073
4249
  if (!model) {
4074
4250
  cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
4075
4251
  } else {
@@ -4098,11 +4274,15 @@ export default function (pi: ExtensionAPI) {
4098
4274
  // Fallback: text help when no interactive UI
4099
4275
  const diagnosis: string[] = [];
4100
4276
  diagnosis.push("📋 /cache-optimizer commands:");
4277
+ diagnosis.push(" enable — Enable prompt/cache optimizations for this Pi process");
4278
+ diagnosis.push(" disable — Disable prompt/cache optimizations for this Pi process");
4101
4279
  diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
4102
4280
  diagnosis.push(" stats — Show active model stats bucket and recent trend");
4103
4281
  diagnosis.push(" compat — Show compat suggestion with edit location");
4104
4282
  diagnosis.push(" reset — Reset local session stats for current model (does not affect upstream)");
4105
4283
  diagnosis.push("");
4284
+ diagnosis.push(formatOptimizerRuntimeMode());
4285
+ diagnosis.push("");
4106
4286
  if (model) {
4107
4287
  const displayKey = modelKey(model);
4108
4288
  const missing = describeMissingOpenAICompatibleProxyCompat(model);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.5.1",
4
- "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
3
+ "version": "2.5.3",
4
+ "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",
7
7
  "deepseek",