pi-cache-optimizer 2.4.0 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -82,6 +82,7 @@ After installation, `PI_CACHE_RETENTION=long` is applied automatically, the syst
82
82
 
83
83
  | Env var | Effect |
84
84
  |---------|--------|
85
+ | `PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1` | Skip all `before_agent_start` prompt mutations (session-overview churn strip, skill compression, stable-prefix reorder); footer stats and the OpenAI-family `prompt_cache_key` fallback remain active |
85
86
  | `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | Keep pi's verbose `<available_skills>` XML (opt out of one-line index) |
86
87
  | `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | Disable the OpenAI-family `prompt_cache_key` fallback (default is enabled) |
87
88
  | `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | Disable the OpenAI-family `prompt_cache_key` fallback |
package/README.zh-CN.md CHANGED
@@ -85,6 +85,7 @@ pi install npm:pi-cache-optimizer
85
85
 
86
86
  | 环境变量 | 作用 |
87
87
  |---------|------|
88
+ | `PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1` | 跳过所有 `before_agent_start` prompt 修改(session-overview 字段剥离、skills 压缩、稳定前缀重排);底部统计和 `prompt_cache_key` 兜底仍然生效 |
88
89
  | `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION=1` | 保留 pi 的 verbose `<available_skills>` XML(退出一行索引模式) |
89
90
  | `PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY=0` | 禁用 OpenAI-family `prompt_cache_key` 兜底(默认启用) |
90
91
  | `PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY=1` | 禁用 OpenAI-family `prompt_cache_key` 兜底 |
package/index.ts CHANGED
@@ -37,6 +37,7 @@ const OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY";
37
37
  const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
38
38
  const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
39
39
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
40
+ const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
40
41
 
41
42
  // WORM-flag: if optimizeSystemPrompt ever detects that its blind-replace
42
43
  // logic has accidentally truncated a structural marker (any XML tag or
@@ -102,6 +103,18 @@ type PersistedCacheStatsV2 = {
102
103
  statsByProvider: Partial<Record<CacheProviderId, CacheStats>>;
103
104
  };
104
105
 
106
+ /** Per-model-key scoped state. Used in memory and for v3 persistence. */
107
+ type CacheStatsState = {
108
+ statsByModel: Record<string, CacheStats>;
109
+ legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
110
+ };
111
+
112
+ type PersistedCacheStatsV3 = {
113
+ version: 3;
114
+ statsByModel: Record<string, CacheStats>;
115
+ legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
116
+ };
117
+
105
118
  type UsageSnapshot = {
106
119
  cacheRead: number;
107
120
  cacheWrite: number;
@@ -831,7 +844,7 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
831
844
  const missing: string[] = [];
832
845
 
833
846
  if (!isOpenAIFamilyModel(model)) return missing;
834
- if (model.api !== "openai-completions") return missing;
847
+ if (lower(model.api) !== "openai-completions") return missing;
835
848
  if (isOfficialOpenAIBaseUrl(model)) return missing;
836
849
 
837
850
  if (compat.supportsLongCacheRetention !== true) {
@@ -844,6 +857,43 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
844
857
  return missing;
845
858
  }
846
859
 
860
+ /**
861
+ * Build the warning text displayed to users when an OpenAI-family third-party
862
+ * proxy is missing one or more cache/session-affinity compat flags.
863
+ *
864
+ * The returned string contains a parseable JSON object (via JSON.stringify)
865
+ * listing only the missing flags with recommended value `true`. Inline
866
+ * explanations for each flag follow the JSON snippet as separate prose lines,
867
+ * so the JSON remains valid and copyable.
868
+ *
869
+ * Expected use: the openai adapter's warningText calls this function; tests
870
+ * exercise it via __internals_for_tests.
871
+ */
872
+ function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
873
+ const suggestion: Record<string, boolean> = {};
874
+ for (const flag of missing) {
875
+ suggestion[flag] = true;
876
+ }
877
+
878
+ const lines: string[] = [
879
+ `💡 pi-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
880
+ `Add under the model's compat in ~/.pi/agent/models.json (only if the endpoint supports them):`,
881
+ ``,
882
+ JSON.stringify(suggestion, null, 2),
883
+ ``,
884
+ ];
885
+
886
+ for (const flag of missing) {
887
+ if (flag === "supportsLongCacheRetention") {
888
+ lines.push("- supportsLongCacheRetention: confirm your endpoint or proxy supports long prompt cache retention.");
889
+ } else if (flag === "sendSessionAffinityHeaders") {
890
+ lines.push("- sendSessionAffinityHeaders: keeps requests on the same backend for proxy cache locality (session affinity).");
891
+ }
892
+ }
893
+
894
+ return lines.join("\n");
895
+ }
896
+
847
897
  function describeMissingDeepSeekCompat(model: PiModel): string[] {
848
898
  const compat = getCompat(model);
849
899
  const missing: string[] = [];
@@ -923,11 +973,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
923
973
  warningText(model) {
924
974
  const missing = describeMissingOpenAIFamilyProxyCompat(model);
925
975
  if (missing.length === 0) return undefined;
926
-
927
- return (
928
- `💡 pi-cache-optimizer: ${modelKey(model)} looks like a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}. ` +
929
- `For better cache locality, add compat: { "supportsLongCacheRetention": true, "sendSessionAffinityHeaders": true } in ~/.pi/agent/models.json when the endpoint supports these fields.`
930
- );
976
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
931
977
  },
932
978
  },
933
979
  {
@@ -1061,30 +1107,56 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
1061
1107
  };
1062
1108
  }
1063
1109
 
1064
- function parsePersistedCacheStats(value: unknown): Partial<Record<CacheProviderId, CacheStats>> | undefined {
1110
+ function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
1065
1111
  const record = asRecord(value);
1066
1112
  if (!record) return undefined;
1067
1113
 
1068
- if (record.version === 1) {
1069
- const migrated = parseCacheStats(record.stats);
1070
- return migrated ? { deepseek: migrated } : undefined;
1071
- }
1114
+ // version 3: model-scoped stats + legacy family fallback
1115
+ if (record.version === 3) {
1116
+ const statsByModel: Record<string, CacheStats> = {};
1117
+ const rawModelMap = asRecord(record.statsByModel);
1118
+ if (rawModelMap) {
1119
+ for (const [key, val] of Object.entries(rawModelMap)) {
1120
+ const parsed = parseCacheStats(val);
1121
+ if (parsed) statsByModel[key] = parsed;
1122
+ }
1123
+ }
1072
1124
 
1073
- if (record.version !== 2) return undefined;
1125
+ const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
1126
+ const rawFamily = asRecord(record.legacyFamily);
1127
+ if (rawFamily) {
1128
+ for (const id of CACHE_PROVIDER_IDS) {
1129
+ const stats = parseCacheStats(rawFamily[id]);
1130
+ if (stats) legacyFamily[id] = stats;
1131
+ }
1132
+ }
1074
1133
 
1075
- const statsByProvider = asRecord(record.statsByProvider);
1076
- if (!statsByProvider) return undefined;
1134
+ return { statsByModel, legacyFamily };
1135
+ }
1077
1136
 
1078
- const parsed: Partial<Record<CacheProviderId, CacheStats>> = {};
1079
- for (const id of CACHE_PROVIDER_IDS) {
1080
- const stats = parseCacheStats(statsByProvider[id]);
1081
- if (stats) parsed[id] = stats;
1137
+ // version 2: migrate statsByProvider into legacyFamily
1138
+ if (record.version === 2) {
1139
+ const statsByProvider = asRecord(record.statsByProvider);
1140
+ const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
1141
+ if (statsByProvider) {
1142
+ for (const id of CACHE_PROVIDER_IDS) {
1143
+ const stats = parseCacheStats(statsByProvider[id]);
1144
+ if (stats) legacyFamily[id] = stats;
1145
+ }
1146
+ }
1147
+ return { statsByModel: {}, legacyFamily };
1082
1148
  }
1083
1149
 
1084
- return parsed;
1150
+ // version 1: single DeepSeek stats -> migrate to legacyFamily.deepseek
1151
+ if (record.version === 1) {
1152
+ const migrated = parseCacheStats(record.stats);
1153
+ return migrated ? { statsByModel: {}, legacyFamily: { deepseek: migrated } } : undefined;
1154
+ }
1155
+
1156
+ return undefined;
1085
1157
  }
1086
1158
 
1087
- async function readPersistedCacheStats(): Promise<Partial<Record<CacheProviderId, CacheStats>> | undefined> {
1159
+ async function readPersistedCacheStats(): Promise<CacheStatsState | undefined> {
1088
1160
  try {
1089
1161
  const raw = await readFile(STATE_FILE_PATH, "utf8");
1090
1162
  return parsePersistedCacheStats(JSON.parse(raw));
@@ -1124,9 +1196,13 @@ async function readPersistedCacheStats(): Promise<Partial<Record<CacheProviderId
1124
1196
  return undefined;
1125
1197
  }
1126
1198
 
1127
- async function writePersistedCacheStats(statsByProvider: Partial<Record<CacheProviderId, CacheStats>>): Promise<void> {
1199
+ async function writePersistedCacheStats(state: CacheStatsState): Promise<void> {
1128
1200
  await mkdir(STATE_DIR, { recursive: true });
1129
- const payload: PersistedCacheStatsV2 = { version: 2, statsByProvider };
1201
+ const payload: PersistedCacheStatsV3 = {
1202
+ version: 3,
1203
+ statsByModel: state.statsByModel,
1204
+ legacyFamily: state.legacyFamily,
1205
+ };
1130
1206
  const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
1131
1207
 
1132
1208
  await writeFile(tempPath, JSON.stringify(payload, null, 2) + "\n", "utf8");
@@ -1148,6 +1224,8 @@ export const __internals_for_tests = {
1148
1224
  compressSkillsInSystemPrompt,
1149
1225
  MIN_STABLE_CANDIDATE_LENGTH,
1150
1226
  SKILL_COMPRESSION_MIN_COUNT,
1227
+ NO_PROMPT_REWRITE_ENV,
1228
+ isEnabledEnv,
1151
1229
  // OpenAI-family cache-key helpers
1152
1230
  addOpenAIPromptCacheKey,
1153
1231
  clampPromptCacheKey,
@@ -1160,30 +1238,64 @@ export const __internals_for_tests = {
1160
1238
  isOpenAIFamilyToken,
1161
1239
  describeMissingOpenAIFamilyProxyCompat,
1162
1240
  isOfficialOpenAIBaseUrl,
1241
+ buildOpenAIProxyCompatWarningText,
1163
1242
  getModelIdNameTokenValues,
1164
1243
  getAssistantMessageModelTokenValues,
1165
1244
  getCompat,
1166
1245
  modelKey,
1246
+ // Cache stats helpers (module-level, usable from verify script)
1247
+ addUsageToCacheStats,
1248
+ formatCacheStats,
1249
+ emptyCacheStats,
1250
+ emptyAllCacheStats,
1251
+ parseCacheStats,
1252
+ parsePersistedCacheStats,
1167
1253
  };
1168
1254
 
1169
1255
  export default function (pi: ExtensionAPI) {
1170
1256
  const warnedModels = new Set<string>();
1171
- let cacheStatsByProvider: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
1257
+ let cacheStatsByModel: Record<string, CacheStats> = {};
1258
+ let cacheStatsLegacyFamily: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
1172
1259
  let lastStatusText: string | undefined;
1173
1260
  let persistenceWarningShown = false;
1261
+ let persistTimer: ReturnType<typeof setTimeout> | null = null;
1262
+ const PERSIST_DEBOUNCE_MS = 2000;
1263
+
1264
+ function getCacheStatsState(): CacheStatsState {
1265
+ return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
1266
+ }
1267
+
1268
+ /** Look up active stats for a model, falling back to legacy family. */
1269
+ function getStatsForModel(model: PiModel | undefined, adapter: CacheProviderAdapter): CacheStats {
1270
+ if (model) {
1271
+ const key = modelKey(model);
1272
+ const existing = cacheStatsByModel[key];
1273
+ if (existing) return existing;
1274
+ }
1174
1275
 
1175
- function getStatsForAdapter(adapter: CacheProviderAdapter): CacheStats {
1176
- const existing = cacheStatsByProvider[adapter.id];
1276
+ // Fallback: legacy family bucket — used when model key is unknown
1277
+ // or this model hasn't been seen yet in this session.
1278
+ const family = cacheStatsLegacyFamily[adapter.id];
1279
+ if (family) return family;
1280
+
1281
+ const created = emptyCacheStats();
1282
+ cacheStatsLegacyFamily[adapter.id] = created;
1283
+ return created;
1284
+ }
1285
+
1286
+ /** Get or create a stats entry for the given model key. */
1287
+ function getOrCreateStatsByModelKey(key: string): CacheStats {
1288
+ const existing = cacheStatsByModel[key];
1177
1289
  if (existing) return existing;
1178
1290
 
1179
1291
  const created = emptyCacheStats();
1180
- cacheStatsByProvider[adapter.id] = created;
1292
+ cacheStatsByModel[key] = created;
1181
1293
  return created;
1182
1294
  }
1183
1295
 
1184
1296
  async function persistCacheStats(ctx?: ExtensionContext): Promise<void> {
1185
1297
  try {
1186
- await writePersistedCacheStats(cacheStatsByProvider);
1298
+ await writePersistedCacheStats(getCacheStatsState());
1187
1299
  } catch (error) {
1188
1300
  console.warn(`${LOG_PREFIX}: failed to persist cache stats`, error);
1189
1301
  if (!persistenceWarningShown) {
@@ -1196,14 +1308,48 @@ export default function (pi: ExtensionAPI) {
1196
1308
  }
1197
1309
  }
1198
1310
 
1311
+ /** Schedule a debounced persist. Coalesces rapid message_end writes
1312
+ * into a single disk write after PERSIST_DEBOUNCE_MS of silence.
1313
+ * In-memory stats remain instantly up-to-date for the footer; only
1314
+ * the on-disk persistence is delayed. */
1315
+ function schedulePersistCacheStats(ctx?: ExtensionContext): void {
1316
+ if (persistTimer !== null) clearTimeout(persistTimer);
1317
+ persistTimer = setTimeout(() => {
1318
+ persistTimer = null;
1319
+ persistCacheStats(ctx).catch((err) => {
1320
+ console.warn(`${LOG_PREFIX}: debounced persist failed`, err);
1321
+ });
1322
+ }, PERSIST_DEBOUNCE_MS);
1323
+ }
1324
+
1325
+ /** Flush any pending debounced persist immediately (cancels timer + writes).
1326
+ * Used on reload and day-rollover where immediate durability matters. */
1327
+ async function flushPersistCacheStats(ctx?: ExtensionContext): Promise<void> {
1328
+ if (persistTimer !== null) {
1329
+ clearTimeout(persistTimer);
1330
+ persistTimer = null;
1331
+ }
1332
+ await persistCacheStats(ctx);
1333
+ }
1334
+
1199
1335
  async function rollOverStatsIfNeeded(ctx?: ExtensionContext): Promise<void> {
1200
1336
  const day = currentLocalDay();
1201
1337
  let changed = false;
1202
1338
 
1339
+ // Roll over per-model entries.
1340
+ for (const key of Object.keys(cacheStatsByModel)) {
1341
+ const stats = cacheStatsByModel[key];
1342
+ if (stats && stats.day !== day) {
1343
+ cacheStatsByModel[key] = emptyCacheStats(day);
1344
+ changed = true;
1345
+ }
1346
+ }
1347
+
1348
+ // Roll over legacy family entries.
1203
1349
  for (const id of CACHE_PROVIDER_IDS) {
1204
- const stats = cacheStatsByProvider[id];
1350
+ const stats = cacheStatsLegacyFamily[id];
1205
1351
  if (stats && stats.day !== day) {
1206
- cacheStatsByProvider[id] = emptyCacheStats(day);
1352
+ cacheStatsLegacyFamily[id] = emptyCacheStats(day);
1207
1353
  changed = true;
1208
1354
  }
1209
1355
  }
@@ -1216,13 +1362,21 @@ export default function (pi: ExtensionAPI) {
1216
1362
 
1217
1363
  async function restoreCacheStats(reason: string, ctx: ExtensionContext): Promise<void> {
1218
1364
  if (reason === "reload") {
1219
- cacheStatsByProvider = emptyAllCacheStats();
1365
+ cacheStatsByModel = {};
1366
+ cacheStatsLegacyFamily = emptyAllCacheStats();
1220
1367
  lastStatusText = undefined;
1221
- await persistCacheStats(ctx);
1368
+ await flushPersistCacheStats(ctx);
1222
1369
  return;
1223
1370
  }
1224
1371
 
1225
- cacheStatsByProvider = (await readPersistedCacheStats()) ?? emptyAllCacheStats();
1372
+ const persisted = await readPersistedCacheStats();
1373
+ if (persisted) {
1374
+ cacheStatsByModel = persisted.statsByModel;
1375
+ cacheStatsLegacyFamily = persisted.legacyFamily;
1376
+ } else {
1377
+ cacheStatsByModel = {};
1378
+ cacheStatsLegacyFamily = emptyAllCacheStats();
1379
+ }
1226
1380
  lastStatusText = undefined;
1227
1381
  await rollOverStatsIfNeeded(ctx);
1228
1382
  }
@@ -1231,7 +1385,17 @@ export default function (pi: ExtensionAPI) {
1231
1385
  await rollOverStatsIfNeeded(ctx);
1232
1386
 
1233
1387
  const adapter = selectAdapterForModel(model);
1234
- let statusText: string | undefined = adapter ? formatCacheStats(adapter, getStatsForAdapter(adapter)) : undefined;
1388
+ let statusText: string | undefined;
1389
+ if (adapter) {
1390
+ // Display only per-model scoped stats. A model with no entry in
1391
+ // cacheStatsByModel yet shows 0/0 rather than falling back to legacy
1392
+ // family aggregated stats (which could span different providers with
1393
+ // the same model-family name). The message_end hook creates
1394
+ // cacheStatsByModel[key] the first time usage is recorded for that model.
1395
+ const key = model ? modelKey(model) : undefined;
1396
+ const stats = key ? cacheStatsByModel[key] : undefined;
1397
+ statusText = formatCacheStats(adapter, stats ?? emptyCacheStats());
1398
+ }
1235
1399
 
1236
1400
  // If optimizeSystemPrompt detected structural truncation on this or
1237
1401
  // a recent turn, flag it once in the footer so the user knows to
@@ -1294,6 +1458,14 @@ export default function (pi: ExtensionAPI) {
1294
1458
  }
1295
1459
  }
1296
1460
 
1461
+ // Global opt-out: PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 bypasses all
1462
+ // prompt mutations below (session-overview churn strip, skill compression,
1463
+ // and stable-prefix reordering). Footer stats and the OpenAI
1464
+ // prompt_cache_key fallback remain active.
1465
+ if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV])) {
1466
+ return {};
1467
+ }
1468
+
1297
1469
  // Step 1: strip per-turn churn from <session-overview>.
1298
1470
  // Removing RECENT COMMITS, Working directory status, and
1299
1471
  // Journal line count makes more of the session-overview stable
@@ -1351,8 +1523,17 @@ export default function (pi: ExtensionAPI) {
1351
1523
  if (!usage) return;
1352
1524
 
1353
1525
  await rollOverStatsIfNeeded(ctx);
1354
- addUsageToCacheStats(getStatsForAdapter(adapter), usage);
1355
- await persistCacheStats(ctx);
1526
+
1527
+ // Update stats scoped to the active model (provider/id key).
1528
+ // Falls back to legacy family when ctx.model is undefined.
1529
+ if (ctx.model) {
1530
+ const key = modelKey(ctx.model);
1531
+ addUsageToCacheStats(getOrCreateStatsByModelKey(key), usage);
1532
+ } else {
1533
+ addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
1534
+ }
1535
+
1536
+ schedulePersistCacheStats(ctx);
1356
1537
  await publishStatus(ctx);
1357
1538
  });
1358
1539
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.4.0",
3
+ "version": "2.4.2",
4
4
  "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
5
5
  "keywords": [
6
6
  "pi-package",