pi-cache-optimizer 2.6.2 → 2.6.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.ts +255 -16
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -162,9 +162,16 @@ type PersistedCacheStatsV2 = {
162
162
  };
163
163
 
164
164
  /** Per-model-key scoped state. Used in memory and for v3 persistence. */
165
+ type PersistedRoutedModelRef = {
166
+ provider: string;
167
+ id: string;
168
+ name?: string;
169
+ };
170
+
165
171
  type CacheStatsState = {
166
172
  statsByModel: Record<string, CacheStats>;
167
173
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
174
+ lastRoutedModelBySession?: Record<string, PersistedRoutedModelRef>;
168
175
  };
169
176
 
170
177
  type PersistedCacheStatsV3 = {
@@ -186,6 +193,13 @@ type PersistedCacheStatsV4 = {
186
193
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
187
194
  };
188
195
 
196
+ type PersistedCacheStatsV5 = {
197
+ version: 5;
198
+ sessions: Record<string, Record<string, CacheStats>>;
199
+ legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
200
+ lastRoutedModelBySession?: Record<string, PersistedRoutedModelRef>;
201
+ };
202
+
189
203
  type UsageSnapshot = {
190
204
  cacheRead: number;
191
205
  cacheWrite: number;
@@ -1381,6 +1395,34 @@ function modelKey(model: PiModel): string {
1381
1395
  return `${model.provider}/${model.id}`;
1382
1396
  }
1383
1397
 
1398
+ function isRouterModel(model: PiModel | undefined): boolean {
1399
+ return lower(model?.provider) === "router";
1400
+ }
1401
+
1402
+ function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefined): PiModel | undefined {
1403
+ const record = getAssistantRecord(message);
1404
+ if (!record) return fallback;
1405
+
1406
+ const id = lower(record.responseModel) || lower(record.model) || fallback?.id;
1407
+ const provider = lower(record.provider) || fallback?.provider;
1408
+ const api = lower(record.api) || fallback?.api;
1409
+ if (!id || !provider || !api) return fallback;
1410
+
1411
+ return {
1412
+ ...(fallback ?? {}),
1413
+ id,
1414
+ name: id,
1415
+ provider,
1416
+ api,
1417
+ baseUrl: fallback?.baseUrl ?? "",
1418
+ reasoning: fallback?.reasoning ?? false,
1419
+ input: fallback?.input ?? ["text"],
1420
+ cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
1421
+ contextWindow: fallback?.contextWindow ?? 0,
1422
+ maxTokens: fallback?.maxTokens ?? 0,
1423
+ } as PiModel;
1424
+ }
1425
+
1384
1426
  function keyForModelExt(model: { provider: string; id: string }): string {
1385
1427
  return `${model.provider}/${model.id}`;
1386
1428
  }
@@ -2835,7 +2877,8 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
2835
2877
  }
2836
2878
 
2837
2879
  function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
2838
- return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, model));
2880
+ const responseModel = isRouterModel(model) ? modelFromAssistantMessage(message, model) : model;
2881
+ return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
2839
2882
  }
2840
2883
 
2841
2884
  function notifyCacheCompatIfNeeded(
@@ -3076,12 +3119,55 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
3076
3119
  };
3077
3120
  }
3078
3121
 
3122
+ function parsePersistedRoutedModelRef(value: unknown): PersistedRoutedModelRef | undefined {
3123
+ const record = asRecord(value);
3124
+ if (!record || !isNonEmptyString(record.provider) || !isNonEmptyString(record.id)) return undefined;
3125
+
3126
+ return {
3127
+ provider: record.provider.trim(),
3128
+ id: record.id.trim(),
3129
+ name: isNonEmptyString(record.name) ? record.name.trim() : record.id.trim(),
3130
+ };
3131
+ }
3132
+
3133
+ function routedModelRefToPiModel(ref: PersistedRoutedModelRef): PiModel {
3134
+ return {
3135
+ id: ref.id,
3136
+ name: ref.name ?? ref.id,
3137
+ provider: ref.provider,
3138
+ api: "",
3139
+ baseUrl: "",
3140
+ reasoning: false,
3141
+ input: ["text"],
3142
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
3143
+ contextWindow: 0,
3144
+ maxTokens: 0,
3145
+ } as PiModel;
3146
+ }
3147
+
3148
+ function buildExactRouterStatusEntry(
3149
+ sessionHash: string | undefined,
3150
+ statsByModel: Record<string, CacheStats>,
3151
+ lastRoutedModel: PersistedRoutedModelRef | undefined,
3152
+ ): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3153
+ if (!sessionHash || !lastRoutedModel) return undefined;
3154
+
3155
+ const model = routedModelRefToPiModel(lastRoutedModel);
3156
+ const adapter = selectAdapterForModel(model);
3157
+ if (!adapter) return undefined;
3158
+
3159
+ const key = makeSessionModelKey(sessionHash, lastRoutedModel.provider, lastRoutedModel.id);
3160
+ return { adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3161
+ }
3162
+
3079
3163
  function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
3080
3164
  const record = asRecord(value);
3081
3165
  if (!record) return undefined;
3082
3166
 
3083
- // version 4: session-scoped stats + legacy family fallback
3084
- if (record.version === 4) {
3167
+ // version 4/5: session-scoped stats + legacy family fallback.
3168
+ // v5 additionally persists the last actual routed model per session so
3169
+ // router/auto can restore the exact upstream footer after /reload.
3170
+ if (record.version === 4 || record.version === 5) {
3085
3171
  const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
3086
3172
  const rawFamily = asRecord(record.legacyFamily);
3087
3173
  if (rawFamily) {
@@ -3109,10 +3195,19 @@ function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
3109
3195
  }
3110
3196
  }
3111
3197
 
3112
- return { statsByModel, legacyFamily };
3198
+ const lastRoutedModelBySession: Record<string, PersistedRoutedModelRef> = {};
3199
+ const rawLastRoutedModels = asRecord(record.lastRoutedModelBySession);
3200
+ if (rawLastRoutedModels) {
3201
+ for (const [sessionHash, rawModel] of Object.entries(rawLastRoutedModels)) {
3202
+ const parsed = parsePersistedRoutedModelRef(rawModel);
3203
+ if (parsed) lastRoutedModelBySession[sessionHash] = parsed;
3204
+ }
3205
+ }
3206
+
3207
+ return { statsByModel, legacyFamily, lastRoutedModelBySession };
3113
3208
  }
3114
3209
 
3115
- // version 3: migrate to v4 semantics by wrapping statsByModel into sessions
3210
+ // version 3: migrate to v4/v5 semantics by wrapping statsByModel into sessions
3116
3211
  if (record.version === 3) {
3117
3212
  const statsByModel: Record<string, CacheStats> = {};
3118
3213
  const rawModelMap = asRecord(record.statsByModel);
@@ -3306,11 +3401,38 @@ function mergeCacheSessions(
3306
3401
  return sessions;
3307
3402
  }
3308
3403
 
3404
+ function mergeLastRoutedModels(
3405
+ existingLastRoutedModelBySession: Record<string, PersistedRoutedModelRef>,
3406
+ state: CacheStatsState,
3407
+ currentSessionHash?: string,
3408
+ ): Record<string, PersistedRoutedModelRef> {
3409
+ const merged: Record<string, PersistedRoutedModelRef> = { ...existingLastRoutedModelBySession };
3410
+ const incoming = state.lastRoutedModelBySession ?? {};
3411
+
3412
+ if (currentSessionHash !== undefined) {
3413
+ const current = incoming[currentSessionHash];
3414
+ if (current) {
3415
+ merged[currentSessionHash] = current;
3416
+ } else {
3417
+ // Explicit deletion: when incoming state has no entry for current session,
3418
+ // remove any existing stale entry to reflect intentional reset.
3419
+ delete merged[currentSessionHash];
3420
+ }
3421
+ return merged;
3422
+ }
3423
+
3424
+ for (const [sessionHash, ref] of Object.entries(incoming)) {
3425
+ merged[sessionHash] = ref;
3426
+ }
3427
+ return merged;
3428
+ }
3429
+
3309
3430
  async function writePersistedCacheStats(state: CacheStatsState, currentSessionHash?: string): Promise<void> {
3310
3431
  await mkdir(STATE_DIR, { recursive: true });
3311
3432
 
3312
3433
  // Read existing file to preserve other sessions' data.
3313
3434
  let existingSessions: Record<string, Record<string, CacheStats>> = {};
3435
+ let existingLastRoutedModelBySession: Record<string, PersistedRoutedModelRef> = {};
3314
3436
  try {
3315
3437
  const raw = await readFile(STATE_FILE_PATH, "utf8");
3316
3438
  const parsed = parsePersistedCacheStats(JSON.parse(raw));
@@ -3326,17 +3448,24 @@ async function writePersistedCacheStats(state: CacheStatsState, currentSessionHa
3326
3448
  existingSessions[hash][modelKey] = stats;
3327
3449
  }
3328
3450
  }
3451
+ existingLastRoutedModelBySession = { ...(parsed.lastRoutedModelBySession ?? {}) };
3329
3452
  }
3330
3453
  } catch {
3331
3454
  // Ignore read errors (file may not exist yet).
3332
3455
  }
3333
3456
 
3334
3457
  const sessions = mergeCacheSessions(existingSessions, state, currentSessionHash);
3458
+ const lastRoutedModelBySession = mergeLastRoutedModels(
3459
+ existingLastRoutedModelBySession,
3460
+ state,
3461
+ currentSessionHash,
3462
+ );
3335
3463
 
3336
- const payload: PersistedCacheStatsV4 = {
3337
- version: 4,
3464
+ const payload: PersistedCacheStatsV5 = {
3465
+ version: 5,
3338
3466
  sessions,
3339
3467
  legacyFamily: state.legacyFamily,
3468
+ ...(Object.keys(lastRoutedModelBySession).length > 0 ? { lastRoutedModelBySession } : {}),
3340
3469
  };
3341
3470
  const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
3342
3471
 
@@ -4890,8 +5019,12 @@ export const __internals_for_tests = {
4890
5019
  makeSessionModelKey,
4891
5020
  modelKeyFromSessionKey,
4892
5021
  filterRestorableStatsForSession,
5022
+ parsePersistedRoutedModelRef,
5023
+ routedModelRefToPiModel,
5024
+ buildExactRouterStatusEntry,
4893
5025
  // Persistence helpers (for reload/reset tests)
4894
5026
  mergeCacheSessions,
5027
+ mergeLastRoutedModels,
4895
5028
  writePersistedCacheStats,
4896
5029
  readPersistedCacheStats,
4897
5030
  STATE_FILE_PATH,
@@ -4931,6 +5064,7 @@ export default function (pi: ExtensionAPI) {
4931
5064
  let currentSessionId = "";
4932
5065
  let currentSessionHash = "";
4933
5066
  let currentSessionHashSet = false;
5067
+ let lastActualRoutedModel: PersistedRoutedModelRef | undefined;
4934
5068
  const PERSIST_DEBOUNCE_MS = 2000;
4935
5069
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
4936
5070
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -4941,6 +5075,7 @@ export default function (pi: ExtensionAPI) {
4941
5075
  currentSessionId = sid;
4942
5076
  currentSessionHash = hashSessionId(sid);
4943
5077
  currentSessionHashSet = true;
5078
+ lastActualRoutedModel = undefined;
4944
5079
  }
4945
5080
  }
4946
5081
 
@@ -4990,7 +5125,13 @@ export default function (pi: ExtensionAPI) {
4990
5125
  }
4991
5126
 
4992
5127
  function getCacheStatsState(): CacheStatsState {
4993
- return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
5128
+ return {
5129
+ statsByModel: cacheStatsByModel,
5130
+ legacyFamily: cacheStatsLegacyFamily,
5131
+ ...(currentSessionHashSet && lastActualRoutedModel
5132
+ ? { lastRoutedModelBySession: { [currentSessionHash]: lastActualRoutedModel } }
5133
+ : {}),
5134
+ };
4994
5135
  }
4995
5136
 
4996
5137
  /** Look up active stats for a model, falling back to legacy family. */
@@ -5117,6 +5258,9 @@ export default function (pi: ExtensionAPI) {
5117
5258
  currentSessionHashSet ? currentSessionHash : undefined,
5118
5259
  );
5119
5260
  cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
5261
+ lastActualRoutedModel = currentSessionHashSet
5262
+ ? persisted?.lastRoutedModelBySession?.[currentSessionHash]
5263
+ : undefined;
5120
5264
 
5121
5265
  await rollOverStatsIfNeeded(ctx);
5122
5266
  return;
@@ -5131,16 +5275,89 @@ export default function (pi: ExtensionAPI) {
5131
5275
  currentSessionHashSet ? currentSessionHash : undefined,
5132
5276
  );
5133
5277
  cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
5278
+ lastActualRoutedModel = currentSessionHashSet
5279
+ ? persisted?.lastRoutedModelBySession?.[currentSessionHash]
5280
+ : undefined;
5134
5281
  lastStatusText = undefined;
5135
5282
  await rollOverStatsIfNeeded(ctx);
5136
5283
  }
5137
5284
 
5285
+ /**
5286
+ * Fallback for older persisted files that do not yet carry exact
5287
+ * last-routed-model metadata. When the current model is a router channel
5288
+ * (e.g. router/auto), restorable stats are stored under the real upstream
5289
+ * model's provider/id key, not under router/auto. Find the best valid entry
5290
+ * (highest totalRequests among adapter-detectable model keys) so we can show
5291
+ * meaningful footer content on session_start after reload.
5292
+ */
5293
+ function findBestRouterModelStats(): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
5294
+ if (!currentSessionHash) return undefined;
5295
+ const prefix = `${currentSessionHash}:`;
5296
+ let best: { adapter: CacheProviderAdapter; stats: CacheStats; total: number } | undefined;
5297
+
5298
+ for (const [key, stats] of Object.entries(cacheStatsByModel)) {
5299
+ if (!key.startsWith(prefix)) continue;
5300
+
5301
+ // Extract provider/id from key like "abc123:run-claude/claude-opus-4-8"
5302
+ const modelKeyPart = key.slice(prefix.length);
5303
+ const slashIdx = modelKeyPart.indexOf("/");
5304
+ if (slashIdx < 0 || slashIdx >= modelKeyPart.length - 1) continue;
5305
+ const modelId = modelKeyPart.slice(slashIdx + 1);
5306
+ const providerName = modelKeyPart.slice(0, slashIdx);
5307
+
5308
+ // Construct a minimal model for adapter detection.
5309
+ // Every is*LikeModel function only accesses model.id and model.name
5310
+ // via getModelIdNameTokenValues, so { id, name } is sufficient.
5311
+ const mockModel = {
5312
+ id: modelId,
5313
+ name: modelId,
5314
+ provider: providerName,
5315
+ api: "",
5316
+ baseUrl: "",
5317
+ reasoning: false,
5318
+ input: ["text"],
5319
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
5320
+ contextWindow: 0,
5321
+ maxTokens: 0,
5322
+ } as PiModel;
5323
+
5324
+ const adapter = selectAdapterForModel(mockModel);
5325
+ if (!adapter) continue;
5326
+
5327
+ if (!best || stats.totalRequests > best.total) {
5328
+ best = { adapter, stats, total: stats.totalRequests };
5329
+ }
5330
+ }
5331
+
5332
+ return best ? { adapter: best.adapter, stats: best.stats } : undefined;
5333
+ }
5334
+
5138
5335
  async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
5139
5336
  syncSessionHash(ctx);
5140
5337
  await rollOverStatsIfNeeded(ctx);
5141
5338
 
5142
5339
  const adapter = selectAdapterForModel(model);
5143
5340
  let statusText: string | undefined;
5341
+ if (!adapter && isRouterModel(model)) {
5342
+ // On model_select (existing footer), keep the existing cache footer
5343
+ // visible instead of clearing it. On session_start (no footer yet
5344
+ // after reload/fresh start), restore the exact last actual routed model
5345
+ // for this session when available; fall back to older best-effort
5346
+ // heuristics only when no exact metadata exists.
5347
+ if (lastStatusText !== undefined) return;
5348
+ const realEntry = buildExactRouterStatusEntry(
5349
+ currentSessionHashSet ? currentSessionHash : undefined,
5350
+ cacheStatsByModel,
5351
+ lastActualRoutedModel,
5352
+ ) ?? findBestRouterModelStats();
5353
+ if (realEntry) {
5354
+ const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
5355
+ statusText = runtimeOptimizerEnabled
5356
+ ? statsText
5357
+ : `Cache Optimizer disabled · ${statsText}`;
5358
+ }
5359
+ }
5360
+
5144
5361
  if (adapter) {
5145
5362
  // Display session-scoped stats. A model that has never been used
5146
5363
  // in this session shows 0/0. The message_end hook populates
@@ -5322,30 +5539,52 @@ export default function (pi: ExtensionAPI) {
5322
5539
 
5323
5540
  const usage = adapter.normalizeUsage(event.message);
5324
5541
 
5542
+ const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
5543
+ let routedModelChanged = false;
5544
+ if (isRouterModel(ctx.model) && statsModel && !isRouterModel(statsModel)) {
5545
+ const nextRoutedModel: PersistedRoutedModelRef = {
5546
+ provider: statsModel.provider,
5547
+ id: statsModel.id,
5548
+ name: statsModel.name || statsModel.id,
5549
+ };
5550
+ if (
5551
+ !lastActualRoutedModel ||
5552
+ lastActualRoutedModel.provider !== nextRoutedModel.provider ||
5553
+ lastActualRoutedModel.id !== nextRoutedModel.id ||
5554
+ (lastActualRoutedModel.name || lastActualRoutedModel.id) !== (nextRoutedModel.name || nextRoutedModel.id)
5555
+ ) {
5556
+ lastActualRoutedModel = nextRoutedModel;
5557
+ routedModelChanged = true;
5558
+ }
5559
+ }
5560
+
5325
5561
  // Record recent sample (even when usage is missing, for trend diagnosis)
5326
- if (ctx.model) {
5327
- const sk = sessionModelKey(ctx.model);
5562
+ if (statsModel) {
5563
+ const sk = sessionModelKey(statsModel);
5328
5564
  const missingFields = usage === undefined || (usage.cacheRead === 0 && usage.cacheWrite === 0 && usage.totalInput === 0)
5329
5565
  ? true
5330
5566
  : hasMissingUsageFields(event.message, adapter);
5331
5567
  recordRecentSample(sk, usage ?? { cacheRead: 0, cacheWrite: 0, totalInput: 0 }, missingFields);
5332
5568
  }
5333
5569
 
5334
- if (!usage) return;
5570
+ if (!usage) {
5571
+ if (routedModelChanged) schedulePersistCacheStats(ctx);
5572
+ return;
5573
+ }
5335
5574
 
5336
5575
  await rollOverStatsIfNeeded(ctx);
5337
5576
 
5338
- // Update stats scoped to current session + active model.
5339
- // Falls back to legacy family when ctx.model is undefined.
5340
- if (ctx.model) {
5341
- const sk = sessionModelKey(ctx.model);
5577
+ // Update stats scoped to current session + actual routed model.
5578
+ // Falls back to legacy family when no model is available.
5579
+ if (statsModel) {
5580
+ const sk = sessionModelKey(statsModel);
5342
5581
  addUsageToCacheStats(getOrCreateStatsByModelKey(sk), usage);
5343
5582
  } else {
5344
5583
  addUsageToCacheStats(getStatsForModel(undefined, adapter), usage);
5345
5584
  }
5346
5585
 
5347
5586
  schedulePersistCacheStats(ctx);
5348
- await publishStatus(ctx);
5587
+ await publishStatus(ctx, statsModel);
5349
5588
  });
5350
5589
 
5351
5590
  // ────────────────────────────────────────────────────────────────
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.6.2",
3
+ "version": "2.6.4",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",