pi-cache-optimizer 2.6.3 → 2.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.ts +213 -13
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -162,9 +162,16 @@ type PersistedCacheStatsV2 = {
162
162
  };
163
163
 
164
164
  /** Per-model-key scoped state. Used in memory and for v3 persistence. */
165
+ type PersistedRoutedModelRef = {
166
+ provider: string;
167
+ id: string;
168
+ name?: string;
169
+ };
170
+
165
171
  type CacheStatsState = {
166
172
  statsByModel: Record<string, CacheStats>;
167
173
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
174
+ lastRoutedModelBySession?: Record<string, PersistedRoutedModelRef>;
168
175
  };
169
176
 
170
177
  type PersistedCacheStatsV3 = {
@@ -186,6 +193,13 @@ type PersistedCacheStatsV4 = {
186
193
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
187
194
  };
188
195
 
196
+ type PersistedCacheStatsV5 = {
197
+ version: 5;
198
+ sessions: Record<string, Record<string, CacheStats>>;
199
+ legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
200
+ lastRoutedModelBySession?: Record<string, PersistedRoutedModelRef>;
201
+ };
202
+
189
203
  type UsageSnapshot = {
190
204
  cacheRead: number;
191
205
  cacheWrite: number;
@@ -3105,12 +3119,55 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
3105
3119
  };
3106
3120
  }
3107
3121
 
3122
+ function parsePersistedRoutedModelRef(value: unknown): PersistedRoutedModelRef | undefined {
3123
+ const record = asRecord(value);
3124
+ if (!record || !isNonEmptyString(record.provider) || !isNonEmptyString(record.id)) return undefined;
3125
+
3126
+ return {
3127
+ provider: record.provider.trim(),
3128
+ id: record.id.trim(),
3129
+ name: isNonEmptyString(record.name) ? record.name.trim() : record.id.trim(),
3130
+ };
3131
+ }
3132
+
3133
+ function routedModelRefToPiModel(ref: PersistedRoutedModelRef): PiModel {
3134
+ return {
3135
+ id: ref.id,
3136
+ name: ref.name ?? ref.id,
3137
+ provider: ref.provider,
3138
+ api: "",
3139
+ baseUrl: "",
3140
+ reasoning: false,
3141
+ input: ["text"],
3142
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
3143
+ contextWindow: 0,
3144
+ maxTokens: 0,
3145
+ } as PiModel;
3146
+ }
3147
+
3148
+ function buildExactRouterStatusEntry(
3149
+ sessionHash: string | undefined,
3150
+ statsByModel: Record<string, CacheStats>,
3151
+ lastRoutedModel: PersistedRoutedModelRef | undefined,
3152
+ ): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3153
+ if (!sessionHash || !lastRoutedModel) return undefined;
3154
+
3155
+ const model = routedModelRefToPiModel(lastRoutedModel);
3156
+ const adapter = selectAdapterForModel(model);
3157
+ if (!adapter) return undefined;
3158
+
3159
+ const key = makeSessionModelKey(sessionHash, lastRoutedModel.provider, lastRoutedModel.id);
3160
+ return { adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3161
+ }
3162
+
3108
3163
  function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
3109
3164
  const record = asRecord(value);
3110
3165
  if (!record) return undefined;
3111
3166
 
3112
- // version 4: session-scoped stats + legacy family fallback
3113
- if (record.version === 4) {
3167
+ // version 4/5: session-scoped stats + legacy family fallback.
3168
+ // v5 additionally persists the last actual routed model per session so
3169
+ // router/auto can restore the exact upstream footer after /reload.
3170
+ if (record.version === 4 || record.version === 5) {
3114
3171
  const legacyFamily: Partial<Record<CacheProviderId, CacheStats>> = {};
3115
3172
  const rawFamily = asRecord(record.legacyFamily);
3116
3173
  if (rawFamily) {
@@ -3138,10 +3195,19 @@ function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
3138
3195
  }
3139
3196
  }
3140
3197
 
3141
- return { statsByModel, legacyFamily };
3198
+ const lastRoutedModelBySession: Record<string, PersistedRoutedModelRef> = {};
3199
+ const rawLastRoutedModels = asRecord(record.lastRoutedModelBySession);
3200
+ if (rawLastRoutedModels) {
3201
+ for (const [sessionHash, rawModel] of Object.entries(rawLastRoutedModels)) {
3202
+ const parsed = parsePersistedRoutedModelRef(rawModel);
3203
+ if (parsed) lastRoutedModelBySession[sessionHash] = parsed;
3204
+ }
3205
+ }
3206
+
3207
+ return { statsByModel, legacyFamily, lastRoutedModelBySession };
3142
3208
  }
3143
3209
 
3144
- // version 3: migrate to v4 semantics by wrapping statsByModel into sessions
3210
+ // version 3: migrate to v4/v5 semantics by wrapping statsByModel into sessions
3145
3211
  if (record.version === 3) {
3146
3212
  const statsByModel: Record<string, CacheStats> = {};
3147
3213
  const rawModelMap = asRecord(record.statsByModel);
@@ -3335,11 +3401,38 @@ function mergeCacheSessions(
3335
3401
  return sessions;
3336
3402
  }
3337
3403
 
3404
+ function mergeLastRoutedModels(
3405
+ existingLastRoutedModelBySession: Record<string, PersistedRoutedModelRef>,
3406
+ state: CacheStatsState,
3407
+ currentSessionHash?: string,
3408
+ ): Record<string, PersistedRoutedModelRef> {
3409
+ const merged: Record<string, PersistedRoutedModelRef> = { ...existingLastRoutedModelBySession };
3410
+ const incoming = state.lastRoutedModelBySession ?? {};
3411
+
3412
+ if (currentSessionHash !== undefined) {
3413
+ const current = incoming[currentSessionHash];
3414
+ if (current) {
3415
+ merged[currentSessionHash] = current;
3416
+ } else {
3417
+ // Explicit deletion: when incoming state has no entry for current session,
3418
+ // remove any existing stale entry to reflect intentional reset.
3419
+ delete merged[currentSessionHash];
3420
+ }
3421
+ return merged;
3422
+ }
3423
+
3424
+ for (const [sessionHash, ref] of Object.entries(incoming)) {
3425
+ merged[sessionHash] = ref;
3426
+ }
3427
+ return merged;
3428
+ }
3429
+
3338
3430
  async function writePersistedCacheStats(state: CacheStatsState, currentSessionHash?: string): Promise<void> {
3339
3431
  await mkdir(STATE_DIR, { recursive: true });
3340
3432
 
3341
3433
  // Read existing file to preserve other sessions' data.
3342
3434
  let existingSessions: Record<string, Record<string, CacheStats>> = {};
3435
+ let existingLastRoutedModelBySession: Record<string, PersistedRoutedModelRef> = {};
3343
3436
  try {
3344
3437
  const raw = await readFile(STATE_FILE_PATH, "utf8");
3345
3438
  const parsed = parsePersistedCacheStats(JSON.parse(raw));
@@ -3355,17 +3448,24 @@ async function writePersistedCacheStats(state: CacheStatsState, currentSessionHa
3355
3448
  existingSessions[hash][modelKey] = stats;
3356
3449
  }
3357
3450
  }
3451
+ existingLastRoutedModelBySession = { ...(parsed.lastRoutedModelBySession ?? {}) };
3358
3452
  }
3359
3453
  } catch {
3360
3454
  // Ignore read errors (file may not exist yet).
3361
3455
  }
3362
3456
 
3363
3457
  const sessions = mergeCacheSessions(existingSessions, state, currentSessionHash);
3458
+ const lastRoutedModelBySession = mergeLastRoutedModels(
3459
+ existingLastRoutedModelBySession,
3460
+ state,
3461
+ currentSessionHash,
3462
+ );
3364
3463
 
3365
- const payload: PersistedCacheStatsV4 = {
3366
- version: 4,
3464
+ const payload: PersistedCacheStatsV5 = {
3465
+ version: 5,
3367
3466
  sessions,
3368
3467
  legacyFamily: state.legacyFamily,
3468
+ ...(Object.keys(lastRoutedModelBySession).length > 0 ? { lastRoutedModelBySession } : {}),
3369
3469
  };
3370
3470
  const tempPath = `${STATE_FILE_PATH}.${process.pid}.${Date.now()}.tmp`;
3371
3471
 
@@ -4919,8 +5019,12 @@ export const __internals_for_tests = {
4919
5019
  makeSessionModelKey,
4920
5020
  modelKeyFromSessionKey,
4921
5021
  filterRestorableStatsForSession,
5022
+ parsePersistedRoutedModelRef,
5023
+ routedModelRefToPiModel,
5024
+ buildExactRouterStatusEntry,
4922
5025
  // Persistence helpers (for reload/reset tests)
4923
5026
  mergeCacheSessions,
5027
+ mergeLastRoutedModels,
4924
5028
  writePersistedCacheStats,
4925
5029
  readPersistedCacheStats,
4926
5030
  STATE_FILE_PATH,
@@ -4960,6 +5064,7 @@ export default function (pi: ExtensionAPI) {
4960
5064
  let currentSessionId = "";
4961
5065
  let currentSessionHash = "";
4962
5066
  let currentSessionHashSet = false;
5067
+ let lastActualRoutedModel: PersistedRoutedModelRef | undefined;
4963
5068
  const PERSIST_DEBOUNCE_MS = 2000;
4964
5069
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
4965
5070
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -4970,6 +5075,7 @@ export default function (pi: ExtensionAPI) {
4970
5075
  currentSessionId = sid;
4971
5076
  currentSessionHash = hashSessionId(sid);
4972
5077
  currentSessionHashSet = true;
5078
+ lastActualRoutedModel = undefined;
4973
5079
  }
4974
5080
  }
4975
5081
 
@@ -5019,7 +5125,13 @@ export default function (pi: ExtensionAPI) {
5019
5125
  }
5020
5126
 
5021
5127
  function getCacheStatsState(): CacheStatsState {
5022
- return { statsByModel: cacheStatsByModel, legacyFamily: cacheStatsLegacyFamily };
5128
+ return {
5129
+ statsByModel: cacheStatsByModel,
5130
+ legacyFamily: cacheStatsLegacyFamily,
5131
+ ...(currentSessionHashSet && lastActualRoutedModel
5132
+ ? { lastRoutedModelBySession: { [currentSessionHash]: lastActualRoutedModel } }
5133
+ : {}),
5134
+ };
5023
5135
  }
5024
5136
 
5025
5137
  /** Look up active stats for a model, falling back to legacy family. */
@@ -5146,6 +5258,9 @@ export default function (pi: ExtensionAPI) {
5146
5258
  currentSessionHashSet ? currentSessionHash : undefined,
5147
5259
  );
5148
5260
  cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
5261
+ lastActualRoutedModel = currentSessionHashSet
5262
+ ? persisted?.lastRoutedModelBySession?.[currentSessionHash]
5263
+ : undefined;
5149
5264
 
5150
5265
  await rollOverStatsIfNeeded(ctx);
5151
5266
  return;
@@ -5160,10 +5275,63 @@ export default function (pi: ExtensionAPI) {
5160
5275
  currentSessionHashSet ? currentSessionHash : undefined,
5161
5276
  );
5162
5277
  cacheStatsLegacyFamily = persisted?.legacyFamily ?? emptyAllCacheStats();
5278
+ lastActualRoutedModel = currentSessionHashSet
5279
+ ? persisted?.lastRoutedModelBySession?.[currentSessionHash]
5280
+ : undefined;
5163
5281
  lastStatusText = undefined;
5164
5282
  await rollOverStatsIfNeeded(ctx);
5165
5283
  }
5166
5284
 
5285
+ /**
5286
+ * Fallback for older persisted files that do not yet carry exact
5287
+ * last-routed-model metadata. When the current model is a router channel
5288
+ * (e.g. router/auto), restorable stats are stored under the real upstream
5289
+ * model's provider/id key, not under router/auto. Find the best valid entry
5290
+ * (highest totalRequests among adapter-detectable model keys) so we can show
5291
+ * meaningful footer content on session_start after reload.
5292
+ */
5293
+ function findBestRouterModelStats(): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
5294
+ if (!currentSessionHash) return undefined;
5295
+ const prefix = `${currentSessionHash}:`;
5296
+ let best: { adapter: CacheProviderAdapter; stats: CacheStats; total: number } | undefined;
5297
+
5298
+ for (const [key, stats] of Object.entries(cacheStatsByModel)) {
5299
+ if (!key.startsWith(prefix)) continue;
5300
+
5301
+ // Extract provider/id from key like "abc123:run-claude/claude-opus-4-8"
5302
+ const modelKeyPart = key.slice(prefix.length);
5303
+ const slashIdx = modelKeyPart.indexOf("/");
5304
+ if (slashIdx < 0 || slashIdx >= modelKeyPart.length - 1) continue;
5305
+ const modelId = modelKeyPart.slice(slashIdx + 1);
5306
+ const providerName = modelKeyPart.slice(0, slashIdx);
5307
+
5308
+ // Construct a minimal model for adapter detection.
5309
+ // Every is*LikeModel function only accesses model.id and model.name
5310
+ // via getModelIdNameTokenValues, so { id, name } is sufficient.
5311
+ const mockModel = {
5312
+ id: modelId,
5313
+ name: modelId,
5314
+ provider: providerName,
5315
+ api: "",
5316
+ baseUrl: "",
5317
+ reasoning: false,
5318
+ input: ["text"],
5319
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
5320
+ contextWindow: 0,
5321
+ maxTokens: 0,
5322
+ } as PiModel;
5323
+
5324
+ const adapter = selectAdapterForModel(mockModel);
5325
+ if (!adapter) continue;
5326
+
5327
+ if (!best || stats.totalRequests > best.total) {
5328
+ best = { adapter, stats, total: stats.totalRequests };
5329
+ }
5330
+ }
5331
+
5332
+ return best ? { adapter: best.adapter, stats: best.stats } : undefined;
5333
+ }
5334
+
5167
5335
  async function publishStatus(ctx: ExtensionContext, model: PiModel | undefined = ctx.model): Promise<void> {
5168
5336
  syncSessionHash(ctx);
5169
5337
  await rollOverStatsIfNeeded(ctx);
@@ -5171,11 +5339,23 @@ export default function (pi: ExtensionAPI) {
5171
5339
  const adapter = selectAdapterForModel(model);
5172
5340
  let statusText: string | undefined;
5173
5341
  if (!adapter && isRouterModel(model)) {
5174
- // router/auto has no stable target family before the first successful
5175
- // routed response. Keep the existing cache footer visible instead of
5176
- // clearing it on model_select; message_end will switch to the real
5177
- // upstream model/provider after pi-router relays the response metadata.
5178
- return;
5342
+ // On model_select (existing footer), keep the existing cache footer
5343
+ // visible instead of clearing it. On session_start (no footer yet
5344
+ // after reload/fresh start), restore the exact last actual routed model
5345
+ // for this session when available; fall back to older best-effort
5346
+ // heuristics only when no exact metadata exists.
5347
+ if (lastStatusText !== undefined) return;
5348
+ const realEntry = buildExactRouterStatusEntry(
5349
+ currentSessionHashSet ? currentSessionHash : undefined,
5350
+ cacheStatsByModel,
5351
+ lastActualRoutedModel,
5352
+ ) ?? findBestRouterModelStats();
5353
+ if (realEntry) {
5354
+ const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
5355
+ statusText = runtimeOptimizerEnabled
5356
+ ? statsText
5357
+ : `Cache Optimizer disabled · ${statsText}`;
5358
+ }
5179
5359
  }
5180
5360
 
5181
5361
  if (adapter) {
@@ -5360,6 +5540,23 @@ export default function (pi: ExtensionAPI) {
5360
5540
  const usage = adapter.normalizeUsage(event.message);
5361
5541
 
5362
5542
  const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
5543
+ let routedModelChanged = false;
5544
+ if (isRouterModel(ctx.model) && statsModel && !isRouterModel(statsModel)) {
5545
+ const nextRoutedModel: PersistedRoutedModelRef = {
5546
+ provider: statsModel.provider,
5547
+ id: statsModel.id,
5548
+ name: statsModel.name || statsModel.id,
5549
+ };
5550
+ if (
5551
+ !lastActualRoutedModel ||
5552
+ lastActualRoutedModel.provider !== nextRoutedModel.provider ||
5553
+ lastActualRoutedModel.id !== nextRoutedModel.id ||
5554
+ (lastActualRoutedModel.name || lastActualRoutedModel.id) !== (nextRoutedModel.name || nextRoutedModel.id)
5555
+ ) {
5556
+ lastActualRoutedModel = nextRoutedModel;
5557
+ routedModelChanged = true;
5558
+ }
5559
+ }
5363
5560
 
5364
5561
  // Record recent sample (even when usage is missing, for trend diagnosis)
5365
5562
  if (statsModel) {
@@ -5370,7 +5567,10 @@ export default function (pi: ExtensionAPI) {
5370
5567
  recordRecentSample(sk, usage ?? { cacheRead: 0, cacheWrite: 0, totalInput: 0 }, missingFields);
5371
5568
  }
5372
5569
 
5373
- if (!usage) return;
5570
+ if (!usage) {
5571
+ if (routedModelChanged) schedulePersistCacheStats(ctx);
5572
+ return;
5573
+ }
5374
5574
 
5375
5575
  await rollOverStatsIfNeeded(ctx);
5376
5576
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.6.3",
3
+ "version": "2.6.4",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",