pi-cache-optimizer 2.6.5 → 2.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -33,8 +33,9 @@ Pi extension for improving provider-side KV / prompt cache hit rates. It keeps s
33
33
  - Warns once for third-party OpenAI-compatible proxies missing cache/session-affinity compat flags.
34
34
  - Detects Anthropic adaptive thinking models (opus-4.6+, sonnet-4.6+, fable-5+) missing `forceAdaptiveThinking: true` compat.
35
35
  - Shows session-scoped footer stats for supported model families.
36
+ - Supports optional router-extension integration through versioned global protocols (`Symbol.for("pi.routing.registry.v1")` and `Symbol.for("pi.cache.hints.v1")`) without importing router packages.
36
37
 
37
- Caching is provider-side and best-effort. Third-party proxies can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
38
+ Caching is provider-side and best-effort. Third-party proxies and router extensions can still hide cache usage, reject unsupported parameters, or route requests across multiple upstreams.
38
39
 
39
40
  ## Install
40
41
 
@@ -212,6 +213,8 @@ If only one model should change, use `modelOverrides`:
212
213
 
213
214
  Stats are read-only local counters stored at `~/.pi/agent/pi-cache-optimizer-stats.json` and scoped by Pi session + provider/model. They contain only dates and numeric counters — no API keys, prompts, payloads, headers, responses, or model output.
214
215
 
216
+ For virtual routing providers, completed assistant message metadata is authoritative: if the message carries real upstream `provider`, `model` / `responseModel`, `api`, and usage, stats are attributed to that upstream provider/model instead of the virtual router shell. Router extensions may also publish a live route adapter under `Symbol.for("pi.routing.registry.v1")` so footer, doctor, compat, and reset flows can resolve the current upstream before the final assistant message exists. The cache optimizer also exposes query-scoped prompt/cache hints via `Symbol.for("pi.cache.hints.v1")` for routers that forward to inner `streamSimple` calls. Both protocols are optional and versioned; no router package import is required.
217
+
215
218
  Example footer:
216
219
 
217
220
  ```text
package/README.zh-CN.md CHANGED
@@ -33,8 +33,9 @@
33
33
  - 对缺少缓存 / session-affinity compat 的第三方 OpenAI-compatible 代理给出一次性提醒。
34
34
  - 检测 Anthropic adaptive thinking 模型(opus-4.6+、sonnet-4.6+、fable-5+)是否缺少 `forceAdaptiveThinking: true` compat。
35
35
  - 为支持的模型家族显示按 session 隔离的底部缓存统计。
36
+ - 通过版本化全局协议(`Symbol.for("pi.routing.registry.v1")` 与 `Symbol.for("pi.cache.hints.v1")`)支持可选的 router extension 集成,而不导入任何 router 包。
36
37
 
37
- 缓存是 provider 侧的 best-effort 行为。第三方代理仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
38
+ 缓存是 provider 侧的 best-effort 行为。第三方代理和 router extension 仍可能隐藏缓存 usage、拒绝不支持的参数,或把请求路由到多个上游。
38
39
 
39
40
  ## 安装
40
41
 
@@ -212,6 +213,8 @@ Provider 级最小 override:
212
213
 
213
214
  统计是只读本地计数,保存在 `~/.pi/agent/pi-cache-optimizer-stats.json`,按 Pi session + provider/model 隔离。文件只包含日期和数字计数,不包含 API key、prompt、payload、headers、响应或模型输出。
214
215
 
216
+ 对于虚拟 routing provider,最终 assistant message 的 metadata 是权威来源:如果 message 携带真实上游 `provider`、`model` / `responseModel`、`api` 和 usage,统计会归因到真实上游 provider/model,而不是虚拟 router 外壳。Router extension 也可以在 `Symbol.for("pi.routing.registry.v1")` 下发布 live route adapter,让 footer、doctor、compat 和 reset 在最终 assistant message 出现前解析当前上游。本扩展还通过 `Symbol.for("pi.cache.hints.v1")` 暴露按查询过滤的 prompt/cache hints,供转发到内部 `streamSimple` 的 router 使用。两个协议都是可选、版本化的;不需要导入任何 router 包。
217
+
215
218
  示例 footer:
216
219
 
217
220
  ```text
package/index.ts CHANGED
@@ -71,6 +71,8 @@ const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
71
71
  const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
72
72
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
73
73
  const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
74
/** Global-registry symbol under which the v1 routing registry is stored on `globalThis`. */
const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("pi.routing.registry.v1");
/** Global-registry symbol under which the v1 cache hints service is stored on `globalThis`. */
const PI_CACHE_HINTS_SYMBOL = Symbol.for("pi.cache.hints.v1");
74
76
 
75
77
  let runtimeOptimizerEnabled = true;
76
78
 
@@ -168,6 +170,80 @@ type PersistedRoutedModelRef = {
168
170
  name?: string;
169
171
  };
170
172
 
173
/**
 * A resolved route from a virtual routing provider/model to a concrete
 * upstream provider/model. Values of this shape are produced by
 * `parseRouteSnapshot` from untyped adapter output.
 */
type PiRouteSnapshot = {
  /** Provider id of the virtual router the request was addressed to. */
  virtualProvider: string;
  /** Model id as selected on the virtual router. */
  virtualModelId: string;
  /** Upstream provider the route points at. */
  provider: string;
  /** Upstream model id the route points at. */
  modelId: string;
  /** Upstream API identifier, when the router reports one. */
  api?: string;
  /** Optional canonical model id; used as the display name when converting to a `PiModel`. */
  canonicalModelId?: string;
  /** Optional human-readable label for the route. */
  routeLabel?: string;
  /** Lifecycle state of the route; unrecognised values are dropped during parsing. */
  status?: "planned" | "trying" | "selected" | "success" | "failed";
  /** Hashed session id the route belongs to, if known. */
  sessionIdHash?: string;
  /** Router-assigned request id, if known. */
  requestId?: string;
  /** Numeric timestamp; `parseRouteSnapshot` defaults it to `Date.now()` when absent. */
  timestamp: number;
};
186
+
187
/**
 * Optional disambiguation data handed to a router adapter when asking for
 * the active route. `resolveActiveRouteSnapshot` currently fills in only
 * `sessionIdHash`.
 */
type PiRouteResolveHint = {
  /** Hashed session id of the caller. */
  sessionIdHash?: string;
  /** Request id to resolve a specific in-flight request. */
  requestId?: string;
};
191
+
192
/**
 * Adapter a router extension registers so the optimizer can resolve the
 * upstream model behind a virtual provider.
 */
type PiRouterAdapterV1 = {
  /** Virtual provider id; its trimmed value is the registry key. */
  virtualProvider: string;
  /** Returns the route currently active for `virtualModelId`, or `undefined` when none is known. */
  resolveActiveRoute(
    virtualModelId: string,
    hint?: PiRouteResolveHint,
  ): PiRouteSnapshot | undefined;
  /** Optional: list candidate routes for a virtual model. NOTE(review): not called in the visible code. */
  resolveCandidateRoutes?(virtualModelId: string): PiRouteSnapshot[];
  /** Optional: subscribe to route events; returns an unsubscribe function. NOTE(review): not called in the visible code. */
  subscribe?(listener: (event: PiRouteSnapshot) => void): () => void;
};
201
+
202
/** Version 1 of the routing registry published under `PI_ROUTING_REGISTRY_SYMBOL`. */
type PiRoutingRegistryV1 = {
  /** Protocol version discriminator; checked by `isRoutingRegistryV1`. */
  version: 1;
  /** Registers an adapter and returns a function that unregisters it. */
  registerRouter(adapter: PiRouterAdapterV1): () => void;
  /** Looks up the adapter registered for a virtual provider id. */
  getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined;
};
207
+
208
/**
 * Query passed to the cache hints service. Every field is optional; the
 * service treats each supplied field as a filter against the latest hint.
 */
type PiCacheHintsInput = {
  /** Hashed session id of the requesting session. */
  sessionIdHash?: string;
  /** Virtual router provider id. */
  virtualProvider?: string;
  /** Model id on the virtual router. */
  virtualModelId?: string;
  /** Upstream provider the request is being forwarded to. */
  upstreamProvider?: string;
  /** Upstream model id the request is being forwarded to. */
  upstreamModelId?: string;
  /** API identifier of the upstream request. */
  api?: string;
};
216
+
217
/** Prompt/cache hints returned to a router by the cache hints service. */
type PiCacheHintsOutput = {
  /** System prompt text published for the current agent turn. */
  systemPrompt?: string;
  /** Session-scoped prompt cache key. */
  promptCacheKey?: string;
  /** Cache retention request; only the literal `"long"` is representable, otherwise omitted. */
  cacheRetention?: "long";
};
222
+
223
/**
 * The most recently published hint: the identifying fields of a query, the
 * hint payload, and the time it was recorded.
 */
type PiCacheHintSnapshot = PiCacheHintsInput & PiCacheHintsOutput & {
  /** Numeric timestamp of when the hint was published. */
  timestamp: number;
};
226
+
227
/** Version 1 of the cache hints service published under `PI_CACHE_HINTS_SYMBOL`. */
type PiCacheHintsV1 = {
  /** Protocol version discriminator; checked by `isCacheHintsServiceV1`. */
  version: 1;
  /** Returns hints matching `input`, or `undefined` when none apply. */
  getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined;
};
231
+
232
/**
 * `globalThis` widened so that symbol-keyed protocol slots and the two
 * string-named globals below can be read and written without casts.
 */
type ProtocolGlobal = typeof globalThis & Record<symbol, unknown> & {
  /** Prototype routing global read by the migration shim in `resolveActiveRouteSnapshot`. */
  __piCacheOptimizerRouter?: unknown;
  /** String-named global slot for the prompt cache key. */
  __piCacheOptimizerCacheKey__?: unknown;
};
236
+
237
/**
 * Minimal, fully optional view of a model registry. Every method may be
 * missing, so callers must use optional calls.
 */
type ModelRegistryLike = {
  /** Direct lookup by provider and model id. */
  find?(provider: string, modelId: string): PiModel | undefined;
  /** Lists the models the registry reports as available. */
  getAvailable?(): PiModel[];
  /** Lists all models the registry reports. */
  getAll?(): PiModel[];
};
242
+
243
/** An extension context reduced to its session manager plus an optional model registry. */
type ContextWithOptionalModelRegistry = Pick<ExtensionContext, "sessionManager"> & {
  /** Registry used to upgrade a route snapshot to a fully configured model, when present. */
  modelRegistry?: ModelRegistryLike;
};
246
+
171
247
  type CacheStatsState = {
172
248
  statsByModel: Record<string, CacheStats>;
173
249
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
@@ -636,6 +712,210 @@ function hashSessionId(sessionId: string): string {
636
712
  return createHash("sha256").update(sessionId).digest("hex").slice(0, 16);
637
713
  }
638
714
 
715
/** Returns `globalThis` typed as {@link ProtocolGlobal} so protocol slots can be accessed. */
function getProtocolGlobal(): ProtocolGlobal {
  const globals = globalThis as ProtocolGlobal;
  return globals;
}
718
+
719
/**
 * Picks the first argument accepted by `isNonEmptyString` and returns it
 * trimmed; returns `undefined` when no argument qualifies.
 */
function firstNonEmptyString(...values: unknown[]): string | undefined {
  const match = values.find((candidate): candidate is string => isNonEmptyString(candidate));
  return match?.trim();
}
725
+
726
/**
 * Hashes the context's current session id with `hashSessionId`.
 *
 * @returns The session hash, or `undefined` when the session manager reports no id.
 */
function sessionHashFromContext(ctx: Pick<ExtensionContext, "sessionManager">): string | undefined {
  const id = ctx.sessionManager.getSessionId();
  if (!id) return undefined;
  return hashSessionId(id);
}
730
+
731
/**
 * Runtime guard for router adapters: the value must be a record with a
 * non-empty `virtualProvider` and a `resolveActiveRoute` function.
 */
function isPiRouterAdapterV1(value: unknown): value is PiRouterAdapterV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (!isNonEmptyString(candidate.virtualProvider)) return false;
  return typeof candidate.resolveActiveRoute === "function";
}
735
+
736
/**
 * Runtime guard for the v1 routing registry: a record with `version === 1`
 * and both `registerRouter` and `getRouter` functions.
 */
function isRoutingRegistryV1(value: unknown): value is PiRoutingRegistryV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (candidate.version !== 1) return false;
  const hasRegister = typeof candidate.registerRouter === "function";
  const hasLookup = typeof candidate.getRouter === "function";
  return hasRegister && hasLookup;
}
740
+
741
/**
 * Creates an in-memory v1 routing registry keyed by trimmed virtual provider id.
 *
 * - `registerRouter` ignores values that fail `isPiRouterAdapterV1` and
 *   returns a no-op disposer for them. A later registration for the same
 *   provider replaces the earlier one; a disposer only removes the entry if
 *   it still points at the adapter it was created for.
 * - `getRouter` normalises the lookup key the same way registration does, so
 *   an adapter registered with surrounding whitespace in its
 *   `virtualProvider` can still be found under that same string.
 *
 * @returns A fresh registry with no adapters registered.
 */
function createRoutingRegistry(): PiRoutingRegistryV1 {
  const routers = new Map<string, PiRouterAdapterV1>();
  return {
    version: 1,
    registerRouter(adapter: PiRouterAdapterV1): () => void {
      if (!isPiRouterAdapterV1(adapter)) return () => undefined;
      const key = adapter.virtualProvider.trim();
      routers.set(key, adapter);
      return () => {
        // Do not evict a newer adapter that replaced this one under the same key.
        if (routers.get(key) === adapter) routers.delete(key);
      };
    },
    getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined {
      // The registry is reachable from untyped extensions through a global
      // symbol, so tolerate non-string input instead of throwing on `.trim()`.
      if (typeof virtualProvider !== "string") return undefined;
      // Keys are stored trimmed; trim here too so lookups are symmetric.
      return routers.get(virtualProvider.trim());
    },
  };
}
758
+
759
/**
 * Reads the routing registry slot on the global object.
 *
 * @returns The registry when the slot holds a valid v1 registry, otherwise `undefined`.
 */
function getRoutingRegistry(): PiRoutingRegistryV1 | undefined {
  const slot: unknown = getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL];
  if (!isRoutingRegistryV1(slot)) return undefined;
  return slot;
}
763
+
764
/**
 * Returns the global v1 routing registry, creating and publishing a new one
 * when the global slot does not already hold a valid registry.
 */
function ensureRoutingRegistry(): PiRoutingRegistryV1 {
  let registry = getRoutingRegistry();
  if (!registry) {
    registry = createRoutingRegistry();
    getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL] = registry;
  }
  return registry;
}
772
+
773
/**
 * Validates an untyped route status.
 *
 * @returns The value itself when it is one of the known status literals, otherwise `undefined`.
 */
function parseRouteStatus(value: unknown): PiRouteSnapshot["status"] | undefined {
  const known = ["planned", "trying", "selected", "success", "failed"] as const;
  return known.find((status) => status === value);
}
778
+
779
/**
 * Normalises untyped adapter output into a {@link PiRouteSnapshot}.
 *
 * Several alias field names are accepted for the virtual model and for the
 * upstream provider/model. The virtual provider/model fall back to the
 * supplied arguments when the record does not carry them.
 *
 * @param value Untyped value returned by a router adapter or legacy global.
 * @param fallbackVirtualProvider Virtual provider used when the record has none.
 * @param fallbackVirtualModelId Virtual model id used when the record has none.
 * @returns The snapshot, or `undefined` when `value` is not a record or any of
 *   the four identity fields cannot be determined.
 */
function parseRouteSnapshot(
  value: unknown,
  fallbackVirtualProvider?: string,
  fallbackVirtualModelId?: string,
): PiRouteSnapshot | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;

  const virtualProvider = firstNonEmptyString(raw.virtualProvider, fallbackVirtualProvider);
  const virtualModelId = firstNonEmptyString(raw.virtualModelId, raw.virtualModel, fallbackVirtualModelId);
  if (!virtualProvider || !virtualModelId) return undefined;

  const provider = firstNonEmptyString(raw.provider, raw.upstreamProvider, raw.targetProvider);
  const modelId = firstNonEmptyString(raw.modelId, raw.upstreamModelId, raw.targetModelId, raw.responseModel);
  if (!provider || !modelId) return undefined;

  const snapshot: PiRouteSnapshot = {
    virtualProvider,
    virtualModelId,
    provider,
    modelId,
    api: firstNonEmptyString(raw.api),
    canonicalModelId: firstNonEmptyString(raw.canonicalModelId),
    routeLabel: firstNonEmptyString(raw.routeLabel, raw.label),
    status: parseRouteStatus(raw.status),
    sessionIdHash: firstNonEmptyString(raw.sessionIdHash),
    requestId: firstNonEmptyString(raw.requestId),
    // Stamp the snapshot now when the source did not supply a usable number.
    timestamp: getNumber(raw.timestamp) ?? Date.now(),
  };
  return snapshot;
}
808
+
809
/**
 * Resolves the upstream route currently active for a (possibly virtual) model.
 *
 * Resolution order:
 * 1. The adapter registered in the routing registry for `model.provider`.
 *    A snapshot that parses successfully is returned immediately; an
 *    unparsable result or a thrown error (logged with `console.warn`) falls
 *    through to step 2.
 * 2. The legacy `__piCacheOptimizerRouter` global, consulted only when it is
 *    set and `lower(model.provider)` contains `"router"`.
 *
 * @param model The selected model; `undefined` yields `undefined`.
 * @param ctx Optional context; when given, its session hash is passed to the
 *   resolver as a hint.
 * @returns The parsed snapshot, or `undefined` when no route can be resolved.
 */
function resolveActiveRouteSnapshot(
  model: PiModel | undefined,
  ctx?: Pick<ExtensionContext, "sessionManager">,
): PiRouteSnapshot | undefined {
  if (!model) return undefined;
  const hint: PiRouteResolveHint | undefined = ctx ? { sessionIdHash: sessionHashFromContext(ctx) } : undefined;

  const adapter = getRoutingRegistry()?.getRouter(model.provider);
  if (adapter) {
    try {
      const snapshot = parseRouteSnapshot(
        adapter.resolveActiveRoute(model.id, hint),
        model.provider,
        model.id,
      );
      if (snapshot) return snapshot;
    } catch (error) {
      // Best-effort: a failing adapter must not break the caller.
      console.warn(`${LOG_PREFIX}: routing registry adapter failed`, error);
    }
  }

  // Temporary migration shim for the prototype global used by early router PRs.
  // New integrations should use Symbol.for("pi.routing.registry.v1") instead.
  const legacy = getProtocolGlobal().__piCacheOptimizerRouter;
  if (!legacy || !lower(model.provider).includes("router")) return undefined;
  try {
    // Shape 1: the global is itself a resolver function (provider, modelId, hint).
    if (typeof legacy === "function") {
      return parseRouteSnapshot(legacy(model.provider, model.id, hint), model.provider, model.id);
    }
    // Shape 2: the global is an object exposing resolveActiveRoute(modelId, hint).
    const legacyRecord = asRecord(legacy);
    const resolver = legacyRecord?.resolveActiveRoute;
    if (typeof resolver === "function") {
      return parseRouteSnapshot(resolver.call(legacy, model.id, hint), model.provider, model.id);
    }
    // Shape 3: the global is treated as the snapshot record itself.
    return parseRouteSnapshot(legacy, model.provider, model.id);
  } catch (error) {
    console.warn(`${LOG_PREFIX}: legacy routing global failed`, error);
    return undefined;
  }
}
849
+
850
/**
 * Builds a `PiModel` describing the upstream side of a route.
 *
 * Identity (`id`, `name`, `provider`) always comes from the snapshot. `api`
 * prefers the snapshot and then the fallback. Every other field is copied
 * from `fallback` when available and otherwise set to a neutral default.
 *
 * @param snapshot The resolved route.
 * @param fallback Model whose remaining settings are inherited (typically the virtual model).
 */
function routeSnapshotToPiModel(snapshot: PiRouteSnapshot, fallback?: PiModel): PiModel {
  const { modelId, provider, canonicalModelId, api } = snapshot;
  const routed = {
    ...fallback,
    id: modelId,
    name: canonicalModelId ?? modelId,
    provider,
    api: api ?? fallback?.api ?? "",
    baseUrl: fallback?.baseUrl ?? "",
    reasoning: fallback?.reasoning ?? false,
    input: fallback?.input ?? ["text"],
    cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: fallback?.contextWindow ?? 0,
    maxTokens: fallback?.maxTokens ?? 0,
    compat: fallback?.compat,
  };
  return routed as PiModel;
}
866
+
867
/**
 * Looks a model up in a registry by exact provider and id.
 *
 * Tries `find`, then the `getAvailable` list, then the `getAll` list, and
 * stops at the first hit; later sources are not queried once a match exists.
 *
 * @returns The matching model, or `undefined` when the registry is missing or has no match.
 */
function findModelInRegistry(registry: ModelRegistryLike | undefined, provider: string, id: string): PiModel | undefined {
  const direct = registry?.find?.(provider, id);
  if (direct) return direct;

  const isTarget = (candidate: PiModel): boolean => candidate.provider === provider && candidate.id === id;
  // Wrapped in thunks so each listing is only fetched when it is actually needed.
  const listings: Array<() => PiModel[] | undefined> = [
    () => registry?.getAvailable?.(),
    () => registry?.getAll?.(),
  ];
  for (const list of listings) {
    const hit = (list() ?? []).find(isTarget);
    if (hit) return hit;
  }
  return undefined;
}
878
+
879
/**
 * Resolves the upstream model behind `model`, if a route is active.
 *
 * Prefers the registry's own entry for the upstream provider/model and
 * otherwise synthesises one from the route snapshot, inheriting from `model`.
 *
 * @returns The upstream model, or `undefined` when no active route resolves.
 */
function resolveRouteModel(
  model: PiModel | undefined,
  ctx?: ContextWithOptionalModelRegistry,
): PiModel | undefined {
  const route = resolveActiveRouteSnapshot(model, ctx);
  if (!route) return undefined;

  const registered = findModelInRegistry(ctx?.modelRegistry, route.provider, route.modelId);
  return registered ?? routeSnapshotToPiModel(route, model);
}
889
+
890
/**
 * Reports whether `model` is a virtual routing model. Checks are made in
 * order and stop at the first success: `isRouterModel`, an adapter registered
 * for the model's provider, or a resolvable active route.
 */
function isVirtualRoutingModel(model: PiModel | undefined, ctx?: Pick<ExtensionContext, "sessionManager">): boolean {
  if (!model) return false;
  if (isRouterModel(model)) return true;
  if (getRoutingRegistry()?.getRouter(model.provider)) return true;
  return Boolean(resolveActiveRouteSnapshot(model, ctx));
}
894
+
895
/** Runtime guard for the v1 cache hints service: a record with `version === 1` and a `getHints` function. */
function isCacheHintsServiceV1(value: unknown): value is PiCacheHintsV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (candidate.version !== 1) return false;
  return typeof candidate.getHints === "function";
}
899
+
900
/**
 * Reads the cache hints slot on the global object.
 *
 * @returns The service when the slot holds a valid v1 service, otherwise `undefined`.
 */
function getCacheHintsService(): PiCacheHintsV1 | undefined {
  const slot: unknown = getProtocolGlobal()[PI_CACHE_HINTS_SYMBOL];
  if (!isCacheHintsServiceV1(slot)) return undefined;
  return slot;
}
904
+
905
/**
 * Publishes `service` in the global cache hints slot, replacing whatever was there.
 *
 * @returns An uninstall function. It does nothing if the slot no longer holds
 *   `service`; otherwise it restores the displaced value, or deletes the slot
 *   when the displaced value was `undefined`.
 */
function installCacheHintsService(service: PiCacheHintsV1): () => void {
  const globals = getProtocolGlobal();
  const displaced = globals[PI_CACHE_HINTS_SYMBOL];
  globals[PI_CACHE_HINTS_SYMBOL] = service;

  const uninstall = (): void => {
    // Someone else has taken over the slot since installation; leave it alone.
    const stillOurs = globals[PI_CACHE_HINTS_SYMBOL] === service;
    if (!stillOurs) return;

    if (displaced !== undefined) {
      globals[PI_CACHE_HINTS_SYMBOL] = displaced;
      return;
    }
    delete globals[PI_CACHE_HINTS_SYMBOL];
  };
  return uninstall;
}
918
+
639
919
  /**
640
920
  * Build a session-scoped stats key from a session hash + provider/id.
641
921
  * Pure function (no closure dependency) for use by tests and internals.
@@ -1403,10 +1683,10 @@ function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefin
1403
1683
  const record = getAssistantRecord(message);
1404
1684
  if (!record) return fallback;
1405
1685
 
1406
- const id = lower(record.responseModel) || lower(record.model) || fallback?.id;
1407
- const provider = lower(record.provider) || fallback?.provider;
1408
- const api = lower(record.api) || fallback?.api;
1409
- if (!id || !provider || !api) return fallback;
1686
+ const id = firstNonEmptyString(record.responseModel, record.model, fallback?.id);
1687
+ const provider = firstNonEmptyString(record.provider, fallback?.provider);
1688
+ const api = firstNonEmptyString(record.api, fallback?.api) ?? "";
1689
+ if (!id || !provider) return fallback;
1410
1690
 
1411
1691
  return {
1412
1692
  ...(fallback ?? {}),
@@ -2886,7 +3166,10 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
2886
3166
  }
2887
3167
 
2888
3168
  function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
2889
- const responseModel = isRouterModel(model) ? modelFromAssistantMessage(message, model) : model;
3169
+ // Assistant message metadata is request-local and authoritative for virtual
3170
+ // routing providers. Use it first for every model; direct providers normally
3171
+ // echo the same provider/model and therefore remain unchanged.
3172
+ const responseModel = modelFromAssistantMessage(message, model);
2890
3173
  return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
2891
3174
  }
2892
3175
 
@@ -3161,7 +3444,7 @@ function buildExactRouterStatusEntry(
3161
3444
  sessionHash: string | undefined,
3162
3445
  statsByModel: Record<string, CacheStats>,
3163
3446
  lastRoutedModel: PersistedRoutedModelRef | undefined,
3164
- ): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3447
+ ): { model: PiModel; adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3165
3448
  if (!sessionHash || !lastRoutedModel) return undefined;
3166
3449
 
3167
3450
  const model = routedModelRefToPiModel(lastRoutedModel);
@@ -3169,7 +3452,7 @@ function buildExactRouterStatusEntry(
3169
3452
  if (!adapter) return undefined;
3170
3453
 
3171
3454
  const key = makeSessionModelKey(sessionHash, lastRoutedModel.provider, lastRoutedModel.id);
3172
- return { adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3455
+ return { model, adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3173
3456
  }
3174
3457
 
3175
3458
  function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
@@ -5062,6 +5345,18 @@ export const __internals_for_tests = {
5062
5345
  parsePersistedRoutedModelRef,
5063
5346
  routedModelRefToPiModel,
5064
5347
  buildExactRouterStatusEntry,
5348
+ // Routing-provider protocol helpers
5349
+ PI_ROUTING_REGISTRY_SYMBOL,
5350
+ PI_CACHE_HINTS_SYMBOL,
5351
+ ensureRoutingRegistry,
5352
+ getRoutingRegistry,
5353
+ parseRouteSnapshot,
5354
+ resolveActiveRouteSnapshot,
5355
+ routeSnapshotToPiModel,
5356
+ resolveRouteModel,
5357
+ isVirtualRoutingModel,
5358
+ installCacheHintsService,
5359
+ getCacheHintsService,
5065
5360
  // Persistence helpers (for reload/reset tests)
5066
5361
  mergeCacheSessions,
5067
5362
  mergeLastRoutedModels,
@@ -5109,6 +5404,7 @@ export default function (pi: ExtensionAPI) {
5109
5404
  let currentSessionHash = "";
5110
5405
  let currentSessionHashSet = false;
5111
5406
  let lastActualRoutedModel: PersistedRoutedModelRef | undefined;
5407
+ let latestCacheHint: PiCacheHintSnapshot | undefined;
5112
5408
  const PERSIST_DEBOUNCE_MS = 2000;
5113
5409
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
5114
5410
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5123,6 +5419,28 @@ export default function (pi: ExtensionAPI) {
5123
5419
  }
5124
5420
  }
5125
5421
 
5422
// Publish the cache hints service. A query receives the latest hint unless
// the optimizer or prompt rewriting is disabled, no hint has been recorded,
// or a field present on both the query and the hint disagrees.
const uninstallCacheHintsService = installCacheHintsService({
  version: 1,
  getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined {
    const hintsDisabled = !runtimeOptimizerEnabled || isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]);
    if (hintsDisabled) return undefined;

    const current = latestCacheHint;
    if (!current) return undefined;

    const filterKeys = [
      "sessionIdHash",
      "virtualProvider",
      "virtualModelId",
      "upstreamProvider",
      "upstreamModelId",
      "api",
    ] as const;
    // A field only disqualifies the hint when both sides set it and they differ.
    const conflicts = filterKeys.some((key) => {
      const wanted = input[key];
      const known = current[key];
      return !!wanted && !!known && wanted !== known;
    });
    if (conflicts) return undefined;

    const { systemPrompt, promptCacheKey, cacheRetention } = current;
    return { systemPrompt, promptCacheKey, cacheRetention };
  },
});
void uninstallCacheHintsService;
5443
+
5126
5444
  /**
5127
5445
  * Build a session-scoped stats key from the current session hash + model key.
5128
5446
  * Returns `${sessionHash}:${provider}/${id}`.
@@ -5206,6 +5524,13 @@ export default function (pi: ExtensionAPI) {
5206
5524
  return created;
5207
5525
  }
5208
5526
 
5527
/**
 * Clears the current session's stats bucket and recent samples for `model`,
 * and forgets the last published status text.
 */
function resetStatsForModel(model: PiModel): void {
  const bucketKey = sessionModelKey(model);
  delete cacheStatsByModel[bucketKey];
  recentSamplesByModelKey.delete(bucketKey);
  lastStatusText = undefined;
}
5533
+
5209
5534
  function resetCurrentSessionStats(): void {
5210
5535
  const prefix = `${currentSessionHash || "_nosession"}:`;
5211
5536
  for (const key of Object.keys(cacheStatsByModel)) {
@@ -5214,6 +5539,7 @@ export default function (pi: ExtensionAPI) {
5214
5539
  for (const key of Array.from(recentSamplesByModelKey.keys())) {
5215
5540
  if (key.startsWith(prefix)) recentSamplesByModelKey.delete(key);
5216
5541
  }
5542
+ lastActualRoutedModel = undefined;
5217
5543
  lastStatusText = undefined;
5218
5544
  }
5219
5545
 
@@ -5380,9 +5706,13 @@ export default function (pi: ExtensionAPI) {
5380
5706
  syncSessionHash(ctx);
5381
5707
  await rollOverStatsIfNeeded(ctx);
5382
5708
 
5383
- const adapter = selectAdapterForModel(model);
5709
+ const routedModel = resolveRouteModel(model, ctx);
5710
+ const displayModel = routedModel ?? model;
5711
+ const adapter = selectAdapterForModel(displayModel);
5712
+ const activeIsVirtualRoute = !!routedModel || isVirtualRoutingModel(model, ctx);
5384
5713
  let statusText: string | undefined;
5385
- if (!adapter && isRouterModel(model)) {
5714
+
5715
+ if (!adapter && !routedModel && activeIsVirtualRoute) {
5386
5716
  // On model_select (existing footer), keep the existing cache footer
5387
5717
  // visible instead of clearing it. On session_start (no footer yet
5388
5718
  // after reload/fresh start), restore the exact last actual routed model
@@ -5405,8 +5735,8 @@ export default function (pi: ExtensionAPI) {
5405
5735
  if (adapter) {
5406
5736
  // Display session-scoped stats. A model that has never been used
5407
5737
  // in this session shows 0/0. The message_end hook populates
5408
- // cacheStatsByModel[sessionModelKey(model)] on first use.
5409
- const sk = model ? sessionModelKey(model) : undefined;
5738
+ // cacheStatsByModel[sessionModelKey(displayModel)] on first use.
5739
+ const sk = displayModel ? sessionModelKey(displayModel) : undefined;
5410
5740
  const stats = sk ? cacheStatsByModel[sk] : undefined;
5411
5741
  const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
5412
5742
  statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
@@ -5443,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
5443
5773
  // Re-evaluated on every status update so the marker persists through stats
5444
5774
  // changes and day rollovers. Redundant setStatus calls are blocked by the
5445
5775
  // `lastStatusText` early return above.
5446
- if (runtimeOptimizerEnabled && statusText !== undefined && model) {
5776
+ if (runtimeOptimizerEnabled && statusText !== undefined && displayModel) {
5447
5777
  // Only show ⚠️ compat when there are safe-fixable missing compat keys.
5448
5778
  // Optional/advisory-only flags (e.g. supportsLongCacheRetention on generic
5449
5779
  // OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
5450
5780
  // commands still mention them as optional guidance.
5451
- if (buildFixSuggestion(model) !== undefined) {
5781
+ if (buildFixSuggestion(displayModel) !== undefined) {
5452
5782
  statusText = statusText + " ⚠️ compat";
5453
5783
  }
5454
5784
  }
@@ -5459,18 +5789,26 @@ export default function (pi: ExtensionAPI) {
5459
5789
  ctx.ui.setStatus(STATUS_KEY, statusText);
5460
5790
  }
5461
5791
 
5792
+ ensureRoutingRegistry();
5793
+
5462
5794
  pi.on("session_start", async (event, ctx) => {
5463
5795
  await restoreCacheStats(event.reason, ctx);
5464
- if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
5796
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(ctx.model, ctx) ?? ctx.model, ctx, warnedModels);
5465
5797
  await publishStatus(ctx);
5466
5798
  });
5467
5799
 
5468
5800
  pi.on("model_select", async (event, ctx) => {
5469
- if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
5801
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(event.model, ctx) ?? event.model, ctx, warnedModels);
5470
5802
  await publishStatus(ctx, event.model);
5471
5803
  });
5472
5804
 
5473
5805
  pi.on("before_agent_start", async (event, _ctx) => {
5806
+ latestCacheHint = undefined;
5807
+ const routeSnapshot = resolveActiveRouteSnapshot(_ctx.model, _ctx);
5808
+ const routedModel = routeSnapshot
5809
+ ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
5810
+ : undefined;
5811
+
5474
5812
  // ────────────────────────────────────────────────────────────────
5475
5813
  // OpenAI Responses-family bypass (codex-responses + responses + azure responses)
5476
5814
  //
@@ -5497,7 +5835,7 @@ export default function (pi: ExtensionAPI) {
5497
5835
  // compression, reorder) for these APIs. Third-party providers
5498
5836
  // that use openai-completions are unaffected.
5499
5837
  // ────────────────────────────────────────────────────────────────
5500
- const model = _ctx.model;
5838
+ const model = routedModel ?? _ctx.model;
5501
5839
  if (model && isResponsesPromptRewriteBypassApi(model.api)) {
5502
5840
  return {};
5503
5841
  }
@@ -5535,7 +5873,27 @@ export default function (pi: ExtensionAPI) {
5535
5873
  // ships to the provider.
5536
5874
  const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
5537
5875
 
5876
+ const promptCacheKey = getSessionPromptCacheKey(_ctx);
5877
+ const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
5878
+ const publishHint = (systemPrompt: string): void => {
5879
+ latestCacheHint = {
5880
+ sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
5881
+ virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
5882
+ virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
5883
+ upstreamProvider: routeSnapshot?.provider ?? model?.provider,
5884
+ upstreamModelId: routeSnapshot?.modelId ?? model?.id,
5885
+ api: model?.api,
5886
+ systemPrompt,
5887
+ promptCacheKey,
5888
+ cacheRetention,
5889
+ timestamp: Date.now(),
5890
+ };
5891
+ const globals = getProtocolGlobal();
5892
+ globals.__piCacheOptimizerCacheKey__ = promptCacheKey;
5893
+ };
5894
+
5538
5895
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
5896
+ publishHint(optimized.systemPrompt);
5539
5897
  return { systemPrompt: optimized.systemPrompt };
5540
5898
  }
5541
5899
 
@@ -5544,24 +5902,28 @@ export default function (pi: ExtensionAPI) {
5544
5902
  // the volume cut even when reorder is a no-op (e.g., short sessions
5545
5903
  // where no stable candidate is long enough).
5546
5904
  if (compressedPrompt !== strippedPrompt && compressedPrompt.trim().length > 0) {
5905
+ publishHint(compressedPrompt);
5547
5906
  return { systemPrompt: compressedPrompt };
5548
5907
  }
5549
5908
  if (strippedPrompt !== event.systemPrompt && strippedPrompt.trim().length > 0) {
5909
+ publishHint(strippedPrompt);
5550
5910
  return { systemPrompt: strippedPrompt };
5551
5911
  }
5552
5912
 
5913
+ publishHint(event.systemPrompt);
5553
5914
  return {};
5554
5915
  });
5555
5916
 
5556
5917
  pi.on("before_provider_request", (event, ctx) => {
5557
5918
  if (!shouldInjectOpenAIPromptCacheKey()) return undefined;
5558
- if (!isOpenAICompatibleApi(ctx.model?.api)) return undefined;
5919
+ const requestModel = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
5920
+ if (!isOpenAICompatibleApi(requestModel?.api)) return undefined;
5559
5921
 
5560
5922
  return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
5561
5923
  });
5562
5924
 
5563
5925
  pi.on("after_provider_response", (event, ctx) => {
5564
- const model = ctx.model;
5926
+ const model = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
5565
5927
  if (!runtimeOptimizerEnabled || !model) return;
5566
5928
  if (event.status !== 400) return;
5567
5929
  if (!isPromptCacheRetention400Applicable(model)) return;
@@ -5586,9 +5948,12 @@ export default function (pi: ExtensionAPI) {
5586
5948
 
5587
5949
  const usage = adapter.normalizeUsage(event.message);
5588
5950
 
5589
- const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
5951
+ // Completed message metadata is request-local and authoritative for virtual
5952
+ // routing providers. Use it whenever it supplies provider/model identity;
5953
+ // fall back to the active context model for direct providers.
5954
+ const statsModel = modelFromAssistantMessage(event.message, ctx.model) ?? ctx.model;
5590
5955
  let routedModelChanged = false;
5591
- if (isRouterModel(ctx.model) && statsModel && !isRouterModel(statsModel)) {
5956
+ if (isVirtualRoutingModel(ctx.model, ctx) && statsModel && !isVirtualRoutingModel(statsModel, ctx)) {
5592
5957
  const nextRoutedModel: PersistedRoutedModelRef = {
5593
5958
  provider: statsModel.provider,
5594
5959
  id: statsModel.id,
@@ -5651,7 +6016,8 @@ export default function (pi: ExtensionAPI) {
5651
6016
  description: "Diagnose Pi cache configuration",
5652
6017
  handler: async (args: string, cmdCtx) => {
5653
6018
  syncSessionHash(cmdCtx);
5654
- const model = cmdCtx.model;
6019
+ const selectedModel = cmdCtx.model;
6020
+ const model = resolveRouteModel(selectedModel, cmdCtx as unknown as ExtensionContext) ?? selectedModel;
5655
6021
  const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
5656
6022
 
5657
6023
  if (subcommand === "enable") {
@@ -5719,14 +6085,12 @@ export default function (pi: ExtensionAPI) {
5719
6085
  return;
5720
6086
  }
5721
6087
 
5722
- const sk = sessionModelKey(model);
5723
6088
  const displayKey = modelKey(model);
5724
6089
 
5725
- // Reset session-scoped stats for the active model.
5726
- delete cacheStatsByModel[sk];
5727
-
5728
- // Clear recent samples for this session+model key.
5729
- recentSamplesByModelKey.delete(sk);
6090
+ // Reset session-scoped stats for the effective active model. If the
6091
+ // selected model is a virtual router and the protocol exposes a live
6092
+ // route, this clears the real upstream bucket, not the router shell.
6093
+ resetStatsForModel(model);
5730
6094
 
5731
6095
  // Persist immediately.
5732
6096
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
@@ -6069,10 +6433,8 @@ export default function (pi: ExtensionAPI) {
6069
6433
  if (!adapter) {
6070
6434
  cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6071
6435
  } else {
6072
- const sk = sessionModelKey(model);
6073
6436
  const displayKey = modelKey(model);
6074
- delete cacheStatsByModel[sk];
6075
- recentSamplesByModelKey.delete(sk);
6437
+ resetStatsForModel(model);
6076
6438
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6077
6439
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6078
6440
  cmdCtx.ui.notify(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.6.5",
3
+ "version": "2.6.6",
4
4
  "description": "Improve Pi prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "pi-package",