pi-cache-optimizer 2.6.4 → 2.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -71,6 +71,8 @@ const NO_OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_NO_OPENAI_CACHE_KEY";
71
71
  const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
72
72
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
73
73
  const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
74
/**
 * Well-known key (global symbol registry) of the `globalThis` slot that holds
 * the shared routing registry, protocol version 1.
 */
const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("pi.routing.registry.v1");

/**
 * Well-known key (global symbol registry) of the `globalThis` slot that holds
 * the cache hints service, protocol version 1.
 */
const PI_CACHE_HINTS_SYMBOL = Symbol.for("pi.cache.hints.v1");
74
76
 
75
77
  let runtimeOptimizerEnabled = true;
76
78
 
@@ -168,6 +170,80 @@ type PersistedRoutedModelRef = {
168
170
  name?: string;
169
171
  };
170
172
 
173
/**
 * A single resolved route: which concrete provider/model a virtual
 * (router-style) provider/model currently maps to.
 */
type PiRouteSnapshot = {
  /** Provider id of the virtual model that was routed. */
  virtualProvider: string;
  /** Model id of the virtual model that was routed. */
  virtualModelId: string;
  /** Provider id of the concrete route target. */
  provider: string;
  /** Model id of the concrete route target. */
  modelId: string;
  /** API identifier of the target, when the router reports one. */
  api?: string;
  /** Canonical model id of the target, when the router reports one. */
  canonicalModelId?: string;
  /** Free-form label for the route. */
  routeLabel?: string;
  /** Lifecycle state of the route. */
  status?: "planned" | "trying" | "selected" | "success" | "failed";
  /** Hashed session id the route belongs to. */
  sessionIdHash?: string;
  /** Identifier of the request the route belongs to. */
  requestId?: string;
  /** Numeric timestamp of the snapshot (presumably epoch milliseconds — confirm with producers). */
  timestamp: number;
};
186
+
187
/**
 * Optional context handed to a router adapter when asking for the active
 * route. Both fields may be absent.
 */
type PiRouteResolveHint = {
  /** Hashed id of the session the lookup is made for. */
  sessionIdHash?: string;
  /** Identifier of the request the lookup is made for. */
  requestId?: string;
};
191
+
192
/**
 * Contract (v1) a router integration registers with the routing registry.
 * Only `virtualProvider` and `resolveActiveRoute` are mandatory.
 */
type PiRouterAdapterV1 = {
  /** Provider id the adapter answers for; used as the registry key. */
  virtualProvider: string;
  /** Returns the route currently active for a virtual model, if any. */
  resolveActiveRoute(
    virtualModelId: string,
    hint?: PiRouteResolveHint,
  ): PiRouteSnapshot | undefined;
  /** Optional: lists the candidate routes for a virtual model. */
  resolveCandidateRoutes?(virtualModelId: string): PiRouteSnapshot[];
  /** Optional: subscribes to route events; the returned function presumably unsubscribes. */
  subscribe?(listener: (event: PiRouteSnapshot) => void): () => void;
};
201
+
202
/**
 * Shared registry (v1) through which router adapters are published and
 * looked up by virtual provider id.
 */
type PiRoutingRegistryV1 = {
  /** Protocol version marker; always `1`. */
  version: 1;
  /** Registers an adapter and returns a function that removes the registration. */
  registerRouter(adapter: PiRouterAdapterV1): () => void;
  /** Looks up the adapter registered for a virtual provider id. */
  getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined;
};
207
+
208
/**
 * Query passed to the cache hints service. Every field is optional.
 */
type PiCacheHintsInput = {
  /** Hashed session id. */
  sessionIdHash?: string;
  /** Provider id of the virtual (routing) model. */
  virtualProvider?: string;
  /** Model id of the virtual (routing) model. */
  virtualModelId?: string;
  /** Provider id of the upstream model. */
  upstreamProvider?: string;
  /** Model id of the upstream model. */
  upstreamModelId?: string;
  /** API identifier of the request. */
  api?: string;
};
216
+
217
/**
 * Hints returned by the cache hints service. Every field is optional.
 */
type PiCacheHintsOutput = {
  /** System prompt text supplied as a hint. */
  systemPrompt?: string;
  /** Prompt cache key supplied as a hint. */
  promptCacheKey?: string;
  /** Cache retention hint; `"long"` is the only value defined. */
  cacheRetention?: "long";
};
222
+
223
/**
 * A cache-hint query together with its result, stamped with a numeric
 * timestamp.
 */
type PiCacheHintSnapshot = PiCacheHintsInput & PiCacheHintsOutput & {
  /** Numeric timestamp of the snapshot. */
  timestamp: number;
};
226
+
227
/**
 * Cache hints service contract (v1).
 */
type PiCacheHintsV1 = {
  /** Protocol version marker; always `1`. */
  version: 1;
  /** Returns cache hints for the given query, or `undefined` when there are none. */
  getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined;
};
231
+
232
/**
 * `globalThis` widened so protocol slots can be read and written: arbitrary
 * symbol-keyed entries plus two legacy string-keyed globals.
 */
type ProtocolGlobal = typeof globalThis & Record<symbol, unknown> & {
  /** Legacy routing global; shape is unknown and must be validated by readers. */
  __piCacheOptimizerRouter?: unknown;
  /** Legacy cache-key global; shape is unknown and must be validated by readers. */
  __piCacheOptimizerCacheKey__?: unknown;
};
236
+
237
/**
 * Minimal, duck-typed view of a model registry. Every method is optional,
 * so callers must probe before use.
 */
type ModelRegistryLike = {
  /** Direct lookup by provider and model id. */
  find?(provider: string, modelId: string): PiModel | undefined;
  /** Lists the available models. */
  getAvailable?(): PiModel[];
  /** Lists all models. */
  getAll?(): PiModel[];
};
242
+
243
/**
 * The `sessionManager` part of an extension context, optionally extended
 * with a model registry.
 */
type ContextWithOptionalModelRegistry = Pick<ExtensionContext, "sessionManager"> & {
  /** Registry used to resolve models, when the host provides one. */
  modelRegistry?: ModelRegistryLike;
};
246
+
171
247
  type CacheStatsState = {
172
248
  statsByModel: Record<string, CacheStats>;
173
249
  legacyFamily: Partial<Record<CacheProviderId, CacheStats>>;
@@ -636,6 +712,210 @@ function hashSessionId(sessionId: string): string {
636
712
  return createHash("sha256").update(sessionId).digest("hex").slice(0, 16);
637
713
  }
638
714
 
715
/**
 * Returns `globalThis` viewed as a {@link ProtocolGlobal}, so symbol-keyed
 * protocol slots and the legacy globals can be accessed with types.
 */
function getProtocolGlobal(): ProtocolGlobal {
  const host = globalThis as ProtocolGlobal;
  return host;
}
718
+
719
/**
 * Returns the first argument that is a non-empty string (per
 * `isNonEmptyString`), trimmed; `undefined` when no argument qualifies.
 *
 * @param values Candidates, checked in order.
 */
function firstNonEmptyString(...values: unknown[]): string | undefined {
  const match = values.find((candidate): candidate is string => isNonEmptyString(candidate));
  return match?.trim();
}
725
+
726
/**
 * Reads the session id from the context's session manager and returns its
 * hash, or `undefined` when the session id is falsy.
 */
function sessionHashFromContext(ctx: Pick<ExtensionContext, "sessionManager">): string | undefined {
  const rawSessionId = ctx.sessionManager.getSessionId();
  if (!rawSessionId) return undefined;
  return hashSessionId(rawSessionId);
}
730
+
731
/**
 * Type guard for router adapters: the value must be a record with a
 * non-empty `virtualProvider` string and a `resolveActiveRoute` function.
 * The optional adapter members are not inspected.
 */
function isPiRouterAdapterV1(value: unknown): value is PiRouterAdapterV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (!isNonEmptyString(candidate.virtualProvider)) return false;
  return typeof candidate.resolveActiveRoute === "function";
}
735
+
736
/**
 * Type guard for routing registries: the value must be a record with
 * `version === 1` and function-valued `registerRouter` and `getRouter`.
 */
function isRoutingRegistryV1(value: unknown): value is PiRoutingRegistryV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (candidate.version !== 1) return false;
  if (typeof candidate.registerRouter !== "function") return false;
  return typeof candidate.getRouter === "function";
}
740
+
741
/**
 * Creates an in-memory routing registry (protocol v1).
 *
 * Adapters are stored in a private map keyed by their trimmed
 * `virtualProvider`. Registering a second adapter under the same key
 * replaces the first.
 *
 * @returns A registry whose `registerRouter` returns an unregister function
 *          and whose `getRouter` looks adapters up by provider id.
 */
function createRoutingRegistry(): PiRoutingRegistryV1 {
  const routers = new Map<string, PiRouterAdapterV1>();
  return {
    version: 1,
    registerRouter(adapter: PiRouterAdapterV1): () => void {
      // Invalid adapters are ignored; callers still get a callable no-op.
      if (!isPiRouterAdapterV1(adapter)) return () => undefined;
      const key = adapter.virtualProvider.trim();
      routers.set(key, adapter);
      return () => {
        // Only remove the entry if it is still ours, so a stale unregister
        // cannot evict an adapter that replaced this one.
        if (routers.get(key) === adapter) routers.delete(key);
      };
    },
    getRouter(virtualProvider: string): PiRouterAdapterV1 | undefined {
      // The registry is shared through a global symbol, so untyped callers
      // may pass anything; treat non-strings as "no router".
      if (typeof virtualProvider !== "string") return undefined;
      // Keys are stored trimmed, so lookups must be normalised the same way;
      // otherwise a padded provider id could never match a registration.
      return routers.get(virtualProvider.trim());
    },
  };
}
758
+
759
/**
 * Reads the routing registry slot on the protocol global and returns its
 * value only if it passes the v1 registry guard; otherwise `undefined`.
 */
function getRoutingRegistry(): PiRoutingRegistryV1 | undefined {
  const slotValue = getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL];
  if (isRoutingRegistryV1(slotValue)) return slotValue;
  return undefined;
}
763
+
764
/**
 * Returns the routing registry published on the protocol global. If the slot
 * holds no valid v1 registry, a new one is created and stored there, which
 * overwrites any invalid value previously in the slot.
 */
function ensureRoutingRegistry(): PiRoutingRegistryV1 {
  const published = getRoutingRegistry();
  if (!published) {
    const fresh = createRoutingRegistry();
    getProtocolGlobal()[PI_ROUTING_REGISTRY_SYMBOL] = fresh;
    return fresh;
  }
  return published;
}
772
+
773
/**
 * Validates an untrusted value as a route status.
 *
 * @returns The value itself when it is one of the five known status strings,
 *          otherwise `undefined`.
 */
function parseRouteStatus(value: unknown): PiRouteSnapshot["status"] | undefined {
  const knownStatuses: ReadonlyArray<NonNullable<PiRouteSnapshot["status"]>> = [
    "planned",
    "trying",
    "selected",
    "success",
    "failed",
  ];
  return knownStatuses.find((status) => status === value);
}
778
+
779
/**
 * Normalises an untrusted value into a {@link PiRouteSnapshot}.
 *
 * Several alias keys are accepted for the same field. The first non-empty
 * string wins, in this order:
 * - virtual provider: `virtualProvider`, then `fallbackVirtualProvider`
 * - virtual model: `virtualModelId`, `virtualModel`, then `fallbackVirtualModelId`
 * - provider: `provider`, `upstreamProvider`, `targetProvider`
 * - model: `modelId`, `upstreamModelId`, `targetModelId`, `responseModel`
 *
 * @param value Candidate snapshot; anything `asRecord` rejects yields `undefined`.
 * @param fallbackVirtualProvider Used when the record names no virtual provider.
 * @param fallbackVirtualModelId Used when the record names no virtual model.
 * @returns The snapshot, or `undefined` when any of the four identity fields
 *          is missing. `timestamp` falls back to `Date.now()` when
 *          `getNumber` yields nothing for the record's `timestamp`.
 */
function parseRouteSnapshot(
  value: unknown,
  fallbackVirtualProvider?: string,
  fallbackVirtualModelId?: string,
): PiRouteSnapshot | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;

  const virtualProvider = firstNonEmptyString(raw.virtualProvider, fallbackVirtualProvider);
  const virtualModelId = firstNonEmptyString(raw.virtualModelId, raw.virtualModel, fallbackVirtualModelId);
  const provider = firstNonEmptyString(raw.provider, raw.upstreamProvider, raw.targetProvider);
  const modelId = firstNonEmptyString(raw.modelId, raw.upstreamModelId, raw.targetModelId, raw.responseModel);

  // A snapshot without both ends of the route is unusable.
  if (!virtualProvider || !virtualModelId) return undefined;
  if (!provider || !modelId) return undefined;

  const timestamp = getNumber(raw.timestamp) ?? Date.now();
  const snapshot: PiRouteSnapshot = {
    virtualProvider,
    virtualModelId,
    provider,
    modelId,
    api: firstNonEmptyString(raw.api),
    canonicalModelId: firstNonEmptyString(raw.canonicalModelId),
    routeLabel: firstNonEmptyString(raw.routeLabel, raw.label),
    status: parseRouteStatus(raw.status),
    sessionIdHash: firstNonEmptyString(raw.sessionIdHash),
    requestId: firstNonEmptyString(raw.requestId),
    timestamp,
  };
  return snapshot;
}
808
+
809
/**
 * Resolves the route currently active for a (possibly virtual) model.
 *
 * Lookup order:
 * 1. The adapter registered for `model.provider` in the routing registry.
 *    An adapter that throws is logged and skipped.
 * 2. The legacy `__piCacheOptimizerRouter` global, consulted only when the
 *    lower-cased provider id contains "router". It may be a function called
 *    as `(provider, modelId, hint)`, an object with a `resolveActiveRoute`
 *    method, or a plain snapshot-like value.
 *
 * @param model Model to resolve; `undefined` yields `undefined`.
 * @param ctx When given, its hashed session id is passed to resolvers as a hint.
 * @returns The parsed snapshot, or `undefined` when nothing resolves or the
 *          legacy resolver throws.
 */
function resolveActiveRouteSnapshot(
  model: PiModel | undefined,
  ctx?: Pick<ExtensionContext, "sessionManager">,
): PiRouteSnapshot | undefined {
  if (!model) return undefined;

  const virtualModel: PiModel = model;
  let hint: PiRouteResolveHint | undefined;
  if (ctx) hint = { sessionIdHash: sessionHashFromContext(ctx) };

  // The virtual provider/model always serve as fallbacks for raw results
  // that do not name them.
  const toSnapshot = (raw: unknown): PiRouteSnapshot | undefined =>
    parseRouteSnapshot(raw, virtualModel.provider, virtualModel.id);

  const registered = getRoutingRegistry()?.getRouter(virtualModel.provider);
  if (registered) {
    try {
      const viaRegistry = toSnapshot(registered.resolveActiveRoute(virtualModel.id, hint));
      if (viaRegistry) return viaRegistry;
    } catch (error) {
      console.warn(`${LOG_PREFIX}: routing registry adapter failed`, error);
    }
  }

  // Temporary migration shim for the prototype global used by early router PRs.
  // New integrations should use Symbol.for("pi.routing.registry.v1") instead.
  const legacy = getProtocolGlobal().__piCacheOptimizerRouter;
  if (!legacy) return undefined;
  if (!lower(virtualModel.provider).includes("router")) return undefined;

  try {
    if (typeof legacy === "function") {
      return toSnapshot(legacy(virtualModel.provider, virtualModel.id, hint));
    }
    const legacyResolver = asRecord(legacy)?.resolveActiveRoute;
    if (typeof legacyResolver === "function") {
      return toSnapshot(legacyResolver.call(legacy, virtualModel.id, hint));
    }
    // Neither callable nor a resolver object: treat the global itself as a snapshot.
    return toSnapshot(legacy);
  } catch (error) {
    console.warn(`${LOG_PREFIX}: legacy routing global failed`, error);
    return undefined;
  }
}
849
+
850
/**
 * Builds a model object for the target of a route snapshot.
 *
 * Every property of `fallback` is carried over first. The identity fields
 * (`id`, `name`, `provider`, `api`) are then taken from the snapshot; `api`
 * falls back to the fallback's value and then to `""`. The remaining listed
 * fields keep the fallback's value or receive a neutral default.
 *
 * @param snapshot Route whose target model is described.
 * @param fallback Optional model (the virtual model, in current callers)
 *                 whose settings are inherited.
 */
function routeSnapshotToPiModel(snapshot: PiRouteSnapshot, fallback?: PiModel): PiModel {
  const { modelId, provider, api, canonicalModelId } = snapshot;
  return {
    ...(fallback ?? {}),
    // Identity comes from the route target, never from the fallback.
    id: modelId,
    name: canonicalModelId ?? modelId,
    provider,
    api: api ?? fallback?.api ?? "",
    // Everything below is inherited, with neutral defaults when there is no fallback.
    baseUrl: fallback?.baseUrl ?? "",
    reasoning: fallback?.reasoning ?? false,
    input: fallback?.input ?? ["text"],
    cost: fallback?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: fallback?.contextWindow ?? 0,
    maxTokens: fallback?.maxTokens ?? 0,
    compat: fallback?.compat,
  } as PiModel;
}
866
+
867
/**
 * Looks a model up in a duck-typed registry, trying progressively broader
 * sources: `find`, then the `getAvailable` list, then the `getAll` list.
 * Sources the registry does not implement are skipped, and `getAll` is only
 * consulted when the earlier sources produce no match.
 *
 * @returns The first match, or `undefined` (also when `registry` is `undefined`).
 */
function findModelInRegistry(registry: ModelRegistryLike | undefined, provider: string, id: string): PiModel | undefined {
  const direct = registry?.find?.(provider, id);
  if (direct) return direct;

  const isRequested = (candidate: PiModel): boolean => candidate.provider === provider && candidate.id === id;

  const fromAvailable = (registry?.getAvailable?.() ?? []).find(isRequested);
  if (fromAvailable) return fromAvailable;

  return (registry?.getAll?.() ?? []).find(isRequested);
}
878
+
879
/**
 * Resolves the concrete model behind a virtual routing model.
 *
 * @param model Model to resolve.
 * @param ctx Optional context; supplies the session hint and, when present,
 *            the model registry searched for the route target.
 * @returns The registry's entry for the route target when one exists,
 *          otherwise a model synthesised from the snapshot with `model` as
 *          fallback; `undefined` when no active route resolves.
 */
function resolveRouteModel(
  model: PiModel | undefined,
  ctx?: ContextWithOptionalModelRegistry,
): PiModel | undefined {
  const route = resolveActiveRouteSnapshot(model, ctx);
  if (!route) return undefined;

  const registered = findModelInRegistry(ctx?.modelRegistry, route.provider, route.modelId);
  return registered ?? routeSnapshotToPiModel(route, model);
}
889
+
890
/**
 * Tells whether a model should be treated as a virtual routing model. The
 * checks run in order and stop at the first that succeeds:
 * `isRouterModel` accepts it, a router adapter is registered for its
 * provider, or an active route snapshot resolves for it.
 */
function isVirtualRoutingModel(model: PiModel | undefined, ctx?: Pick<ExtensionContext, "sessionManager">): boolean {
  if (!model) return false;
  if (isRouterModel(model)) return true;
  if (getRoutingRegistry()?.getRouter(model.provider)) return true;
  return !!resolveActiveRouteSnapshot(model, ctx);
}
894
+
895
/**
 * Type guard for cache hints services: the value must be a record with
 * `version === 1` and a function-valued `getHints`.
 */
function isCacheHintsServiceV1(value: unknown): value is PiCacheHintsV1 {
  const candidate = asRecord(value);
  if (!candidate) return false;
  if (candidate.version !== 1) return false;
  return typeof candidate.getHints === "function";
}
899
+
900
/**
 * Reads the cache hints slot on the protocol global and returns its value
 * only if it passes the v1 service guard; otherwise `undefined`.
 */
function getCacheHintsService(): PiCacheHintsV1 | undefined {
  const slotValue = getProtocolGlobal()[PI_CACHE_HINTS_SYMBOL];
  if (isCacheHintsServiceV1(slotValue)) return slotValue;
  return undefined;
}
904
+
905
/**
 * Publishes `service` in the cache hints slot of the protocol global,
 * replacing whatever was there.
 *
 * @returns An uninstall function. It does nothing if the slot no longer
 *          holds `service`. Otherwise it restores the value that was in the
 *          slot at install time, or deletes the slot when that value was
 *          `undefined`.
 */
function installCacheHintsService(service: PiCacheHintsV1): () => void {
  const host = getProtocolGlobal();
  const displaced = host[PI_CACHE_HINTS_SYMBOL];
  host[PI_CACHE_HINTS_SYMBOL] = service;

  return function uninstall(): void {
    // Someone else took over the slot since we installed: leave it alone.
    if (host[PI_CACHE_HINTS_SYMBOL] !== service) return;

    if (displaced !== undefined) {
      host[PI_CACHE_HINTS_SYMBOL] = displaced;
      return;
    }
    delete host[PI_CACHE_HINTS_SYMBOL];
  };
}
918
+
639
919
  /**
640
920
  * Build a session-scoped stats key from a session hash + provider/id.
641
921
  * Pure function (no closure dependency) for use by tests and internals.
@@ -1403,10 +1683,10 @@ function modelFromAssistantMessage(message: unknown, fallback: PiModel | undefin
1403
1683
  const record = getAssistantRecord(message);
1404
1684
  if (!record) return fallback;
1405
1685
 
1406
- const id = lower(record.responseModel) || lower(record.model) || fallback?.id;
1407
- const provider = lower(record.provider) || fallback?.provider;
1408
- const api = lower(record.api) || fallback?.api;
1409
- if (!id || !provider || !api) return fallback;
1686
+ const id = firstNonEmptyString(record.responseModel, record.model, fallback?.id);
1687
+ const provider = firstNonEmptyString(record.provider, fallback?.provider);
1688
+ const api = firstNonEmptyString(record.api, fallback?.api) ?? "";
1689
+ if (!id || !provider) return fallback;
1410
1690
 
1411
1691
  return {
1412
1692
  ...(fallback ?? {}),
@@ -1612,7 +1892,7 @@ function hasEffectivePromptCacheKey(record: UnknownRecord): boolean {
1612
1892
  return isNonEmptyString(record.prompt_cache_key) || isNonEmptyString(record.promptCacheKey);
1613
1893
  }
1614
1894
 
1615
/**
 * Type guard: true when `value` is a string that still has characters after
 * trimming whitespace.
 */
function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== "string") return false;
  return value.trim() !== "";
}
1618
1898
 
@@ -1637,9 +1917,6 @@ function describeMissingOpenAIFamilyProxyCompat(model: PiModel): string[] {
1637
1917
  if (!isOpenAICompatibleProxyApi(model.api)) return missing;
1638
1918
  if (isOfficialOpenAIBaseUrl(model)) return missing;
1639
1919
 
1640
- if (compat.supportsLongCacheRetention !== true) {
1641
- missing.push("supportsLongCacheRetention");
1642
- }
1643
1920
  if (compat.sendSessionAffinityHeaders !== true) {
1644
1921
  missing.push("sendSessionAffinityHeaders");
1645
1922
  }
@@ -1660,9 +1937,6 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
1660
1937
  if (!isOpenAICompatibleProxyApi(model.api)) return missing;
1661
1938
  if (isOfficialOpenAIBaseUrl(model)) return missing;
1662
1939
 
1663
- if (compat.supportsLongCacheRetention !== true) {
1664
- missing.push("supportsLongCacheRetention");
1665
- }
1666
1940
  if (compat.sendSessionAffinityHeaders !== true) {
1667
1941
  missing.push("sendSessionAffinityHeaders");
1668
1942
  }
@@ -1670,6 +1944,20 @@ function describeMissingOpenAICompatibleProxyCompat(model: PiModel): string[] {
1670
1944
  return missing;
1671
1945
  }
1672
1946
 
1947
/**
 * Lists optional compat flags that are not enabled for a model.
 *
 * Applies only when the model's API passes `isOpenAICompatibleProxyApi` and
 * its base URL is not the official OpenAI one. In every other case the list
 * is empty.
 *
 * @returns `["supportsLongCacheRetention"]` when that flag is not strictly
 *          `true` in the model's compat settings, otherwise `[]`.
 */
function describeOptionalOpenAICompatibleProxyCompat(model: PiModel): string[] {
  const compat = getCompat(model);

  const applicable = isOpenAICompatibleProxyApi(model.api) && !isOfficialOpenAIBaseUrl(model);
  if (!applicable) return [];

  return compat.supportsLongCacheRetention === true ? [] : ["supportsLongCacheRetention"];
}
1960
+
1673
1961
  function buildSafeOpenAIProxyCompatSuggestion(missing: string[]): Record<string, boolean> {
1674
1962
  const suggestion: Record<string, boolean> = {};
1675
1963
  if (missing.includes("sendSessionAffinityHeaders")) {
@@ -1760,21 +2048,22 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
1760
2048
  lines.push("Safe default suggestion:");
1761
2049
  }
1762
2050
  lines.push(JSON.stringify(suggestion, null, 2));
1763
- } else if (missing.includes("supportsLongCacheRetention")) {
1764
- lines.push("No safe automatic JSON change is recommended for `supportsLongCacheRetention`.");
1765
2051
  }
1766
2052
 
1767
2053
  if (missing.includes("sendSessionAffinityHeaders")) {
1768
2054
  lines.push("- sendSessionAffinityHeaders: recommended for third-party proxies when supported; it helps keep one Pi session on the same upstream/backend.");
1769
2055
  }
1770
- if (missing.includes("supportsLongCacheRetention")) {
1771
- lines.push("- supportsLongCacheRetention: optional. Enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
1772
- lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
1773
- }
1774
-
1775
2056
  appendCredentialSafeProviderGuidance(lines, options, suggestion);
1776
2057
  }
1777
2058
 
2059
/**
 * Appends the advisory section about optional compat flags to `lines`
 * (mutated in place). Nothing is appended unless `optional` contains
 * `"supportsLongCacheRetention"`.
 *
 * @param lines Output buffer to append to.
 * @param optional Names of optional flags that are not enabled.
 */
function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: string[]): void {
  const mentionsLongRetention = optional.includes("supportsLongCacheRetention");
  if (!mentionsLongRetention) return;

  lines.push(
    "",
    "Optional (not required, not auto-fixed):",
    "- supportsLongCacheRetention: enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.",
  );
  lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
}
2066
+
1778
2067
  /**
1779
2068
  * Build the warning text displayed to users when an OpenAI-family third-party
1780
2069
  * proxy is missing one or more cache/session-affinity compat flags.
@@ -2877,7 +3166,10 @@ function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter
2877
3166
  }
2878
3167
 
2879
3168
  function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
2880
- const responseModel = isRouterModel(model) ? modelFromAssistantMessage(message, model) : model;
3169
+ // Assistant message metadata is request-local and authoritative for virtual
3170
+ // routing providers. Use it first for every model; direct providers normally
3171
+ // echo the same provider/model and therefore remain unchanged.
3172
+ const responseModel = modelFromAssistantMessage(message, model);
2881
3173
  return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, responseModel));
2882
3174
  }
2883
3175
 
@@ -3121,12 +3413,15 @@ function parseCacheStats(value: unknown): CacheStats | undefined {
3121
3413
 
3122
3414
  function parsePersistedRoutedModelRef(value: unknown): PersistedRoutedModelRef | undefined {
3123
3415
  const record = asRecord(value);
3124
- if (!record || !isNonEmptyString(record.provider) || !isNonEmptyString(record.id)) return undefined;
3416
+ const provider = record?.provider;
3417
+ const id = record?.id;
3418
+ const name = record?.name;
3419
+ if (!isNonEmptyString(provider) || !isNonEmptyString(id)) return undefined;
3125
3420
 
3126
3421
  return {
3127
- provider: record.provider.trim(),
3128
- id: record.id.trim(),
3129
- name: isNonEmptyString(record.name) ? record.name.trim() : record.id.trim(),
3422
+ provider: provider.trim(),
3423
+ id: id.trim(),
3424
+ name: isNonEmptyString(name) ? name.trim() : id.trim(),
3130
3425
  };
3131
3426
  }
3132
3427
 
@@ -3149,7 +3444,7 @@ function buildExactRouterStatusEntry(
3149
3444
  sessionHash: string | undefined,
3150
3445
  statsByModel: Record<string, CacheStats>,
3151
3446
  lastRoutedModel: PersistedRoutedModelRef | undefined,
3152
- ): { adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3447
+ ): { model: PiModel; adapter: CacheProviderAdapter; stats: CacheStats } | undefined {
3153
3448
  if (!sessionHash || !lastRoutedModel) return undefined;
3154
3449
 
3155
3450
  const model = routedModelRefToPiModel(lastRoutedModel);
@@ -3157,7 +3452,7 @@ function buildExactRouterStatusEntry(
3157
3452
  if (!adapter) return undefined;
3158
3453
 
3159
3454
  const key = makeSessionModelKey(sessionHash, lastRoutedModel.provider, lastRoutedModel.id);
3160
- return { adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3455
+ return { model, adapter, stats: statsByModel[key] ?? emptyCacheStats() };
3161
3456
  }
3162
3457
 
3163
3458
  function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
@@ -3530,8 +3825,9 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3530
3825
  provider.includes("openrouter")
3531
3826
  ) {
3532
3827
  const compat = getCompat(model);
3533
- const hasOnly = !!(compat as Record<string, unknown>)["openRouterRouting"]?.only;
3534
- const hasOrder = !!(compat as Record<string, unknown>)["openRouterRouting"]?.order;
3828
+ const routing = asRecord((compat as Record<string, unknown>)["openRouterRouting"]);
3829
+ const hasOnly = !!routing?.only;
3830
+ const hasOrder = !!routing?.order;
3535
3831
 
3536
3832
  notes.push(
3537
3833
  "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
@@ -3566,8 +3862,9 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3566
3862
  provider.includes("vercel-ai-gateway")
3567
3863
  ) {
3568
3864
  const compat = getCompat(model);
3569
- const hasOnly = !!(compat as Record<string, unknown>)["vercelGatewayRouting"]?.only;
3570
- const hasOrder = !!(compat as Record<string, unknown>)["vercelGatewayRouting"]?.order;
3865
+ const routing = asRecord((compat as Record<string, unknown>)["vercelGatewayRouting"]);
3866
+ const hasOnly = !!routing?.only;
3867
+ const hasOrder = !!routing?.order;
3571
3868
 
3572
3869
  notes.push(
3573
3870
  "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
@@ -3694,8 +3991,21 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
3694
3991
  const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
3695
3992
  const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
3696
3993
  const missing = describeMissingCacheCompatForModel(model);
3994
+ const optionalOpenAIProxyCompat = (!adaptiveThinkingApplicable && !deepSeekCompatApplicable)
3995
+ ? describeOptionalOpenAICompatibleProxyCompat(model)
3996
+ : [];
3997
+ const fixSug = buildFixSuggestion(model);
3998
+ const safeFixableMissing = fixSug ? Object.keys(fixSug.compatKeys) : [];
3999
+ const advisoryMissing = missing.filter(m => !safeFixableMissing.includes(m));
4000
+
4001
+ if (safeFixableMissing.length > 0) {
4002
+ lines.push(`⚠️ Missing compat flags: ${safeFixableMissing.join(", ")}`);
4003
+ }
4004
+ if (advisoryMissing.length > 0) {
4005
+ lines.push(`ℹ️ Optional: ${advisoryMissing.join(", ")} (enable only if needed)`);
4006
+ }
4007
+
3697
4008
  if (missing.length > 0) {
3698
- lines.push(`⚠️ Missing compat flags: ${missing.join(", ")}`);
3699
4009
  const key = modelKey(model);
3700
4010
  const slashIdx = key.indexOf("/");
3701
4011
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
@@ -3707,9 +4017,11 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
3707
4017
  appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3708
4018
  } else {
3709
4019
  appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4020
+ appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
3710
4021
  }
3711
4022
  } else if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
3712
4023
  lines.push("✅ Compat fully configured.");
4024
+ appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
3713
4025
  } else {
3714
4026
  lines.push(...getCompatCheckNotApplicableLines(model));
3715
4027
  }
@@ -3767,7 +4079,8 @@ function buildLowHitDiagnosis(
3767
4079
  const lines: string[] = [];
3768
4080
 
3769
4081
  // 1. Missing compat flags (adapter-aware: DeepSeek has extra reasoning compat)
3770
- const missingCompat = describeMissingCacheCompatForModel(model);
4082
+ const fixSugLHD = buildFixSuggestion(model);
4083
+ const safeFixableMissingLHD = fixSugLHD ? Object.keys(fixSugLHD.compatKeys) : [];
3771
4084
 
3772
4085
  // 2. Router/channel risk (reuse existing check)
3773
4086
  const routerNotes = describeRouterChannelDiagnostics(model);
@@ -3785,7 +4098,7 @@ function buildLowHitDiagnosis(
3785
4098
  // 5. Today's overall trend from persisted stats
3786
4099
  const todayStats = stats ?? emptyCacheStats();
3787
4100
 
3788
- const hasMissingCompat = missingCompat.length > 0;
4101
+ const hasMissingCompat = safeFixableMissingLHD.length > 0;
3789
4102
  const hasRouterRisk = routerNotes.length > 0;
3790
4103
  const hasUsageMissing = missingUsageSamples > 0;
3791
4104
 
@@ -3814,7 +4127,7 @@ function buildLowHitDiagnosis(
3814
4127
 
3815
4128
  // Priority 1: missing compat flags
3816
4129
  if (hasMissingCompat) {
3817
- lines.push(`⚠️ Missing compat flags: ${missingCompat.join(", ")}`);
4130
+ lines.push(`⚠️ Missing compat flags: ${safeFixableMissingLHD.join(", ")}`);
3818
4131
  lines.push(" These flags enable prompt caching and session-affinity routing.");
3819
4132
  lines.push(" Run /cache-optimizer compat for edit instructions.");
3820
4133
  }
@@ -3867,11 +4180,17 @@ function buildLowHitDiagnosis(
3867
4180
 
3868
4181
  function buildCompatDiagnosis(model: PiModel): string | undefined {
3869
4182
  const missing = describeMissingCacheCompatForModel(model);
4183
+ const fixSugC = buildFixSuggestion(model);
4184
+ const safeFixableMissingC = fixSugC ? Object.keys(fixSugC.compatKeys) : [];
4185
+ const advisoryMissingC = missing.filter(m => !safeFixableMissingC.includes(m));
3870
4186
  const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
3871
4187
  const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
4188
+ const optionalOpenAIProxyCompat = (!adaptiveThinkingApplicable && !deepSeekCompatApplicable)
4189
+ ? describeOptionalOpenAICompatibleProxyCompat(model)
4190
+ : [];
3872
4191
  const routerNotes = describeRouterChannelDiagnostics(model);
3873
4192
 
3874
- if (missing.length === 0 && routerNotes.length === 0) return undefined;
4193
+ if (missing.length === 0 && routerNotes.length === 0 && optionalOpenAIProxyCompat.length === 0) return undefined;
3875
4194
 
3876
4195
  const key = modelKey(model);
3877
4196
  const lines: string[] = [];
@@ -3881,7 +4200,12 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3881
4200
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
3882
4201
  const modelsJsonPath = getModelsJsonDisplayPath();
3883
4202
  lines.push(`Active model: ${key}`);
3884
- lines.push(`Missing: ${missing.join(", ")}`);
4203
+ if (safeFixableMissingC.length > 0) {
4204
+ lines.push(`Safe-fixable: ${safeFixableMissingC.join(", ")}`);
4205
+ }
4206
+ if (advisoryMissingC.length > 0) {
4207
+ lines.push(`Optional: ${advisoryMissingC.join(", ")} (enable only if needed)`);
4208
+ }
3885
4209
  lines.push("");
3886
4210
  lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
3887
4211
  lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
@@ -3891,16 +4215,18 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
3891
4215
  appendDeepSeekCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
3892
4216
  } else {
3893
4217
  appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4218
+ appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
3894
4219
  }
3895
4220
  }
3896
4221
 
3897
- // When compat is fully configured but router notes exist, prefix the status.
3898
- if (routerNotes.length > 0 && missing.length === 0) {
4222
+ // When compat is fully configured but router/optional notes exist, prefix the status.
4223
+ if ((routerNotes.length > 0 || optionalOpenAIProxyCompat.length > 0) && missing.length === 0) {
3899
4224
  if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
3900
4225
  lines.push("✅ Compat fully configured.");
3901
4226
  if (isPromptCacheRetention400Applicable(model)) {
3902
4227
  lines.push(getPromptCacheRetentionUnsupportedHint());
3903
4228
  }
4229
+ appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
3904
4230
  } else {
3905
4231
  lines.push(...getCompatCheckNotApplicableLines(model));
3906
4232
  }
@@ -4131,8 +4457,10 @@ function stripJsoncComments(text: string): string {
4131
4457
  let i = 0;
4132
4458
  while (i < text.length) {
4133
4459
  const ch = text[i];
4460
+
4134
4461
  if (ch === '"') {
4135
- // String literal — copy until closing quote (handle escapes)
4462
+ // String literal — copy byte-for-byte until the closing quote.
4463
+ // Escaped quotes/slashes must not be mistaken for comment delimiters.
4136
4464
  out.push(ch);
4137
4465
  i++;
4138
4466
  while (i < text.length) {
@@ -4146,39 +4474,74 @@ function stripJsoncComments(text: string): string {
4146
4474
  break;
4147
4475
  }
4148
4476
  }
4149
- } else if (ch === '/' && i + 1 < text.length && text[i + 1] === '/') {
4150
- // Line comment — replace with spaces until newline
4151
- out.push(' ');
4152
- i++;
4477
+ continue;
4478
+ }
4479
+
4480
+ if (ch === '/' && i + 1 < text.length && text[i + 1] === '/') {
4481
+ // Line comment — replace BOTH slashes and every comment byte with
4482
+ // spaces, but leave the newline to be copied by the normal path.
4483
+ out.push(' ', ' ');
4484
+ i += 2;
4153
4485
  while (i < text.length && text[i] !== '\n') {
4154
4486
  out.push(' ');
4155
4487
  i++;
4156
4488
  }
4157
- } else if (ch === '/' && i + 1 < text.length && text[i + 1] === '*') {
4158
- // Block comment — replace with spaces (preserve newlines)
4159
- out.push(' ');
4160
- i++;
4161
- while (i + 1 < text.length) {
4162
- if (text[i] === '*' && text[i + 1] === '/') {
4163
- out.push(' ');
4489
+ continue;
4490
+ }
4491
+
4492
+ if (ch === '/' && i + 1 < text.length && text[i + 1] === '*') {
4493
+ // Block comment — replace every byte with a space except newlines.
4494
+ // This deliberately preserves text.length and all structural offsets.
4495
+ out.push(' ', ' ');
4496
+ i += 2;
4497
+ while (i < text.length) {
4498
+ if (text[i] === '*' && i + 1 < text.length && text[i + 1] === '/') {
4499
+ out.push(' ', ' ');
4164
4500
  i += 2;
4165
4501
  break;
4166
4502
  }
4167
- if (text[i] === '\n') {
4168
- out.push('\n');
4169
- } else {
4170
- out.push(' ');
4171
- }
4503
+ out.push(text[i] === '\n' ? '\n' : ' ');
4172
4504
  i++;
4173
4505
  }
4174
- } else {
4175
- out.push(ch);
4176
- i++;
4506
+ continue;
4177
4507
  }
4508
+
4509
+ out.push(ch);
4510
+ i++;
4178
4511
  }
4179
4512
  return out.join('');
4180
4513
  }
4181
4514
 
4515
/**
 * Blanks out JSONC trailing commas in text whose comments have already been
 * stripped.
 *
 * A comma counts as trailing when the next character that is not JSON
 * whitespace is `}` or `]`. Such commas are overwritten with a space, so the
 * result has the same length as the input and all offsets stay valid.
 * String literals are skipped with `readJsonStringLiteral`. If that helper
 * cannot read a literal, scanning stops and the rest of the text is
 * returned unchanged.
 */
function stripJsoncTrailingCommas(text: string): string {
  const out = text.split("");
  let pos = 0;

  while (pos < out.length) {
    const current = out[pos];

    if (current === '"') {
      const literal = readJsonStringLiteral(text, pos);
      if (!literal) break;
      pos = literal.end;
      continue;
    }

    if (current === ',') {
      // Look past whitespace for the closer that would make this comma trailing.
      let next = pos + 1;
      while (next < out.length && isJsonWhitespace(out[next])) next++;
      const follower = out[next];
      if (follower === '}' || follower === ']') out[pos] = ' ';
    }

    pos++;
  }

  return out.join('');
}
4540
+
4541
/**
 * Parses JSONC text: comments are stripped first, then trailing commas, and
 * the result is handed to `JSON.parse`.
 *
 * @returns The parsed value, untyped; callers must validate it.
 * @throws SyntaxError from `JSON.parse` when the cleaned text is not valid JSON.
 */
function parseJsonc(text: string): unknown {
  const withoutComments = stripJsoncComments(text);
  const strictJson = stripJsoncTrailingCommas(withoutComments);
  return JSON.parse(strictJson);
}
4544
+
4182
4545
  /**
4183
4546
  * JSONC scanner: locate the provider block and model entry in models.json text.
4184
4547
  * Returns the byte offsets for surgical insertion, or undefined if ambiguous.
@@ -4222,153 +4585,51 @@ function locateModelInJsonc(
4222
4585
  // Clean text of comments first for reliable structural scanning
4223
4586
  const clean = stripJsoncComments(text);
4224
4587
 
4225
- // Strategy: find `"providers"` key in the root object, then find the
4226
- // provider key under it, then the `"models"` array, then the array
4227
- // element whose `"id"` matches. We map via the stripped text (comment
4228
- // removal replaces comment chars with spaces, preserving offsets).
4229
-
4230
- const pos = clean.indexOf('"providers"');
4231
- if (pos < 0) return undefined;
4232
-
4233
- // Scan from `"providers"` to find the `{` of the provider block
4234
- let cur = pos + '"providers"'.length;
4235
- // Skip `:`, whitespace, etc.
4236
- while (cur < clean.length && clean[cur] !== '{') cur++;
4237
- if (cur >= clean.length) return undefined;
4238
- cur++; // Skip `{`
4239
-
4240
- // Now scan key-value pairs in the providers object to find the matching providerLabel
4241
- const providerLabelJson = JSON.stringify(providerLabel);
4242
- let providerBrace = -1;
4243
- let providerEndBrace = -1;
4244
-
4245
- while (cur < clean.length) {
4246
- // Skip whitespace/comments
4247
- while (cur < clean.length && (clean[cur] === ' ' || clean[cur] === '\n' || clean[cur] === '\r' || clean[cur] === '\t')) cur++;
4248
- if (cur >= clean.length) break;
4249
- if (clean[cur] === '}') break; // End of providers
4250
-
4251
- // Try to read a string key
4252
- if (clean[cur] !== '"') { cur++; continue; }
4253
- const keyEnd = clean.indexOf('"', cur + 1);
4254
- if (keyEnd < 0) return undefined;
4255
- const key = clean.slice(cur + 1, keyEnd);
4256
- cur = keyEnd + 1;
4257
-
4258
- // Skip `:`
4259
- while (cur < clean.length && clean[cur] !== ':') cur++;
4260
- if (cur >= clean.length) return undefined;
4261
- cur++; // Skip `:`
4262
- while (cur < clean.length && (clean[cur] === ' ' || clean[cur] === '\n' || clean[cur] === '\r' || clean[cur] === '\t')) cur++;
4263
-
4264
- if (key === providerLabel) {
4265
- // Found — expect `{` starting the provider object
4266
- if (clean[cur] !== '{') return undefined;
4267
- providerBrace = cur;
4268
- // Find matching closing `}` for the provider object (track depth)
4269
- let depth = 1;
4270
- let scan = cur + 1;
4271
- while (scan < clean.length && depth > 0) {
4272
- if (clean[scan] === '{') depth++;
4273
- else if (clean[scan] === '}') depth--;
4274
- if (depth > 0) scan++;
4275
- }
4276
- providerEndBrace = scan;
4277
- break;
4278
- }
4279
-
4280
- // Skip the value
4281
- if (clean[cur] === '{') {
4282
- let depth = 1;
4283
- cur++;
4284
- while (cur < clean.length && depth > 0) {
4285
- if (clean[cur] === '{') depth++;
4286
- else if (clean[cur] === '}') depth--;
4287
- cur++;
4288
- }
4289
- } else if (clean[cur] === '[') {
4290
- let depth = 1;
4291
- cur++;
4292
- while (cur < clean.length && depth > 0) {
4293
- if (clean[cur] === '[') depth++;
4294
- else if (clean[cur] === ']') depth--;
4295
- cur++;
4296
- }
4297
- } else if (clean[cur] === '"') {
4298
- const strEnd = clean.indexOf('"', cur + 1);
4299
- if (strEnd < 0) return undefined;
4300
- cur = strEnd + 1;
4301
- } else {
4302
- // Number, boolean, etc.
4303
- while (cur < clean.length && clean[cur] !== ',' && clean[cur] !== '}' && clean[cur] !== '\n') cur++;
4304
- }
4305
- // Skip comma
4306
- if (cur < clean.length && clean[cur] === ',') cur++;
4307
- }
4308
-
4309
- if (providerBrace < 0 || providerEndBrace < 0) return undefined;
4310
-
4311
- // Scan provider object at depth 1 for a provider-level "compat" object.
4312
- // Depth-aware + string-aware so nested model compat objects are not confused
4313
- // with the provider-level one.
4588
+ // Strategy: find `"providers"` as a direct root key, then find the
4589
+ // provider key under it, then the provider's direct `"models"` key.
4590
+ // All object/value traversal uses the string-aware primitives above so
4591
+ // braces, brackets, comment markers, or escaped quotes inside strings do
4592
+ // not corrupt offsets.
4593
+ const rootBrace = skipJsonWhitespace(clean, 0);
4594
+ if (clean[rootBrace] !== "{") return undefined;
4595
+
4596
+ const providersKey = findJsonObjectKey(clean, rootBrace, "providers");
4597
+ if (!providersKey) return undefined;
4598
+ const providersBrace = skipJsonWhitespace(clean, providersKey.valueStart);
4599
+ if (clean[providersBrace] !== "{") return undefined;
4600
+ const providersEnd = findMatchingBracket(clean, providersBrace);
4601
+ if (providersEnd === undefined) return undefined;
4602
+
4603
+ const providerKey = findJsonObjectKey(clean, providersBrace, providerLabel);
4604
+ if (!providerKey || providerKey.keyStart > providersEnd) return undefined;
4605
+ const providerBrace = skipJsonWhitespace(clean, providerKey.valueStart);
4606
+ if (clean[providerBrace] !== "{") return undefined;
4607
+ const providerEndBrace = findMatchingBracket(clean, providerBrace);
4608
+ if (providerEndBrace === undefined || providerEndBrace > providersEnd) return undefined;
4609
+
4610
+ // Provider-level compat is a direct provider child only. Nested model
4611
+ // compat objects are intentionally skipped whole by findJsonObjectKey.
4314
4612
  let providerCompatBrace = -1;
4315
4613
  let providerCompatEnd = -1;
4316
- {
4317
- let pScan = providerBrace + 1;
4318
- let pDepth = 1;
4319
- while (pScan < providerEndBrace && pDepth > 0) {
4320
- const ch = clean[pScan];
4321
- if (ch === '"') {
4322
- // Read the string (key or value) fully
4323
- const strEnd = clean.indexOf('"', pScan + 1);
4324
- if (strEnd < 0) break;
4325
- const str = clean.slice(pScan + 1, strEnd);
4326
- if (pDepth === 1 && str === 'compat') {
4327
- // Confirm it's a key: next non-ws char must be ':'
4328
- let after = strEnd + 1;
4329
- while (after < providerEndBrace && (clean[after] === ' ' || clean[after] === '\n' || clean[after] === '\r' || clean[after] === '\t')) after++;
4330
- if (clean[after] === ':') {
4331
- after++;
4332
- while (after < providerEndBrace && (clean[after] === ' ' || clean[after] === '\n' || clean[after] === '\r' || clean[after] === '\t')) after++;
4333
- if (clean[after] === '{') {
4334
- providerCompatBrace = after;
4335
- let d = 1;
4336
- let s = after + 1;
4337
- while (s < clean.length && d > 0) {
4338
- if (clean[s] === '"') {
4339
- const e = clean.indexOf('"', s + 1);
4340
- if (e < 0) break;
4341
- s = e + 1;
4342
- continue;
4343
- }
4344
- if (clean[s] === '{') d++;
4345
- else if (clean[s] === '}') d--;
4346
- if (d > 0) s++;
4347
- }
4348
- providerCompatEnd = s;
4349
- pScan = s + 1;
4350
- continue;
4351
- }
4352
- }
4353
- }
4354
- pScan = strEnd + 1;
4355
- continue;
4614
+ const providerCompatKey = findJsonObjectKey(clean, providerBrace, "compat");
4615
+ if (providerCompatKey && providerCompatKey.keyStart < providerEndBrace) {
4616
+ const brace = skipJsonWhitespace(clean, providerCompatKey.valueStart);
4617
+ if (clean[brace] === "{") {
4618
+ const end = findMatchingBracket(clean, brace);
4619
+ if (end !== undefined && end <= providerEndBrace) {
4620
+ providerCompatBrace = brace;
4621
+ providerCompatEnd = end;
4356
4622
  }
4357
- if (ch === '{' || ch === '[') pDepth++;
4358
- else if (ch === '}' || ch === ']') pDepth--;
4359
- pScan++;
4360
4623
  }
4361
4624
  }
4362
4625
 
4363
- // Now find the `"models"` array within the provider
4364
- const providerContent = clean.slice(providerBrace + 1, providerEndBrace);
4365
- const modelsIdx = providerContent.indexOf('"models"');
4366
- if (modelsIdx < 0) return undefined;
4626
+ const modelsKey = findJsonObjectKey(clean, providerBrace, "models");
4627
+ if (!modelsKey || modelsKey.keyStart > providerEndBrace) return undefined;
4367
4628
 
4368
- // Find the `[` of the models array
4369
- let modelsScan = providerBrace + 1 + modelsIdx + '"models"'.length;
4370
- while (modelsScan < clean.length && clean[modelsScan] !== '[') modelsScan++;
4371
- if (modelsScan >= clean.length) return undefined;
4629
+ let modelsScan = skipJsonWhitespace(clean, modelsKey.valueStart);
4630
+ if (clean[modelsScan] !== "[") return undefined;
4631
+ const modelsEnd = findMatchingBracket(clean, modelsScan);
4632
+ if (modelsEnd === undefined || modelsEnd > providerEndBrace) return undefined;
4372
4633
  modelsScan++; // Skip `[`
4373
4634
 
4374
4635
  // Scan ALL array elements: collect every model id, and record the target's position
@@ -4379,83 +4640,52 @@ function locateModelInJsonc(
4379
4640
  let compatBrace = -1;
4380
4641
  let compatEndBrace = -1;
4381
4642
 
4382
- while (modelsScan < clean.length) {
4383
- // Skip whitespace/comma
4384
- while (modelsScan < clean.length && (clean[modelsScan] === ' ' || clean[modelsScan] === '\n' || clean[modelsScan] === '\r' || clean[modelsScan] === '\t' || clean[modelsScan] === ',')) modelsScan++;
4385
- if (modelsScan >= clean.length) break;
4386
- if (clean[modelsScan] === ']') break; // End of array
4387
-
4388
- if (clean[modelsScan] !== '{') { modelsScan++; continue; }
4643
+ while (modelsScan < modelsEnd) {
4644
+ modelsScan = skipJsonWhitespace(clean, modelsScan);
4645
+ if (clean[modelsScan] === ',') {
4646
+ modelsScan++;
4647
+ continue;
4648
+ }
4649
+ if (modelsScan >= modelsEnd || clean[modelsScan] === ']') break;
4650
+ if (clean[modelsScan] !== '{') return undefined;
4389
4651
 
4390
- // Found a model object `{`
4391
4652
  const elementBrace = modelsScan;
4653
+ const elementEnd = findMatchingBracket(clean, elementBrace);
4654
+ if (elementEnd === undefined || elementEnd > modelsEnd) return undefined;
4392
4655
 
4393
- // Find the matching closing `}` and extract this element's `"id"` at depth 1
4394
- let depth = 1;
4395
- let scan = modelsScan + 1;
4656
+ const idKey = findJsonObjectKey(clean, elementBrace, "id");
4396
4657
  let elementId: string | undefined;
4397
-
4398
- while (scan < clean.length && depth > 0) {
4399
- if (clean[scan] === '"') {
4400
- const strEnd = clean.indexOf('"', scan + 1);
4401
- if (strEnd < 0) break;
4402
- if (depth === 1 && elementId === undefined && clean.slice(scan, scan + 4) === '"id"') {
4403
- // Found "id" key — find the colon and the value
4404
- let afterKey = scan + 4;
4405
- while (afterKey < clean.length && clean[afterKey] !== ':') afterKey++;
4406
- if (afterKey < clean.length) {
4407
- afterKey++; // skip ':'
4408
- while (afterKey < clean.length && (clean[afterKey] === ' ' || clean[afterKey] === '\n' || clean[afterKey] === '\r' || clean[afterKey] === '\t')) afterKey++;
4409
- if (afterKey < clean.length && clean[afterKey] === '"') {
4410
- const idStart = afterKey + 1;
4411
- const idEnd = clean.indexOf('"', idStart);
4412
- if (idEnd > idStart) {
4413
- elementId = clean.slice(idStart, idEnd);
4414
- }
4415
- }
4416
- }
4417
- }
4418
- scan = strEnd + 1;
4419
- continue;
4658
+ if (idKey && idKey.keyStart < elementEnd) {
4659
+ const idValueStart = skipJsonWhitespace(clean, idKey.valueStart);
4660
+ const idLiteral = readJsonStringLiteral(clean, idValueStart);
4661
+ if (idLiteral && idLiteral.end <= elementEnd) {
4662
+ elementId = idLiteral.value;
4420
4663
  }
4421
- if (clean[scan] === '{') depth++;
4422
- else if (clean[scan] === '}') depth--;
4423
- scan++;
4424
4664
  }
4425
4665
 
4426
- const elementEnd = scan - 1; // The `}` that closed this element
4427
-
4428
4666
  if (elementId !== undefined) {
4429
4667
  allModelIds.push(elementId);
4430
4668
  }
4431
4669
 
4432
4670
  if (elementId === modelId && modelBrace < 0) {
4433
- // This is the target model — record its position and find its compat
4434
4671
  modelBrace = elementBrace;
4435
4672
  modelEndBrace = elementEnd;
4436
- const modelContent = clean.slice(modelBrace + 1, modelEndBrace);
4437
- const compatIdx = modelContent.indexOf('"compat"');
4438
- if (compatIdx >= 0) {
4439
- compatKeyStartClean = modelBrace + 1 + compatIdx;
4440
- let compatScan = compatKeyStartClean + '"compat"'.length;
4441
- while (compatScan < clean.length && clean[compatScan] !== ':') compatScan++;
4442
- compatScan++;
4443
- while (compatScan < clean.length && (clean[compatScan] === ' ' || clean[compatScan] === '\n' || clean[compatScan] === '\r' || clean[compatScan] === '\t')) compatScan++;
4444
- if (compatScan < clean.length && clean[compatScan] === '{') {
4445
- compatBrace = compatScan;
4446
- let cdepth = 1;
4447
- let cscan = compatScan + 1;
4448
- while (cscan < clean.length && cdepth > 0) {
4449
- if (clean[cscan] === '{') cdepth++;
4450
- else if (clean[cscan] === '}') cdepth--;
4451
- if (cdepth > 0) cscan++;
4673
+
4674
+ const compatKey = findJsonObjectKey(clean, modelBrace, "compat");
4675
+ if (compatKey && compatKey.keyStart < modelEndBrace) {
4676
+ compatKeyStartClean = compatKey.keyStart;
4677
+ const brace = skipJsonWhitespace(clean, compatKey.valueStart);
4678
+ if (clean[brace] === "{") {
4679
+ const end = findMatchingBracket(clean, brace);
4680
+ if (end !== undefined && end <= modelEndBrace) {
4681
+ compatBrace = brace;
4682
+ compatEndBrace = end;
4452
4683
  }
4453
- compatEndBrace = cscan;
4454
4684
  }
4455
4685
  }
4456
4686
  }
4457
4687
 
4458
- modelsScan = scan;
4688
+ modelsScan = elementEnd + 1;
4459
4689
  }
4460
4690
 
4461
4691
  if (modelBrace < 0 || modelEndBrace < 0) return undefined;
@@ -4596,6 +4826,48 @@ function decideFixPlacement(
4596
4826
  };
4597
4827
  }
4598
4828
 
4829
+ function findExistingCompatKeysInJsonc( // Report which of `keys` are already present in one compat object of a JSONC document.
4830
+ original: string, // Raw JSONC text; its comments are stripped below before scanning.
4831
+ compatBrace: number, // Offset handed to findJsonObjectKey as the object to search; a negative value yields an empty result.
4832
+ compatEnd: number, // Upper bound for accepted key positions; must be greater than compatBrace.
4833
+ keys: string[], // Candidate key names to look for.
4834
+ ): string[] {
4835
+ if (compatBrace < 0 || compatEnd <= compatBrace) return []; // No usable compat range: report no existing keys.
4836
+ const clean = stripJsoncComments(original);
4837
+ return keys.filter((key) => { // filter keeps the caller's key order.
4838
+ const found = findJsonObjectKey(clean, compatBrace, key);
4839
+ return !!found && found.keyStart < compatEnd; // Keep the key only when a match was found and it starts before compatEnd.
4840
+ });
4841
+ }
4842
+
4843
+ function chooseFixPlacement( // Decide whether the compat fix is written at provider level or at model level, with a reason string.
4844
+ original: string, // Raw JSONC text, passed through to findExistingCompatKeysInJsonc.
4845
+ location: ModelNodeLocation, // Supplies allModelIds and the model-level compat object offsets used below.
4846
+ compatKeys: Record<string, unknown>, // Keys (and values) the fix wants to write; only the key names are inspected here.
4847
+ providerLabel: string, // Forwarded unchanged to decideFixPlacement.
4848
+ ): { placement: "provider" | "model"; reason: string } {
4849
+ const decision = decideFixPlacement(compatKeys, providerLabel, location.allModelIds); // Baseline decision, before checking for model-level overrides.
4850
+ const existingModelKeys = findExistingCompatKeysInJsonc( // Which of the fix keys already exist in the target model's own compat object.
4851
+ original,
4852
+ location.compatObjectBrace,
4853
+ location.compatObjectEnd,
4854
+ Object.keys(compatKeys),
4855
+ );
4856
+
4857
+ // Provider-level writes cannot override a model-level compat key because Pi's
4858
+ // merge order is provider.compat then model.compat. If the active model already
4859
+ // has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
4860
+ // at model level even when the key would otherwise be provider-safe.
4861
+ if (decision.placement === "provider" && existingModelKeys.length > 0) {
4862
+ return {
4863
+ placement: "model",
4864
+ reason: `model-level compat already contains ${existingModelKeys.join(", ")} — repairing the active model override directly`,
4865
+ };
4866
+ }
4867
+
4868
+ return decision; // Otherwise the baseline decision stands unchanged.
4869
+ }
4870
+
4599
4871
  function composeFixInsertion(
4600
4872
  original: string,
4601
4873
  location: ModelNodeLocation,
@@ -4607,14 +4879,12 @@ function composeFixInsertion(
4607
4879
  const targetCompatEnd = placement === "provider" ? location.providerCompatEnd : location.compatObjectEnd;
4608
4880
  const containerBrace = placement === "provider" ? location.providerObjectBrace : location.modelObjectBrace;
4609
4881
 
4610
- // Helper: format the new keys as lines with the given indent, alphabetically sorted.
4611
- const formatKeys = (indent: string): string =>
4612
- Object.entries(compatKeys)
4613
- .sort(([a], [b]) => a.localeCompare(b))
4614
- .map(([k, v]) => {
4615
- const val = typeof v === 'string' ? `"${v}"` : JSON.stringify(v);
4616
- return `${indent}${JSON.stringify(k)}: ${val}`;
4617
- })
4882
+ // Helper: format key/value pairs as lines with the given indent,
4883
+ // alphabetically sorted for stable previews and deterministic edits.
4884
+ const sortedEntries = Object.entries(compatKeys).sort(([a], [b]) => a.localeCompare(b));
4885
+ const formatEntries = (indent: string, entries: Array<[string, unknown]>): string =>
4886
+ entries
4887
+ .map(([k, v]) => `${indent}${JSON.stringify(k)}: ${JSON.stringify(v)}`)
4618
4888
  .join(',\n');
4619
4889
 
4620
4890
  // Helper: line-start indentation of the line containing `offset` in `original`.
@@ -4627,25 +4897,52 @@ function composeFixInsertion(
4627
4897
  };
4628
4898
 
4629
4899
  if (targetCompatBrace >= 0 && targetCompatEnd > targetCompatBrace) {
4630
- // ── Existing compat object: insert new key lines right after `{`. ──
4631
- // The existing interior is preserved BYTE-FOR-BYTE (no reflow, no re-indent).
4900
+ // ── Existing compat object: insert absent keys and surgically replace
4901
+ // direct existing keys whose value is wrong (e.g. thinkingFormat: "legacy").
4902
+ // Unrelated interior bytes/comments/key order are preserved.
4632
4903
  const interiorStart = targetCompatBrace + 1;
4633
4904
  const interior = original.slice(interiorStart, targetCompatEnd);
4634
4905
  const hasContent = interior.trim().length > 0;
4906
+ const clean = stripJsoncComments(original);
4635
4907
 
4636
- // Indent for the new key lines: copy the first existing key line's indent,
4908
+ // Indent for inserted key lines: copy the first existing key line's indent,
4637
4909
  // else derive one level deeper than the compat brace's own line.
4638
4910
  const braceLineIndent = lineIndentAt(targetCompatBrace);
4639
4911
  const innerMatch = interior.match(/\r?\n([ \t]+)\S/);
4640
4912
  const innerIndent = innerMatch ? innerMatch[1] : braceLineIndent + ' ';
4641
- const keysFormatted = formatKeys(innerIndent);
4642
4913
 
4643
- if (hasContent) {
4644
- // `{` + "\n<new keys>," + <original interior untouched> + `}`
4645
- return original.slice(0, interiorStart) + `\n${keysFormatted},` + original.slice(interiorStart);
4914
+ const edits: Array<{ start: number; end: number; text: string }> = [];
4915
+ const missingEntries: Array<[string, unknown]> = [];
4916
+
4917
+ for (const [key, value] of sortedEntries) {
4918
+ const existing = findJsonObjectKey(clean, targetCompatBrace, key);
4919
+ if (existing && existing.keyStart < targetCompatEnd) {
4920
+ const valueStart = skipJsonWhitespace(clean, existing.valueStart);
4921
+ const valueEnd = skipJsonValue(clean, valueStart);
4922
+ if (valueEnd !== undefined && valueEnd <= targetCompatEnd) {
4923
+ const nextValue = JSON.stringify(value);
4924
+ if (original.slice(valueStart, valueEnd) !== nextValue) {
4925
+ edits.push({ start: valueStart, end: valueEnd, text: nextValue });
4926
+ }
4927
+ continue;
4928
+ }
4929
+ }
4930
+ missingEntries.push([key, value]);
4646
4931
  }
4647
- // Empty compat `{}` (or whitespace only): write keys + put `}` back on its own line.
4648
- return original.slice(0, interiorStart) + `\n${keysFormatted}\n${braceLineIndent}` + original.slice(targetCompatEnd);
4932
+
4933
+ if (missingEntries.length > 0) {
4934
+ const keysFormatted = formatEntries(innerIndent, missingEntries);
4935
+ if (hasContent) {
4936
+ edits.push({ start: interiorStart, end: interiorStart, text: `\n${keysFormatted},` });
4937
+ } else {
4938
+ edits.push({ start: interiorStart, end: targetCompatEnd, text: `\n${keysFormatted}\n${braceLineIndent}` });
4939
+ }
4940
+ }
4941
+
4942
+ // Apply later edits first so earlier offsets remain valid.
4943
+ return edits
4944
+ .sort((a, b) => b.start - a.start)
4945
+ .reduce((text, edit) => text.slice(0, edit.start) + edit.text + text.slice(edit.end), original);
4649
4946
  }
4650
4947
 
4651
4948
  // ── No compat object yet: create one right after the container `{`. ──
@@ -4666,12 +4963,12 @@ function composeFixInsertion(
4666
4963
  : ' ';
4667
4964
  const innerIndent = keyIndent + unit;
4668
4965
 
4669
- const compatBlock = `\n${keyIndent}"compat": {\n${formatKeys(innerIndent)}\n${keyIndent}},`;
4966
+ const compatBlock = `\n${keyIndent}"compat": {\n${formatEntries(innerIndent, sortedEntries)}\n${keyIndent}},`;
4670
4967
  return original.slice(0, afterBrace) + compatBlock + suffix;
4671
4968
  }
4672
4969
 
4673
4970
  /**
4674
- * Self-check after compose: parse original and modified via stripJsoncComments,
4971
+ * Self-check after compose: parse original and modified as JSONC,
4675
4972
  * assert target compat flags exist in the right path, and remaining structure
4676
4973
  * is deep-equal (ignoring the inserted keys).
4677
4974
  * Returns null on success, error message on failure.
@@ -4684,17 +4981,17 @@ function selfCheckFix(
4684
4981
  compatKeys: Record<string, unknown>,
4685
4982
  ): string | null {
4686
4983
  try {
4687
- // Step 1: Parse both versions (this validates JSON syntax)
4688
- const origParsed = JSON.parse(stripJsoncComments(original));
4689
- const modParsed = JSON.parse(stripJsoncComments(modified));
4984
+ // Step 1: Parse both versions as JSONC (comments + trailing commas allowed).
4985
+ const origParsed = parseJsonc(original);
4986
+ const modParsed = parseJsonc(modified);
4690
4987
 
4691
4988
  // Step 2: Validate modified file has correct structure
4692
- const providers = modParsed?.providers;
4693
- if (!providers || typeof providers !== 'object') {
4989
+ const providers = asRecord(asRecord(modParsed)?.providers);
4990
+ if (!providers) {
4694
4991
  return "Modified file: providers object missing or invalid";
4695
4992
  }
4696
- const provider = providers[providerLabel];
4697
- if (!provider || typeof provider !== 'object') {
4993
+ const provider = asRecord(providers[providerLabel]);
4994
+ if (!provider) {
4698
4995
  return `Modified file: provider "${providerLabel}" not found`;
4699
4996
  }
4700
4997
 
@@ -4712,6 +5009,18 @@ function selfCheckFix(
4712
5009
  if (!targetModel || typeof targetModel !== 'object') {
4713
5010
  return `Modified file: model "${modelId}" not found in provider`;
4714
5011
  }
5012
+
5013
+ // Locate the corresponding original provider/model objects. The structure
5014
+ // preservation check below may allow repaired compat values to differ, but
5015
+ // only on these exact target/provider compat objects — never on siblings.
5016
+ const origProviders = asRecord(asRecord(origParsed)?.providers);
5017
+ const origProvider = asRecord(origProviders?.[providerLabel]);
5018
+ const origModels = Array.isArray(origProvider?.models) ? origProvider.models : undefined;
5019
+ const origTargetModel = origModels?.find((m: unknown) => asRecord(m)?.id === modelId);
5020
+ const origTargetModelRecord = asRecord(origTargetModel);
5021
+ if (!origProvider || !origTargetModelRecord) {
5022
+ return `Original file: provider/model "${providerLabel}/${modelId}" not found`;
5023
+ }
4715
5024
 
4716
5025
  // Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
4717
5026
  // mirroring Pi's mergeCompat behavior (model wins on conflicts). The fix may
@@ -4754,15 +5063,23 @@ function selfCheckFix(
4754
5063
  for (const key of Object.keys(origObj)) {
4755
5064
  if (!(key in modObj)) return false;
4756
5065
  if (key === 'compat') {
4757
- // For compat, allow extra keys in modified (the inserted ones)
5066
+ // For compat, allow extra keys in modified (the inserted ones).
5067
+ // Use recursive isSubset so nested objects (e.g. { deep: true })
5068
+ // are compared by content, not reference.
4758
5069
  if (typeof origObj[key] !== 'object' || typeof modObj[key] !== 'object') {
4759
5070
  if (origObj[key] !== modObj[key]) return false;
4760
5071
  } else {
4761
- // Check all original compat keys are present and equal
4762
5072
  const origCompat = origObj[key] as Record<string, unknown>;
4763
5073
  const modCompat = modObj[key] as Record<string, unknown>;
5074
+ const mayRepairThisCompat = origObj === origProvider || origObj === origTargetModelRecord;
4764
5075
  for (const ck of Object.keys(origCompat)) {
4765
- if (origCompat[ck] !== modCompat[ck]) return false;
5076
+ if (!(ck in modCompat)) return false;
5077
+ // The fix may repair an existing wrong compat value (for example
5078
+ // thinkingFormat: "legacy" -> "deepseek"), but only on the
5079
+ // target provider/model compat objects. Sibling compat blocks must
5080
+ // remain structure-equivalent.
5081
+ if (mayRepairThisCompat && Object.prototype.hasOwnProperty.call(compatKeys, ck)) continue;
5082
+ if (!isSubset(origCompat[ck], modCompat[ck], `${path}.${ck}`)) return false;
4766
5083
  }
4767
5084
  }
4768
5085
  } else if (!isSubset(origObj[key], modObj[key], `${path}.${key}`)) {
@@ -4781,11 +5098,17 @@ function selfCheckFix(
4781
5098
  return "Modified file: content is shorter than original (possible truncation)";
4782
5099
  }
4783
5100
 
4784
- // Step 9: Validate no syntax issues by checking brackets balance
4785
- const openBraces = (modified.match(/{/g) || []).length;
4786
- const closeBraces = (modified.match(/}/g) || []).length;
4787
- if (openBraces !== closeBraces) {
4788
- return `Modified file: bracket mismatch (${openBraces} open, ${closeBraces} close)`;
5101
+ // Step 9: Validate root bracket integrity with the same string/comment-aware
5102
+ // scanner used for edits. Do not count raw braces: comments or strings may
5103
+ // legitimately contain unmatched `{` / `}` bytes.
5104
+ const modifiedClean = stripJsoncComments(modified);
5105
+ const rootStart = skipJsonWhitespace(modifiedClean, 0);
5106
+ const rootEnd = findMatchingBracket(modifiedClean, rootStart);
5107
+ if (rootEnd === undefined) {
5108
+ return "Modified file: root bracket mismatch";
5109
+ }
5110
+ if (skipJsonWhitespace(modifiedClean, rootEnd + 1) !== modifiedClean.length) {
5111
+ return "Modified file: trailing non-whitespace content after root object";
4789
5112
  }
4790
5113
 
4791
5114
  return null;
@@ -4801,8 +5124,7 @@ function selfCheckFix(
4801
5124
  function formatCompatKeysForInsertion(compatKeys: Record<string, unknown>): string {
4802
5125
  return Object.entries(compatKeys)
4803
5126
  .map(([k, v]) => {
4804
- const val = typeof v === 'string' ? `"${v}"` : String(v);
4805
- return ` ${JSON.stringify(k)}: ${val}`;
5127
+ return ` ${JSON.stringify(k)}: ${JSON.stringify(v)}`;
4806
5128
  })
4807
5129
  .join(',\n');
4808
5130
  }
@@ -4852,6 +5174,7 @@ export const __internals_for_tests = {
4852
5174
  isOpenAIFamilyToken,
4853
5175
  describeMissingOpenAIFamilyProxyCompat,
4854
5176
  describeMissingOpenAICompatibleProxyCompat,
5177
+ describeOptionalOpenAICompatibleProxyCompat,
4855
5178
  describeMissingDeepSeekCompat,
4856
5179
  isDeepSeekCompatCheckApplicable,
4857
5180
  describeMissingCacheCompatForModel,
@@ -5022,6 +5345,18 @@ export const __internals_for_tests = {
5022
5345
  parsePersistedRoutedModelRef,
5023
5346
  routedModelRefToPiModel,
5024
5347
  buildExactRouterStatusEntry,
5348
+ // Routing-provider protocol helpers
5349
+ PI_ROUTING_REGISTRY_SYMBOL,
5350
+ PI_CACHE_HINTS_SYMBOL,
5351
+ ensureRoutingRegistry,
5352
+ getRoutingRegistry,
5353
+ parseRouteSnapshot,
5354
+ resolveActiveRouteSnapshot,
5355
+ routeSnapshotToPiModel,
5356
+ resolveRouteModel,
5357
+ isVirtualRoutingModel,
5358
+ installCacheHintsService,
5359
+ getCacheHintsService,
5025
5360
  // Persistence helpers (for reload/reset tests)
5026
5361
  mergeCacheSessions,
5027
5362
  mergeLastRoutedModels,
@@ -5033,10 +5368,14 @@ export const __internals_for_tests = {
5033
5368
  // JSONC surgical edit helpers
5034
5369
  MODELS_JSON_PATH,
5035
5370
  stripJsoncComments,
5371
+ stripJsoncTrailingCommas,
5372
+ parseJsonc,
5036
5373
  locateModelInJsonc,
5037
5374
  composeFixInsertion,
5038
5375
  selfCheckFix,
5039
5376
  decideFixPlacement,
5377
+ chooseFixPlacement,
5378
+ findExistingCompatKeysInJsonc,
5040
5379
  deepEqualIgnoringKeys,
5041
5380
  formatCompatKeysForInsertion,
5042
5381
  backupTimestamp,
@@ -5065,6 +5404,7 @@ export default function (pi: ExtensionAPI) {
5065
5404
  let currentSessionHash = "";
5066
5405
  let currentSessionHashSet = false;
5067
5406
  let lastActualRoutedModel: PersistedRoutedModelRef | undefined;
5407
+ let latestCacheHint: PiCacheHintSnapshot | undefined;
5068
5408
  const PERSIST_DEBOUNCE_MS = 2000;
5069
5409
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
5070
5410
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5079,6 +5419,28 @@ export default function (pi: ExtensionAPI) {
5079
5419
  }
5080
5420
  }
5081
5421
 
5422
+ const uninstallCacheHintsService = installCacheHintsService({ // Install a version-1 cache-hints service backed by `latestCacheHint`.
5423
+ version: 1,
5424
+ getHints(input: PiCacheHintsInput): PiCacheHintsOutput | undefined {
5425
+ if (!runtimeOptimizerEnabled || isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV])) return undefined; // No hints while the optimizer is off or the no-prompt-rewrite env flag is enabled.
5426
+ const hint = latestCacheHint;
5427
+ if (!hint) return undefined; // Nothing recorded yet.
5428
+ if (input.sessionIdHash && hint.sessionIdHash && input.sessionIdHash !== hint.sessionIdHash) return undefined; // Each filter rejects only when both sides carry a value and the values differ; a field missing on either side counts as a match.
5429
+ if (input.virtualProvider && hint.virtualProvider && input.virtualProvider !== hint.virtualProvider) return undefined;
5430
+ if (input.virtualModelId && hint.virtualModelId && input.virtualModelId !== hint.virtualModelId) return undefined;
5431
+ if (input.upstreamProvider && hint.upstreamProvider && input.upstreamProvider !== hint.upstreamProvider) return undefined;
5432
+ if (input.upstreamModelId && hint.upstreamModelId && input.upstreamModelId !== hint.upstreamModelId) return undefined;
5433
+ if (input.api && hint.api && input.api !== hint.api) return undefined;
5434
+
5435
+ return { // Expose only these three fields of the stored hint.
5436
+ systemPrompt: hint.systemPrompt,
5437
+ promptCacheKey: hint.promptCacheKey,
5438
+ cacheRetention: hint.cacheRetention,
5439
+ };
5440
+ },
5441
+ });
5442
+ void uninstallCacheHintsService; // The uninstall handle is referenced but never called here — presumably to avoid an unused-variable warning; confirm.
5443
+
5082
5444
  /**
5083
5445
  * Build a session-scoped stats key from the current session hash + model key.
5084
5446
  * Returns `${sessionHash}:${provider}/${id}`.
@@ -5162,6 +5524,13 @@ export default function (pi: ExtensionAPI) {
5162
5524
  return created;
5163
5525
  }
5164
5526
 
5527
+ function resetStatsForModel(model: PiModel): void { // Drop the current session's cache stats and recent samples for one model.
5528
+ const sk = sessionModelKey(model); // Session-scoped key, so only this session's entry for the model is affected.
5529
+ delete cacheStatsByModel[sk];
5530
+ recentSamplesByModelKey.delete(sk);
5531
+ lastStatusText = undefined; // Forget the last published status text — presumably so the next publish is not skipped as a duplicate; confirm against publishStatus.
5532
+ }
5533
+
5165
5534
  function resetCurrentSessionStats(): void {
5166
5535
  const prefix = `${currentSessionHash || "_nosession"}:`;
5167
5536
  for (const key of Object.keys(cacheStatsByModel)) {
@@ -5170,6 +5539,7 @@ export default function (pi: ExtensionAPI) {
5170
5539
  for (const key of Array.from(recentSamplesByModelKey.keys())) {
5171
5540
  if (key.startsWith(prefix)) recentSamplesByModelKey.delete(key);
5172
5541
  }
5542
+ lastActualRoutedModel = undefined;
5173
5543
  lastStatusText = undefined;
5174
5544
  }
5175
5545
 
@@ -5336,9 +5706,13 @@ export default function (pi: ExtensionAPI) {
5336
5706
  syncSessionHash(ctx);
5337
5707
  await rollOverStatsIfNeeded(ctx);
5338
5708
 
5339
- const adapter = selectAdapterForModel(model);
5709
+ const routedModel = resolveRouteModel(model, ctx);
5710
+ const displayModel = routedModel ?? model;
5711
+ const adapter = selectAdapterForModel(displayModel);
5712
+ const activeIsVirtualRoute = !!routedModel || isVirtualRoutingModel(model, ctx);
5340
5713
  let statusText: string | undefined;
5341
- if (!adapter && isRouterModel(model)) {
5714
+
5715
+ if (!adapter && !routedModel && activeIsVirtualRoute) {
5342
5716
  // On model_select (existing footer), keep the existing cache footer
5343
5717
  // visible instead of clearing it. On session_start (no footer yet
5344
5718
  // after reload/fresh start), restore the exact last actual routed model
@@ -5361,8 +5735,8 @@ export default function (pi: ExtensionAPI) {
5361
5735
  if (adapter) {
5362
5736
  // Display session-scoped stats. A model that has never been used
5363
5737
  // in this session shows 0/0. The message_end hook populates
5364
- // cacheStatsByModel[sessionModelKey(model)] on first use.
5365
- const sk = model ? sessionModelKey(model) : undefined;
5738
+ // cacheStatsByModel[sessionModelKey(displayModel)] on first use.
5739
+ const sk = displayModel ? sessionModelKey(displayModel) : undefined;
5366
5740
  const stats = sk ? cacheStatsByModel[sk] : undefined;
5367
5741
  const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
5368
5742
  statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
@@ -5399,9 +5773,12 @@ export default function (pi: ExtensionAPI) {
5399
5773
  // Re-evaluated on every status update so the marker persists through stats
5400
5774
  // changes and day rollovers. Redundant setStatus calls are blocked by the
5401
5775
  // `lastStatusText` early return above.
5402
- if (runtimeOptimizerEnabled && statusText !== undefined && model) {
5403
- const compatMissing = describeMissingCacheCompatForModel(model);
5404
- if (compatMissing.length > 0) {
5776
+ if (runtimeOptimizerEnabled && statusText !== undefined && displayModel) {
5777
+ // Only show ⚠️ compat when there are safe-fixable missing compat keys.
5778
+ // Optional/advisory-only flags (e.g. supportsLongCacheRetention on generic
5779
+ // OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
5780
+ // commands still mention them as optional guidance.
5781
+ if (buildFixSuggestion(displayModel) !== undefined) {
5405
5782
  statusText = statusText + " ⚠️ compat";
5406
5783
  }
5407
5784
  }
@@ -5412,18 +5789,26 @@ export default function (pi: ExtensionAPI) {
5412
5789
  ctx.ui.setStatus(STATUS_KEY, statusText);
5413
5790
  }
5414
5791
 
5792
+ ensureRoutingRegistry();
5793
+
5415
5794
  // session_start handler: calls restoreCacheStats with the event's reason,
  // then — only while runtimeOptimizerEnabled is true — runs the cache-compat
  // notification against the model returned by resolveRouteModel, falling back
  // to ctx.model when no routed model is resolved, and finally publishes the
  // status for this context.
  pi.on("session_start", async (event, ctx) => {
5416
5795
  await restoreCacheStats(event.reason, ctx);
5417
- if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(ctx.model, ctx, warnedModels);
5796
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(ctx.model, ctx) ?? ctx.model, ctx, warnedModels);
5418
5797
  await publishStatus(ctx);
5419
5798
  });
5420
5799
 
5421
5800
  // model_select handler: while runtimeOptimizerEnabled is true, the
  // cache-compat notification is evaluated for the routed model resolved from
  // event.model (or event.model itself when resolveRouteModel yields nothing).
  // publishStatus is always called and receives the selected event.model
  // unchanged.
  pi.on("model_select", async (event, ctx) => {
5422
- if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(event.model, ctx, warnedModels);
5801
+ if (runtimeOptimizerEnabled) notifyCacheCompatIfNeeded(resolveRouteModel(event.model, ctx) ?? event.model, ctx, warnedModels);
5423
5802
  await publishStatus(ctx, event.model);
5424
5803
  });
5425
5804
 
5426
5805
  pi.on("before_agent_start", async (event, _ctx) => {
5806
+ latestCacheHint = undefined;
5807
+ const routeSnapshot = resolveActiveRouteSnapshot(_ctx.model, _ctx);
5808
+ const routedModel = routeSnapshot
5809
+ ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
5810
+ : undefined;
5811
+
5427
5812
  // ────────────────────────────────────────────────────────────────
5428
5813
  // OpenAI Responses-family bypass (codex-responses + responses + azure responses)
5429
5814
  //
@@ -5450,7 +5835,7 @@ export default function (pi: ExtensionAPI) {
5450
5835
  // compression, reorder) for these APIs. Third-party providers
5451
5836
  // that use openai-completions are unaffected.
5452
5837
  // ────────────────────────────────────────────────────────────────
5453
- const model = _ctx.model;
5838
+ const model = routedModel ?? _ctx.model;
5454
5839
  if (model && isResponsesPromptRewriteBypassApi(model.api)) {
5455
5840
  return {};
5456
5841
  }
@@ -5488,7 +5873,27 @@ export default function (pi: ExtensionAPI) {
5488
5873
  // ships to the provider.
5489
5874
  const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
5490
5875
 
5876
+ const promptCacheKey = getSessionPromptCacheKey(_ctx);
5877
+ const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
5878
// publishHint(systemPrompt): overwrites latestCacheHint with a fresh record
// for this turn and stores promptCacheKey on the object returned by
// getProtocolGlobal() under __piCacheOptimizerCacheKey__.
//  - sessionIdHash: currentSessionHash when currentSessionHashSet is truthy,
//    otherwise computed from _ctx via sessionHashFromContext.
//  - virtualProvider / virtualModelId: taken from routeSnapshot when present,
//    else from the selected _ctx.model.
//  - upstreamProvider / upstreamModelId: taken from routeSnapshot when
//    present, else from the effective `model` captured by this closure.
//  - systemPrompt: the prompt text passed in by the caller.
//  - timestamp: Date.now() at the time of the call.
+ const publishHint = (systemPrompt: string): void => {
5879
+ latestCacheHint = {
5880
+ sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
5881
+ virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
5882
+ virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
5883
+ upstreamProvider: routeSnapshot?.provider ?? model?.provider,
5884
+ upstreamModelId: routeSnapshot?.modelId ?? model?.id,
5885
+ api: model?.api,
5886
+ systemPrompt,
5887
+ promptCacheKey,
5888
+ cacheRetention,
5889
+ timestamp: Date.now(),
5890
+ };
5891
+ const globals = getProtocolGlobal();
5892
+ globals.__piCacheOptimizerCacheKey__ = promptCacheKey;
5893
+ };
5894
+
5491
5895
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
5896
+ publishHint(optimized.systemPrompt);
5492
5897
  return { systemPrompt: optimized.systemPrompt };
5493
5898
  }
5494
5899
 
@@ -5497,24 +5902,28 @@ export default function (pi: ExtensionAPI) {
5497
5902
  // the volume cut even when reorder is a no-op (e.g., short sessions
5498
5903
  // where no stable candidate is long enough).
5499
5904
  if (compressedPrompt !== strippedPrompt && compressedPrompt.trim().length > 0) {
5905
+ publishHint(compressedPrompt);
5500
5906
  return { systemPrompt: compressedPrompt };
5501
5907
  }
5502
5908
  if (strippedPrompt !== event.systemPrompt && strippedPrompt.trim().length > 0) {
5909
+ publishHint(strippedPrompt);
5503
5910
  return { systemPrompt: strippedPrompt };
5504
5911
  }
5505
5912
 
5913
+ publishHint(event.systemPrompt);
5506
5914
  return {};
5507
5915
  });
5508
5916
 
5509
5917
  // before_provider_request handler: returns undefined (no payload change)
  // when shouldInjectOpenAIPromptCacheKey() is false or when the effective
  // request model's api is not accepted by isOpenAICompatibleApi. The
  // effective model is the one resolved by resolveRouteModel, falling back to
  // ctx.model. Otherwise returns the result of addOpenAIPromptCacheKey applied
  // to the outgoing payload with this session's prompt cache key.
  pi.on("before_provider_request", (event, ctx) => {
5510
5918
  if (!shouldInjectOpenAIPromptCacheKey()) return undefined;
5511
- if (!isOpenAICompatibleApi(ctx.model?.api)) return undefined;
5919
+ const requestModel = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
5920
+ if (!isOpenAICompatibleApi(requestModel?.api)) return undefined;
5512
5921
 
5513
5922
  return addOpenAIPromptCacheKey(event.payload, getSessionPromptCacheKey(ctx));
5514
5923
  });
5515
5924
 
5516
5925
  pi.on("after_provider_response", (event, ctx) => {
5517
- const model = ctx.model;
5926
+ const model = resolveRouteModel(ctx.model, ctx) ?? ctx.model;
5518
5927
  if (!runtimeOptimizerEnabled || !model) return;
5519
5928
  if (event.status !== 400) return;
5520
5929
  if (!isPromptCacheRetention400Applicable(model)) return;
@@ -5539,9 +5948,12 @@ export default function (pi: ExtensionAPI) {
5539
5948
 
5540
5949
  const usage = adapter.normalizeUsage(event.message);
5541
5950
 
5542
- const statsModel = isRouterModel(ctx.model) ? modelFromAssistantMessage(event.message, ctx.model) : ctx.model;
5951
+ // Completed message metadata is request-local and authoritative for virtual
5952
+ // routing providers. Use it whenever it supplies provider/model identity;
5953
+ // fall back to the active context model for direct providers.
5954
+ const statsModel = modelFromAssistantMessage(event.message, ctx.model) ?? ctx.model;
5543
5955
  let routedModelChanged = false;
5544
- if (isRouterModel(ctx.model) && statsModel && !isRouterModel(statsModel)) {
5956
+ if (isVirtualRoutingModel(ctx.model, ctx) && statsModel && !isVirtualRoutingModel(statsModel, ctx)) {
5545
5957
  const nextRoutedModel: PersistedRoutedModelRef = {
5546
5958
  provider: statsModel.provider,
5547
5959
  id: statsModel.id,
@@ -5604,7 +6016,8 @@ export default function (pi: ExtensionAPI) {
5604
6016
  description: "Diagnose Pi cache configuration",
5605
6017
  handler: async (args: string, cmdCtx) => {
5606
6018
  syncSessionHash(cmdCtx);
5607
- const model = cmdCtx.model;
6019
+ const selectedModel = cmdCtx.model;
6020
+ const model = resolveRouteModel(selectedModel, cmdCtx as unknown as ExtensionContext) ?? selectedModel;
5608
6021
  const subcommand = args.trim().toLowerCase().split(/\s+/)[0] || "help";
5609
6022
 
5610
6023
  if (subcommand === "enable") {
@@ -5672,14 +6085,12 @@ export default function (pi: ExtensionAPI) {
5672
6085
  return;
5673
6086
  }
5674
6087
 
5675
- const sk = sessionModelKey(model);
5676
6088
  const displayKey = modelKey(model);
5677
6089
 
5678
- // Reset session-scoped stats for the active model.
5679
- delete cacheStatsByModel[sk];
5680
-
5681
- // Clear recent samples for this session+model key.
5682
- recentSamplesByModelKey.delete(sk);
6090
+ // Reset session-scoped stats for the effective active model. If the
6091
+ // selected model is a virtual router and the protocol exposes a live
6092
+ // route, this clears the real upstream bucket, not the router shell.
6093
+ resetStatsForModel(model);
5683
6094
 
5684
6095
  // Persist immediately.
5685
6096
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
@@ -5755,7 +6166,7 @@ export default function (pi: ExtensionAPI) {
5755
6166
 
5756
6167
  // Compose the modified text — auto-detect the best placement level:
5757
6168
  // provider level (channel-wide) when safe for all sibling models, else model level.
5758
- const decision = decideFixPlacement(suggestion.compatKeys, suggestion.providerLabel, location.allModelIds);
6169
+ const decision = chooseFixPlacement(originalText, location, suggestion.compatKeys, suggestion.providerLabel);
5759
6170
  const modifiedText = composeFixInsertion(originalText, location, suggestion.compatKeys, decision.placement);
5760
6171
 
5761
6172
  // Self-check
@@ -5769,10 +6180,9 @@ export default function (pi: ExtensionAPI) {
5769
6180
  return;
5770
6181
  }
5771
6182
 
5772
- // Build preview snippet
5773
- const keysPreview = Object.entries(suggestion.compatKeys)
5774
- .map(([k, v]) => ` ${k}: ${JSON.stringify(v)}`)
5775
- .join("\n");
6183
+ // Build preview snippet as copyable JSON (the surgical editor will
6184
+ // insert or repair these exact compat key/value pairs).
6185
+ const keysPreview = JSON.stringify(suggestion.compatKeys, null, 2);
5776
6186
  const targetHasCompat = decision.placement === "provider" ? location.providerCompatBrace >= 0 : location.compatObjectBrace >= 0;
5777
6187
  const placementDesc = targetHasCompat ? `existing "compat" object` : `new "compat" object`;
5778
6188
  const locationDesc = decision.placement === "provider"
@@ -5791,7 +6201,7 @@ export default function (pi: ExtensionAPI) {
5791
6201
  ``,
5792
6202
  `Location: ${locationDesc}`,
5793
6203
  `Placement: ${decision.placement} level — ${decision.reason}`,
5794
- `Keys to insert:`,
6204
+ `Compat JSON to write:`,
5795
6205
  keysPreview,
5796
6206
  ``,
5797
6207
  `⚠️ Risk notice:`,
@@ -5948,7 +6358,7 @@ export default function (pi: ExtensionAPI) {
5948
6358
  return;
5949
6359
  }
5950
6360
 
5951
- const menuDecision = decideFixPlacement(suggestion.compatKeys, suggestion.providerLabel, location.allModelIds);
6361
+ const menuDecision = chooseFixPlacement(originalText, location, suggestion.compatKeys, suggestion.providerLabel);
5952
6362
  const modifiedText = composeFixInsertion(originalText, location, suggestion.compatKeys, menuDecision.placement);
5953
6363
  const checkError = selfCheckFix(originalText, modifiedText, suggestion.providerLabel, suggestion.modelId, suggestion.compatKeys);
5954
6364
  if (checkError !== null) {
@@ -5956,9 +6366,7 @@ export default function (pi: ExtensionAPI) {
5956
6366
  return;
5957
6367
  }
5958
6368
 
5959
- const keysPreview = Object.entries(suggestion.compatKeys)
5960
- .map(([k, v]) => ` ${k}: ${JSON.stringify(v)}`)
5961
- .join("\n");
6369
+ const keysPreview = JSON.stringify(suggestion.compatKeys, null, 2);
5962
6370
  const ts = backupTimestamp();
5963
6371
  const backupPath = `${MODELS_JSON_PATH}.backup-cache-optimizer-${ts}`;
5964
6372
 
@@ -5973,7 +6381,7 @@ export default function (pi: ExtensionAPI) {
5973
6381
  `📝 Preview of changes to ${getModelsJsonDisplayPath()}:`,
5974
6382
  `Location: ${menuLocationDesc}`,
5975
6383
  `Placement: ${menuDecision.placement} level — ${menuDecision.reason}`,
5976
- `Keys to insert:`,
6384
+ `Compat JSON to write:`,
5977
6385
  keysPreview,
5978
6386
  ``,
5979
6387
  `⚠️ Risk notice:`,
@@ -6025,10 +6433,8 @@ export default function (pi: ExtensionAPI) {
6025
6433
  if (!adapter) {
6026
6434
  cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6027
6435
  } else {
6028
- const sk = sessionModelKey(model);
6029
6436
  const displayKey = modelKey(model);
6030
- delete cacheStatsByModel[sk];
6031
- recentSamplesByModelKey.delete(sk);
6437
+ resetStatsForModel(model);
6032
6438
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6033
6439
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6034
6440
  cmdCtx.ui.notify(