pi-cache-optimizer 2.4.6 → 2.4.8

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (4)
  1. package/README.md +64 -4
  2. package/README.zh-CN.md +48 -1
  3. package/index.ts +1203 -141
  4. package/package.json +1 -1
package/index.ts CHANGED
@@ -91,6 +91,15 @@ const MIN_STABLE_CANDIDATE_LENGTH = 8;
91
91
  const ASSISTANT_MESSAGE_MODEL_TOKEN_KEYS = ["model", "name"];
92
92
  const OPENAI_REASONING_MODEL_PATTERN = /(^|[/\s:_-])o[1345]($|[-_.:/\s])/;
93
93
  const XAI_MODEL_PATTERN = /(^|[/\s:_-])xai($|[-_.:/\s])/;
94
+ const PPLX_MODEL_PATTERN = /(^|[/\s:_-])pplx($|[-_.:/\s])/i;
95
+ const NOVA_MODEL_PATTERN = /(^|[/\s:_-])nova($|[-_.:/\s])/i;
96
+ const MPT_MODEL_PATTERN = /(^|[/\s:_-])mpt($|[-_.:/\s])/i;
97
+ const ALEPH_MODEL_PATTERN = /(^|[/\s:_-])aleph($|[-_.:/\s])/i;
98
+
99
+ // Safe-boundary patterns for models with short or ambiguous tokens
100
+ const ARCTIC_MODEL_PATTERN = /(^|[\/\s:_-])arctic($|[\-_.:\/\s])/i;
101
+ const AYA_MODEL_PATTERN = /(^|[\/\s:_-])aya($|[\-_.:\/\s])/i;
102
+ const ORION_MODEL_PATTERN = /(^|[\/\s:_-])orion($|[\-_.:\/\s])/i;
94
103
 
95
104
  type CacheCompat = {
96
105
  sendSessionAffinityHeaders?: boolean;
@@ -847,6 +856,285 @@ function isSolarLikeAssistantMessage(message: unknown, model: PiModel | undefine
847
856
  return modelOrAssistantMessageHas(message, model, ["solar", "upstage"]);
848
857
  }
849
858
 
859
+ // ── New OpenAI-compatible model detection (batch 3, 12 families) ──────
860
+
861
+ // Perplexity / Sonar
862
+ function isPerplexityLikeModel(model: PiModel | undefined): boolean {
863
+ const tokens = getModelIdNameTokenValues(model);
864
+ return hasAnyTokenContaining(tokens, ["sonar", "perplexity"]) || tokens.some((t) => PPLX_MODEL_PATTERN.test(t));
865
+ }
866
+ function isPerplexityLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
867
+ const allTokens = [
868
+ ...getModelIdNameTokenValues(model),
869
+ ...getAssistantMessageModelTokenValues(message),
870
+ ];
871
+ return hasAnyTokenContaining(allTokens, ["sonar", "perplexity"]) || allTokens.some((t) => PPLX_MODEL_PATTERN.test(t));
872
+ }
873
+
874
+ // Amazon Nova
875
+ function isNovaLikeModel(model: PiModel | undefined): boolean {
876
+ const tokens = getModelIdNameTokenValues(model);
877
+ return hasAnyTokenContaining(tokens, ["amazon-nova"]) || tokens.some((t) => NOVA_MODEL_PATTERN.test(t));
878
+ }
879
+ function isNovaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
880
+ const allTokens = [
881
+ ...getModelIdNameTokenValues(model),
882
+ ...getAssistantMessageModelTokenValues(message),
883
+ ];
884
+ return hasAnyTokenContaining(allTokens, ["amazon-nova"]) || allTokens.some((t) => NOVA_MODEL_PATTERN.test(t));
885
+ }
886
+
887
+ // Reka
888
+ function isRekaLikeModel(model: PiModel | undefined): boolean {
889
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["reka"]);
890
+ }
891
+ function isRekaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
892
+ return modelOrAssistantMessageHas(message, model, ["reka"]);
893
+ }
894
+
895
+ // Falcon / TII
896
+ function isFalconLikeModel(model: PiModel | undefined): boolean {
897
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["falcon", "tiiuae"]);
898
+ }
899
+ function isFalconLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
900
+ return modelOrAssistantMessageHas(message, model, ["falcon", "tiiuae"]);
901
+ }
902
+
903
+ // Databricks DBRX
904
+ function isDbrxLikeModel(model: PiModel | undefined): boolean {
905
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["dbrx", "databricks"]);
906
+ }
907
+ function isDbrxLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
908
+ return modelOrAssistantMessageHas(message, model, ["dbrx", "databricks"]);
909
+ }
910
+
911
+ // MosaicML MPT
912
+ function isMptLikeModel(model: PiModel | undefined): boolean {
913
+ const tokens = getModelIdNameTokenValues(model);
914
+ return hasAnyTokenContaining(tokens, ["mosaicml", "mpt-"]) || tokens.some((t) => MPT_MODEL_PATTERN.test(t));
915
+ }
916
+ function isMptLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
917
+ const allTokens = [
918
+ ...getModelIdNameTokenValues(model),
919
+ ...getAssistantMessageModelTokenValues(message),
920
+ ];
921
+ return hasAnyTokenContaining(allTokens, ["mosaicml", "mpt-"]) || allTokens.some((t) => MPT_MODEL_PATTERN.test(t));
922
+ }
923
+
924
+ // StableLM / Stability AI
925
+ function isStableLMLikeModel(model: PiModel | undefined): boolean {
926
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["stablelm", "stable-lm", "stability-ai"]);
927
+ }
928
+ function isStableLMLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
929
+ return modelOrAssistantMessageHas(message, model, ["stablelm", "stable-lm", "stability-ai"]);
930
+ }
931
+
932
+ // BAAI / Aquila
933
+ function isAquilaLikeModel(model: PiModel | undefined): boolean {
934
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["aquila", "baai"]);
935
+ }
936
+ function isAquilaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
937
+ return modelOrAssistantMessageHas(message, model, ["aquila", "baai"]);
938
+ }
939
+
940
+ // LG EXAONE
941
+ function isExaoneLikeModel(model: PiModel | undefined): boolean {
942
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["exaone"]);
943
+ }
944
+ function isExaoneLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
945
+ return modelOrAssistantMessageHas(message, model, ["exaone"]);
946
+ }
947
+
948
+ // Naver HyperCLOVA X (conservative: hyperclova, clova-x only)
949
+ function isHyperCLOVALikeModel(model: PiModel | undefined): boolean {
950
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["hyperclova", "clova-x"]);
951
+ }
952
+ function isHyperCLOVALikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
953
+ return modelOrAssistantMessageHas(message, model, ["hyperclova", "clova-x"]);
954
+ }
955
+
956
+ // Aleph Alpha Luminous
957
+ function isLuminousLikeModel(model: PiModel | undefined): boolean {
958
+ const tokens = getModelIdNameTokenValues(model);
959
+ return hasAnyTokenContaining(tokens, ["luminous", "aleph-alpha"]) || tokens.some((t) => ALEPH_MODEL_PATTERN.test(t));
960
+ }
961
+ function isLuminousLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
962
+ const allTokens = [
963
+ ...getModelIdNameTokenValues(model),
964
+ ...getAssistantMessageModelTokenValues(message),
965
+ ];
966
+ return hasAnyTokenContaining(allTokens, ["luminous", "aleph-alpha"]) || allTokens.some((t) => ALEPH_MODEL_PATTERN.test(t));
967
+ }
968
+
969
+ // Nous / Hermes / OpenHermes
970
+ function isHermesLikeModel(model: PiModel | undefined): boolean {
971
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["nous", "hermes", "openhermes"]);
972
+ }
973
+ function isHermesLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
974
+ return modelOrAssistantMessageHas(message, model, ["nous", "hermes", "openhermes"]);
975
+ }
976
+
977
+ // ── More OpenAI-compatible model detection (batch 4, 18 families) ──
978
+
979
+ // IBM Granite
980
+ function isGraniteLikeModel(model: PiModel | undefined): boolean {
981
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["granite", "ibm-granite"]);
982
+ }
983
+ function isGraniteLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
984
+ return modelOrAssistantMessageHas(message, model, ["granite", "ibm-granite"]);
985
+ }
986
+
987
+ // Snowflake Arctic
988
+ function isArcticLikeModel(model: PiModel | undefined): boolean {
989
+ const tokens = getModelIdNameTokenValues(model);
990
+ return hasAnyTokenContaining(tokens, ["snowflake-arctic"]) || tokens.some((t) => ARCTIC_MODEL_PATTERN.test(t));
991
+ }
992
+ function isArcticLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
993
+ const allTokens = [
994
+ ...getModelIdNameTokenValues(model),
995
+ ...getAssistantMessageModelTokenValues(message),
996
+ ];
997
+ return hasAnyTokenContaining(allTokens, ["snowflake-arctic"]) || allTokens.some((t) => ARCTIC_MODEL_PATTERN.test(t));
998
+ }
999
+
1000
+ // Huawei Pangu / 盘古
1001
+ function isPanguLikeModel(model: PiModel | undefined): boolean {
1002
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["pangu", "pan-gu", "盘古", "huawei-pangu"]);
1003
+ }
1004
+ function isPanguLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1005
+ return modelOrAssistantMessageHas(message, model, ["pangu", "pan-gu", "盘古", "huawei-pangu"]);
1006
+ }
1007
+
1008
+ // SenseTime SenseNova / 商汤
1009
+ function isSenseNovaLikeModel(model: PiModel | undefined): boolean {
1010
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["sensenova", "sense-nova", "sensechat", "商汤"]);
1011
+ }
1012
+ function isSenseNovaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1013
+ return modelOrAssistantMessageHas(message, model, ["sensenova", "sense-nova", "sensechat", "商汤"]);
1014
+ }
1015
+
1016
+ // 360 Zhinao / 智脑
1017
+ function isZhinaoLikeModel(model: PiModel | undefined): boolean {
1018
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["360gpt", "360-gpt", "zhinao", "智脑"]);
1019
+ }
1020
+ function isZhinaoLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1021
+ return modelOrAssistantMessageHas(message, model, ["360gpt", "360-gpt", "zhinao", "智脑"]);
1022
+ }
1023
+
1024
+ // OpenBMB MiniCPM
1025
+ function isMiniCPMLikeModel(model: PiModel | undefined): boolean {
1026
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["minicpm", "mini-cpm", "openbmb"]);
1027
+ }
1028
+ function isMiniCPMLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1029
+ return modelOrAssistantMessageHas(message, model, ["minicpm", "mini-cpm", "openbmb"]);
1030
+ }
1031
+
1032
+ // XVERSE
1033
+ function isXVerseLikeModel(model: PiModel | undefined): boolean {
1034
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["xverse"]);
1035
+ }
1036
+ function isXVerseLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1037
+ return modelOrAssistantMessageHas(message, model, ["xverse"]);
1038
+ }
1039
+
1040
+ // OrionStar Orion
1041
+ function isOrionLikeModel(model: PiModel | undefined): boolean {
1042
+ const tokens = getModelIdNameTokenValues(model);
1043
+ return hasAnyTokenContaining(tokens, ["orionstar", "orion-star"]) || tokens.some((t) => ORION_MODEL_PATTERN.test(t));
1044
+ }
1045
+ function isOrionLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1046
+ const allTokens = [
1047
+ ...getModelIdNameTokenValues(model),
1048
+ ...getAssistantMessageModelTokenValues(message),
1049
+ ];
1050
+ return hasAnyTokenContaining(allTokens, ["orionstar", "orion-star"]) || allTokens.some((t) => ORION_MODEL_PATTERN.test(t));
1051
+ }
1052
+
1053
+ // OpenChat
1054
+ function isOpenChatLikeModel(model: PiModel | undefined): boolean {
1055
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["openchat"]);
1056
+ }
1057
+ function isOpenChatLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1058
+ return modelOrAssistantMessageHas(message, model, ["openchat"]);
1059
+ }
1060
+
1061
+ // Vicuna
1062
+ function isVicunaLikeModel(model: PiModel | undefined): boolean {
1063
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["vicuna"]);
1064
+ }
1065
+ function isVicunaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1066
+ return modelOrAssistantMessageHas(message, model, ["vicuna"]);
1067
+ }
1068
+
1069
+ // WizardLM / WizardCoder
1070
+ function isWizardLikeModel(model: PiModel | undefined): boolean {
1071
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["wizardlm", "wizard-lm", "wizardcoder", "wizard-coder"]);
1072
+ }
1073
+ function isWizardLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1074
+ return modelOrAssistantMessageHas(message, model, ["wizardlm", "wizard-lm", "wizardcoder", "wizard-coder"]);
1075
+ }
1076
+
1077
+ // Zephyr
1078
+ function isZephyrLikeModel(model: PiModel | undefined): boolean {
1079
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["zephyr"]);
1080
+ }
1081
+ function isZephyrLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1082
+ return modelOrAssistantMessageHas(message, model, ["zephyr"]);
1083
+ }
1084
+
1085
+ // Dolphin
1086
+ function isDolphinLikeModel(model: PiModel | undefined): boolean {
1087
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["dolphin"]);
1088
+ }
1089
+ function isDolphinLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1090
+ return modelOrAssistantMessageHas(message, model, ["dolphin"]);
1091
+ }
1092
+
1093
+ // OpenOrca
1094
+ function isOpenOrcaLikeModel(model: PiModel | undefined): boolean {
1095
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["openorca", "open-orca"]);
1096
+ }
1097
+ function isOpenOrcaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1098
+ return modelOrAssistantMessageHas(message, model, ["openorca", "open-orca"]);
1099
+ }
1100
+
1101
+ // Starling
1102
+ function isStarlingLikeModel(model: PiModel | undefined): boolean {
1103
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["starling"]);
1104
+ }
1105
+ function isStarlingLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1106
+ return modelOrAssistantMessageHas(message, model, ["starling"]);
1107
+ }
1108
+
1109
+ // BLOOM / BigScience
1110
+ function isBloomLikeModel(model: PiModel | undefined): boolean {
1111
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["bloom", "bigscience"]);
1112
+ }
1113
+ function isBloomLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1114
+ return modelOrAssistantMessageHas(message, model, ["bloom", "bigscience"]);
1115
+ }
1116
+
1117
+ // RWKV
1118
+ function isRwkvLikeModel(model: PiModel | undefined): boolean {
1119
+ return hasAnyTokenContaining(getModelIdNameTokenValues(model), ["rwkv"]);
1120
+ }
1121
+ function isRwkvLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1122
+ return modelOrAssistantMessageHas(message, model, ["rwkv"]);
1123
+ }
1124
+
1125
+ // Cohere Aya
1126
+ function isAyaLikeModel(model: PiModel | undefined): boolean {
1127
+ const tokens = getModelIdNameTokenValues(model);
1128
+ return hasAnyTokenContaining(tokens, ["aya-expanse"]) || tokens.some((t) => AYA_MODEL_PATTERN.test(t));
1129
+ }
1130
+ function isAyaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
1131
+ const allTokens = [
1132
+ ...getModelIdNameTokenValues(model),
1133
+ ...getAssistantMessageModelTokenValues(message),
1134
+ ];
1135
+ return hasAnyTokenContaining(allTokens, ["aya-expanse"]) || allTokens.some((t) => AYA_MODEL_PATTERN.test(t));
1136
+ }
1137
+
850
1138
  // ── Model key ──────────────────────────────────────────────────────
851
1139
 
852
1140
  function modelKey(model: PiModel): string {
@@ -1594,134 +1882,646 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
1594
1882
  return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1595
1883
  },
1596
1884
  },
1597
- ];
1598
-
1599
- function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
1600
- return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesModel(model));
1601
- }
1602
-
1603
- function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
1604
- return CACHE_PROVIDER_ADAPTERS.find((adapter) => adapter.matchesAssistantMessage(message, model));
1605
- }
1606
-
1607
- function notifyCacheCompatIfNeeded(
1608
- model: PiModel | undefined,
1609
- ctx: ExtensionContext,
1610
- warnedModels: Set<string>,
1611
- ): void {
1612
- if (!model) return;
1613
-
1614
- const adapter = selectAdapterForModel(model);
1615
- const text = adapter?.warningText?.(model);
1616
- if (!adapter || !text) return;
1617
-
1618
- const key = `${adapter.id}:${modelKey(model)}`;
1619
- if (warnedModels.has(key)) return;
1620
- warnedModels.add(key);
1621
-
1622
- ctx.ui.notify(text, "warning");
1623
- }
1624
-
1625
- function currentLocalDay(): string {
1626
- const now = new Date();
1627
- const year = now.getFullYear();
1628
- const month = String(now.getMonth() + 1).padStart(2, "0");
1629
- const day = String(now.getDate()).padStart(2, "0");
1630
- return `${year}-${month}-${day}`;
1631
- }
1632
-
1633
- function emptyCacheStats(day = currentLocalDay()): CacheStats {
1634
- return {
1635
- day,
1636
- totalRequests: 0,
1637
- hitRequests: 0,
1638
- cachedInputTokens: 0,
1639
- cacheWriteInputTokens: 0,
1640
- totalInputTokens: 0,
1641
- };
1642
- }
1643
-
1644
- function emptyAllCacheStats(day = currentLocalDay()): Partial<Record<CacheProviderId, CacheStats>> {
1645
- return Object.fromEntries(CACHE_PROVIDER_IDS.map((id) => [id, emptyCacheStats(day)])) as Partial<Record<CacheProviderId, CacheStats>>;
1646
- }
1647
-
1648
- function addUsageToCacheStats(stats: CacheStats, usage: UsageSnapshot): void {
1649
- stats.totalRequests += 1;
1650
- if (usage.cacheRead > 0) stats.hitRequests += 1;
1651
- stats.cachedInputTokens += usage.cacheRead;
1652
- stats.cacheWriteInputTokens += usage.cacheWrite;
1653
- stats.totalInputTokens += usage.totalInput;
1654
- }
1655
-
1656
- function formatTokenCount(value: number): string {
1657
- const millions = Math.max(0, Math.round(value)) / 1_000_000;
1658
- if (millions === 0) return "0M";
1659
- if (millions < 0.001) return `${millions.toFixed(4)}M`;
1660
- if (millions < 0.01) return `${millions.toFixed(3)}M`;
1661
- if (millions >= 10) return `${millions.toFixed(1)}M`;
1662
- return `${millions.toFixed(2)}M`;
1663
- }
1664
-
1665
- function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
1666
- const percent = stats.totalInputTokens > 0
1667
- ? ` (${Math.round((stats.cachedInputTokens / stats.totalInputTokens) * 100)}%)`
1668
- : "";
1669
- const writeText = adapter.showCacheWrite && stats.cacheWriteInputTokens > 0
1670
- ? ` · write ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
1671
- : "";
1672
-
1673
- return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
1674
- }
1675
-
1676
- function getErrorCode(error: unknown): string | undefined {
1677
- return typeof error === "object" && error !== null && "code" in error
1678
- ? String((error as { code?: unknown }).code)
1679
- : undefined;
1680
- }
1681
-
1682
- function parseCacheStats(value: unknown): CacheStats | undefined {
1683
- const stats = asRecord(value);
1684
- if (!stats || typeof stats.day !== "string" || !/^\d{4}-\d{2}-\d{2}$/.test(stats.day)) {
1685
- return undefined;
1686
- }
1687
-
1688
- const totalRequests = getNonNegativeNumber(stats, "totalRequests");
1689
- const hitRequests = getNonNegativeNumber(stats, "hitRequests");
1690
- const cachedInputTokens = getNonNegativeNumber(stats, "cachedInputTokens");
1691
- const cacheWriteInputTokens = getNonNegativeNumber(stats, "cacheWriteInputTokens") ?? 0;
1692
- const totalInputTokens = getNonNegativeNumber(stats, "totalInputTokens");
1693
-
1694
- if (
1695
- totalRequests === undefined ||
1696
- hitRequests === undefined ||
1697
- cachedInputTokens === undefined ||
1698
- totalInputTokens === undefined ||
1699
- hitRequests > totalRequests ||
1700
- cachedInputTokens > totalInputTokens ||
1701
- cacheWriteInputTokens > totalInputTokens
1702
- ) {
1703
- return undefined;
1704
- }
1705
-
1706
- return {
1707
- day: stats.day,
1708
- totalRequests,
1709
- hitRequests,
1710
- cachedInputTokens,
1711
- cacheWriteInputTokens,
1712
- totalInputTokens,
1713
- };
1714
- }
1715
-
1716
- function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
1717
- const record = asRecord(value);
1718
- if (!record) return undefined;
1719
-
1720
- // version 3: model-scoped stats + legacy family fallback
1721
- if (record.version === 3) {
1722
- const statsByModel: Record<string, CacheStats> = {};
1723
- const rawModelMap = asRecord(record.statsByModel);
1724
- if (rawModelMap) {
1885
+ // ── New OpenAI-compatible adapters (batch 3, 12 families) ────────
1886
+ {
1887
+ id: "openai" as CacheProviderId,
1888
+ label: "Sonar cache",
1889
+ matchesModel: isPerplexityLikeModel,
1890
+ matchesAssistantMessage(message, model) {
1891
+ if (!isAssistantMessage(message)) return false;
1892
+ return isPerplexityLikeAssistantMessage(message, model);
1893
+ },
1894
+ normalizeUsage(message) {
1895
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1896
+ },
1897
+ warningText(model) {
1898
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1899
+ if (missing.length === 0) return undefined;
1900
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1901
+ },
1902
+ },
1903
+ {
1904
+ id: "openai" as CacheProviderId,
1905
+ label: "Nova cache",
1906
+ matchesModel: isNovaLikeModel,
1907
+ matchesAssistantMessage(message, model) {
1908
+ if (!isAssistantMessage(message)) return false;
1909
+ return isNovaLikeAssistantMessage(message, model);
1910
+ },
1911
+ normalizeUsage(message) {
1912
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1913
+ },
1914
+ warningText(model) {
1915
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1916
+ if (missing.length === 0) return undefined;
1917
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1918
+ },
1919
+ },
1920
+ {
1921
+ id: "openai" as CacheProviderId,
1922
+ label: "Reka cache",
1923
+ matchesModel: isRekaLikeModel,
1924
+ matchesAssistantMessage(message, model) {
1925
+ if (!isAssistantMessage(message)) return false;
1926
+ return isRekaLikeAssistantMessage(message, model);
1927
+ },
1928
+ normalizeUsage(message) {
1929
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1930
+ },
1931
+ warningText(model) {
1932
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1933
+ if (missing.length === 0) return undefined;
1934
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1935
+ },
1936
+ },
1937
+ {
1938
+ id: "openai" as CacheProviderId,
1939
+ label: "Falcon cache",
1940
+ matchesModel: isFalconLikeModel,
1941
+ matchesAssistantMessage(message, model) {
1942
+ if (!isAssistantMessage(message)) return false;
1943
+ return isFalconLikeAssistantMessage(message, model);
1944
+ },
1945
+ normalizeUsage(message) {
1946
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1947
+ },
1948
+ warningText(model) {
1949
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1950
+ if (missing.length === 0) return undefined;
1951
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1952
+ },
1953
+ },
1954
+ {
1955
+ id: "openai" as CacheProviderId,
1956
+ label: "DBRX cache",
1957
+ matchesModel: isDbrxLikeModel,
1958
+ matchesAssistantMessage(message, model) {
1959
+ if (!isAssistantMessage(message)) return false;
1960
+ return isDbrxLikeAssistantMessage(message, model);
1961
+ },
1962
+ normalizeUsage(message) {
1963
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1964
+ },
1965
+ warningText(model) {
1966
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1967
+ if (missing.length === 0) return undefined;
1968
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1969
+ },
1970
+ },
1971
+ {
1972
+ id: "openai" as CacheProviderId,
1973
+ label: "MPT cache",
1974
+ matchesModel: isMptLikeModel,
1975
+ matchesAssistantMessage(message, model) {
1976
+ if (!isAssistantMessage(message)) return false;
1977
+ return isMptLikeAssistantMessage(message, model);
1978
+ },
1979
+ normalizeUsage(message) {
1980
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1981
+ },
1982
+ warningText(model) {
1983
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
1984
+ if (missing.length === 0) return undefined;
1985
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
1986
+ },
1987
+ },
1988
+ {
1989
+ id: "openai" as CacheProviderId,
1990
+ label: "StableLM cache",
1991
+ matchesModel: isStableLMLikeModel,
1992
+ matchesAssistantMessage(message, model) {
1993
+ if (!isAssistantMessage(message)) return false;
1994
+ return isStableLMLikeAssistantMessage(message, model);
1995
+ },
1996
+ normalizeUsage(message) {
1997
+ return normalizeWithFallback(message, getOpenAIRawUsage);
1998
+ },
1999
+ warningText(model) {
2000
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2001
+ if (missing.length === 0) return undefined;
2002
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2003
+ },
2004
+ },
2005
+ {
2006
+ id: "openai" as CacheProviderId,
2007
+ label: "Aquila cache",
2008
+ matchesModel: isAquilaLikeModel,
2009
+ matchesAssistantMessage(message, model) {
2010
+ if (!isAssistantMessage(message)) return false;
2011
+ return isAquilaLikeAssistantMessage(message, model);
2012
+ },
2013
+ normalizeUsage(message) {
2014
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2015
+ },
2016
+ warningText(model) {
2017
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2018
+ if (missing.length === 0) return undefined;
2019
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2020
+ },
2021
+ },
2022
+ {
2023
+ id: "openai" as CacheProviderId,
2024
+ label: "EXAONE cache",
2025
+ matchesModel: isExaoneLikeModel,
2026
+ matchesAssistantMessage(message, model) {
2027
+ if (!isAssistantMessage(message)) return false;
2028
+ return isExaoneLikeAssistantMessage(message, model);
2029
+ },
2030
+ normalizeUsage(message) {
2031
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2032
+ },
2033
+ warningText(model) {
2034
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2035
+ if (missing.length === 0) return undefined;
2036
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2037
+ },
2038
+ },
2039
+ {
2040
+ id: "openai" as CacheProviderId,
2041
+ label: "HyperCLOVA cache",
2042
+ matchesModel: isHyperCLOVALikeModel,
2043
+ matchesAssistantMessage(message, model) {
2044
+ if (!isAssistantMessage(message)) return false;
2045
+ return isHyperCLOVALikeAssistantMessage(message, model);
2046
+ },
2047
+ normalizeUsage(message) {
2048
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2049
+ },
2050
+ warningText(model) {
2051
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2052
+ if (missing.length === 0) return undefined;
2053
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2054
+ },
2055
+ },
2056
+ {
2057
+ id: "openai" as CacheProviderId,
2058
+ label: "Luminous cache",
2059
+ matchesModel: isLuminousLikeModel,
2060
+ matchesAssistantMessage(message, model) {
2061
+ if (!isAssistantMessage(message)) return false;
2062
+ return isLuminousLikeAssistantMessage(message, model);
2063
+ },
2064
+ normalizeUsage(message) {
2065
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2066
+ },
2067
+ warningText(model) {
2068
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2069
+ if (missing.length === 0) return undefined;
2070
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2071
+ },
2072
+ },
2073
+ {
2074
+ id: "openai" as CacheProviderId,
2075
+ label: "Hermes cache",
2076
+ matchesModel: isHermesLikeModel,
2077
+ matchesAssistantMessage(message, model) {
2078
+ if (!isAssistantMessage(message)) return false;
2079
+ return isHermesLikeAssistantMessage(message, model);
2080
+ },
2081
+ normalizeUsage(message) {
2082
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2083
+ },
2084
+ warningText(model) {
2085
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2086
+ if (missing.length === 0) return undefined;
2087
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2088
+ },
2089
+ },
2090
+ // ── More OpenAI-compatible adapters (batch 4, 18 families) ────────
2091
+ {
2092
+ id: "openai" as CacheProviderId,
2093
+ label: "Granite cache",
2094
+ matchesModel: isGraniteLikeModel,
2095
+ matchesAssistantMessage(message, model) {
2096
+ if (!isAssistantMessage(message)) return false;
2097
+ return isGraniteLikeAssistantMessage(message, model);
2098
+ },
2099
+ normalizeUsage(message) {
2100
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2101
+ },
2102
+ warningText(model) {
2103
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2104
+ if (missing.length === 0) return undefined;
2105
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2106
+ },
2107
+ },
2108
+ {
2109
+ id: "openai" as CacheProviderId,
2110
+ label: "Arctic cache",
2111
+ matchesModel: isArcticLikeModel,
2112
+ matchesAssistantMessage(message, model) {
2113
+ if (!isAssistantMessage(message)) return false;
2114
+ return isArcticLikeAssistantMessage(message, model);
2115
+ },
2116
+ normalizeUsage(message) {
2117
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2118
+ },
2119
+ warningText(model) {
2120
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2121
+ if (missing.length === 0) return undefined;
2122
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2123
+ },
2124
+ },
2125
+ {
2126
+ id: "openai" as CacheProviderId,
2127
+ label: "Pangu cache",
2128
+ matchesModel: isPanguLikeModel,
2129
+ matchesAssistantMessage(message, model) {
2130
+ if (!isAssistantMessage(message)) return false;
2131
+ return isPanguLikeAssistantMessage(message, model);
2132
+ },
2133
+ normalizeUsage(message) {
2134
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2135
+ },
2136
+ warningText(model) {
2137
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2138
+ if (missing.length === 0) return undefined;
2139
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2140
+ },
2141
+ },
2142
+ {
2143
+ id: "openai" as CacheProviderId,
2144
+ label: "SenseNova cache",
2145
+ matchesModel: isSenseNovaLikeModel,
2146
+ matchesAssistantMessage(message, model) {
2147
+ if (!isAssistantMessage(message)) return false;
2148
+ return isSenseNovaLikeAssistantMessage(message, model);
2149
+ },
2150
+ normalizeUsage(message) {
2151
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2152
+ },
2153
+ warningText(model) {
2154
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2155
+ if (missing.length === 0) return undefined;
2156
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2157
+ },
2158
+ },
2159
+ {
2160
+ id: "openai" as CacheProviderId,
2161
+ label: "Zhinao cache",
2162
+ matchesModel: isZhinaoLikeModel,
2163
+ matchesAssistantMessage(message, model) {
2164
+ if (!isAssistantMessage(message)) return false;
2165
+ return isZhinaoLikeAssistantMessage(message, model);
2166
+ },
2167
+ normalizeUsage(message) {
2168
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2169
+ },
2170
+ warningText(model) {
2171
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2172
+ if (missing.length === 0) return undefined;
2173
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2174
+ },
2175
+ },
2176
+ {
2177
+ id: "openai" as CacheProviderId,
2178
+ label: "MiniCPM cache",
2179
+ matchesModel: isMiniCPMLikeModel,
2180
+ matchesAssistantMessage(message, model) {
2181
+ if (!isAssistantMessage(message)) return false;
2182
+ return isMiniCPMLikeAssistantMessage(message, model);
2183
+ },
2184
+ normalizeUsage(message) {
2185
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2186
+ },
2187
+ warningText(model) {
2188
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2189
+ if (missing.length === 0) return undefined;
2190
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2191
+ },
2192
+ },
2193
+ {
2194
+ id: "openai" as CacheProviderId,
2195
+ label: "XVERSE cache",
2196
+ matchesModel: isXVerseLikeModel,
2197
+ matchesAssistantMessage(message, model) {
2198
+ if (!isAssistantMessage(message)) return false;
2199
+ return isXVerseLikeAssistantMessage(message, model);
2200
+ },
2201
+ normalizeUsage(message) {
2202
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2203
+ },
2204
+ warningText(model) {
2205
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2206
+ if (missing.length === 0) return undefined;
2207
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2208
+ },
2209
+ },
2210
+ {
2211
+ id: "openai" as CacheProviderId,
2212
+ label: "Orion cache",
2213
+ matchesModel: isOrionLikeModel,
2214
+ matchesAssistantMessage(message, model) {
2215
+ if (!isAssistantMessage(message)) return false;
2216
+ return isOrionLikeAssistantMessage(message, model);
2217
+ },
2218
+ normalizeUsage(message) {
2219
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2220
+ },
2221
+ warningText(model) {
2222
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2223
+ if (missing.length === 0) return undefined;
2224
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2225
+ },
2226
+ },
2227
+ {
2228
+ id: "openai" as CacheProviderId,
2229
+ label: "OpenChat cache",
2230
+ matchesModel: isOpenChatLikeModel,
2231
+ matchesAssistantMessage(message, model) {
2232
+ if (!isAssistantMessage(message)) return false;
2233
+ return isOpenChatLikeAssistantMessage(message, model);
2234
+ },
2235
+ normalizeUsage(message) {
2236
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2237
+ },
2238
+ warningText(model) {
2239
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2240
+ if (missing.length === 0) return undefined;
2241
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2242
+ },
2243
+ },
2244
+ {
2245
+ id: "openai" as CacheProviderId,
2246
+ label: "Vicuna cache",
2247
+ matchesModel: isVicunaLikeModel,
2248
+ matchesAssistantMessage(message, model) {
2249
+ if (!isAssistantMessage(message)) return false;
2250
+ return isVicunaLikeAssistantMessage(message, model);
2251
+ },
2252
+ normalizeUsage(message) {
2253
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2254
+ },
2255
+ warningText(model) {
2256
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2257
+ if (missing.length === 0) return undefined;
2258
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2259
+ },
2260
+ },
2261
+ {
2262
+ id: "openai" as CacheProviderId,
2263
+ label: "Wizard cache",
2264
+ matchesModel: isWizardLikeModel,
2265
+ matchesAssistantMessage(message, model) {
2266
+ if (!isAssistantMessage(message)) return false;
2267
+ return isWizardLikeAssistantMessage(message, model);
2268
+ },
2269
+ normalizeUsage(message) {
2270
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2271
+ },
2272
+ warningText(model) {
2273
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2274
+ if (missing.length === 0) return undefined;
2275
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2276
+ },
2277
+ },
2278
+ {
2279
+ id: "openai" as CacheProviderId,
2280
+ label: "Zephyr cache",
2281
+ matchesModel: isZephyrLikeModel,
2282
+ matchesAssistantMessage(message, model) {
2283
+ if (!isAssistantMessage(message)) return false;
2284
+ return isZephyrLikeAssistantMessage(message, model);
2285
+ },
2286
+ normalizeUsage(message) {
2287
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2288
+ },
2289
+ warningText(model) {
2290
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2291
+ if (missing.length === 0) return undefined;
2292
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2293
+ },
2294
+ },
2295
+ {
2296
+ id: "openai" as CacheProviderId,
2297
+ label: "Dolphin cache",
2298
+ matchesModel: isDolphinLikeModel,
2299
+ matchesAssistantMessage(message, model) {
2300
+ if (!isAssistantMessage(message)) return false;
2301
+ return isDolphinLikeAssistantMessage(message, model);
2302
+ },
2303
+ normalizeUsage(message) {
2304
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2305
+ },
2306
+ warningText(model) {
2307
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2308
+ if (missing.length === 0) return undefined;
2309
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2310
+ },
2311
+ },
2312
+ {
2313
+ id: "openai" as CacheProviderId,
2314
+ label: "OpenOrca cache",
2315
+ matchesModel: isOpenOrcaLikeModel,
2316
+ matchesAssistantMessage(message, model) {
2317
+ if (!isAssistantMessage(message)) return false;
2318
+ return isOpenOrcaLikeAssistantMessage(message, model);
2319
+ },
2320
+ normalizeUsage(message) {
2321
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2322
+ },
2323
+ warningText(model) {
2324
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2325
+ if (missing.length === 0) return undefined;
2326
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2327
+ },
2328
+ },
2329
+ {
2330
+ id: "openai" as CacheProviderId,
2331
+ label: "Starling cache",
2332
+ matchesModel: isStarlingLikeModel,
2333
+ matchesAssistantMessage(message, model) {
2334
+ if (!isAssistantMessage(message)) return false;
2335
+ return isStarlingLikeAssistantMessage(message, model);
2336
+ },
2337
+ normalizeUsage(message) {
2338
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2339
+ },
2340
+ warningText(model) {
2341
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2342
+ if (missing.length === 0) return undefined;
2343
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2344
+ },
2345
+ },
2346
+ {
2347
+ id: "openai" as CacheProviderId,
2348
+ label: "BLOOM cache",
2349
+ matchesModel: isBloomLikeModel,
2350
+ matchesAssistantMessage(message, model) {
2351
+ if (!isAssistantMessage(message)) return false;
2352
+ return isBloomLikeAssistantMessage(message, model);
2353
+ },
2354
+ normalizeUsage(message) {
2355
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2356
+ },
2357
+ warningText(model) {
2358
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2359
+ if (missing.length === 0) return undefined;
2360
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2361
+ },
2362
+ },
2363
+ {
2364
+ id: "openai" as CacheProviderId,
2365
+ label: "RWKV cache",
2366
+ matchesModel: isRwkvLikeModel,
2367
+ matchesAssistantMessage(message, model) {
2368
+ if (!isAssistantMessage(message)) return false;
2369
+ return isRwkvLikeAssistantMessage(message, model);
2370
+ },
2371
+ normalizeUsage(message) {
2372
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2373
+ },
2374
+ warningText(model) {
2375
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2376
+ if (missing.length === 0) return undefined;
2377
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2378
+ },
2379
+ },
2380
+ {
2381
+ id: "openai" as CacheProviderId,
2382
+ label: "Aya cache",
2383
+ matchesModel: isAyaLikeModel,
2384
+ matchesAssistantMessage(message, model) {
2385
+ if (!isAssistantMessage(message)) return false;
2386
+ return isAyaLikeAssistantMessage(message, model);
2387
+ },
2388
+ normalizeUsage(message) {
2389
+ return normalizeWithFallback(message, getOpenAIRawUsage);
2390
+ },
2391
+ warningText(model) {
2392
+ const missing = describeMissingOpenAICompatibleProxyCompat(model);
2393
+ if (missing.length === 0) return undefined;
2394
+ return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
2395
+ },
2396
+ },
2397
+ ];
2398
+
2399
/**
 * Picks the cache adapter for a model.
 *
 * Adapters are tried in registration order; the first one whose
 * `matchesModel` predicate accepts `model` wins.
 *
 * @param model - The active model, or `undefined` when none is selected.
 * @returns The first matching adapter, or `undefined` when none matches.
 */
function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
  for (const candidate of CACHE_PROVIDER_ADAPTERS) {
    if (candidate.matchesModel(model)) return candidate;
  }
  return undefined;
}
2402
+
2403
/**
 * Picks the cache adapter for an assistant message.
 *
 * Adapters are tried in registration order; the first one whose
 * `matchesAssistantMessage` predicate accepts the pair wins.
 *
 * @param message - The candidate assistant message (unvalidated).
 * @param model - The active model, or `undefined` when none is selected.
 * @returns The first matching adapter, or `undefined` when none matches.
 */
function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
  for (const candidate of CACHE_PROVIDER_ADAPTERS) {
    if (candidate.matchesAssistantMessage(message, model)) return candidate;
  }
  return undefined;
}
2406
+
2407
/**
 * Shows a one-time compat warning for the given model.
 *
 * Nothing happens when there is no model, no adapter matches it, or the
 * adapter has no warning text for it. Otherwise the warning is shown once per
 * `<adapter id>:<model key>` pair, tracked in `warnedModels`.
 *
 * @param model - The active model, if any.
 * @param ctx - Extension context used to surface the UI notification.
 * @param warnedModels - Set of pairs already warned about; mutated here.
 */
function notifyCacheCompatIfNeeded(
  model: PiModel | undefined,
  ctx: ExtensionContext,
  warnedModels: Set<string>,
): void {
  if (!model) return;

  const adapter = selectAdapterForModel(model);
  if (!adapter) return;

  const text = adapter.warningText?.(model);
  if (!text) return;

  const dedupeKey = `${adapter.id}:${modelKey(model)}`;
  if (!warnedModels.has(dedupeKey)) {
    warnedModels.add(dedupeKey);
    ctx.ui.notify(text, "warning");
  }
}
2424
+
2425
/**
 * Returns today's date in the local time zone as `YYYY-MM-DD`.
 *
 * Month and day are zero-padded to two digits.
 */
function currentLocalDay(): string {
  const today = new Date();
  const pad2 = (part: number): string => String(part).padStart(2, "0");
  // getMonth() is zero-based, hence the +1.
  return [String(today.getFullYear()), pad2(today.getMonth() + 1), pad2(today.getDate())].join("-");
}
2432
+
2433
/**
 * Builds a zeroed stats record for one day.
 *
 * @param day - Day label for the record; defaults to the current local day.
 * @returns A fresh `CacheStats` object with every counter at 0.
 */
function emptyCacheStats(day = currentLocalDay()): CacheStats {
  const zeroed: CacheStats = {
    day,
    totalRequests: 0,
    hitRequests: 0,
    cachedInputTokens: 0,
    cacheWriteInputTokens: 0,
    totalInputTokens: 0,
  };
  return zeroed;
}
2443
+
2444
/**
 * Builds a zeroed stats record for every id in `CACHE_PROVIDER_IDS`.
 *
 * @param day - Day label applied to every record; defaults to the current local day.
 * @returns A map from provider id to its own fresh, independent `CacheStats`.
 */
function emptyAllCacheStats(day = currentLocalDay()): Partial<Record<CacheProviderId, CacheStats>> {
  const statsById: Partial<Record<CacheProviderId, CacheStats>> = {};
  for (const id of CACHE_PROVIDER_IDS) {
    statsById[id] = emptyCacheStats(day);
  }
  return statsById;
}
2447
+
2448
/**
 * Folds one request's usage into the running stats, in place.
 *
 * A request counts as a cache hit when it read at least one cached token.
 *
 * @param stats - Accumulator to mutate.
 * @param usage - Usage snapshot of a single request.
 */
function addUsageToCacheStats(stats: CacheStats, usage: UsageSnapshot): void {
  const { cacheRead, cacheWrite, totalInput } = usage;

  stats.totalRequests += 1;
  if (cacheRead > 0) {
    stats.hitRequests += 1;
  }

  stats.cachedInputTokens += cacheRead;
  stats.cacheWriteInputTokens += cacheWrite;
  stats.totalInputTokens += totalInput;
}
2455
+
2456
/**
 * Formats a token count as millions with an `M` suffix.
 *
 * The value is rounded to a whole token and clamped at 0 first. Precision
 * grows as the amount shrinks so small counts do not collapse to `0.00M`:
 *   - exactly 0           -> "0M"
 *   - below 0.001M        -> 4 decimals
 *   - below 0.01M         -> 3 decimals
 *   - 10M and above       -> 1 decimal
 *   - everything else     -> 2 decimals
 *
 * Non-finite input (NaN, ±Infinity) is treated as 0 so the output never
 * contains "NaNM" or "InfinityM".
 *
 * @param value - Token count; may be fractional or negative.
 * @returns The formatted string, e.g. "1.50M".
 */
function formatTokenCount(value: number): string {
  const tokens = Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0;
  const millions = tokens / 1_000_000;
  if (millions === 0) return "0M";
  if (millions < 0.001) return `${millions.toFixed(4)}M`;
  if (millions < 0.01) return `${millions.toFixed(3)}M`;
  if (millions >= 10) return `${millions.toFixed(1)}M`;
  return `${millions.toFixed(2)}M`;
}
2464
+
2465
/**
 * Renders one adapter's stats as a single summary line:
 * `<label> <hits>/<requests> · <cached>/<total> tok (<pct>%) · write <n> tok`.
 *
 * The percentage is omitted when no input tokens were recorded. The write
 * segment appears only when the adapter sets `showCacheWrite` and at least
 * one cache-write token was counted.
 *
 * @param adapter - Adapter supplying the label and the `showCacheWrite` flag.
 * @param stats - Stats to render.
 * @returns The formatted summary line.
 */
function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
  const { hitRequests, totalRequests, cachedInputTokens, cacheWriteInputTokens, totalInputTokens } = stats;

  let hitRatio = "";
  if (totalInputTokens > 0) {
    hitRatio = ` (${Math.round((cachedInputTokens / totalInputTokens) * 100)}%)`;
  }

  let writeSuffix = "";
  if (adapter.showCacheWrite && cacheWriteInputTokens > 0) {
    writeSuffix = ` · write ${formatTokenCount(cacheWriteInputTokens)} tok`;
  }

  const requestPart = `${hitRequests}/${totalRequests}`;
  const tokenPart = `${formatTokenCount(cachedInputTokens)}/${formatTokenCount(totalInputTokens)} tok`;
  return `${adapter.label} ${requestPart} · ${tokenPart}${hitRatio}${writeSuffix}`;
}
2475
+
2476
/**
 * Extracts the `code` property of an error-like value as a string.
 *
 * @param error - Any thrown value.
 * @returns `String(error.code)` when `error` is a non-null object whose `code`
 *   is neither `undefined` nor `null`; otherwise `undefined`. A present but
 *   nullish `code` is reported as `undefined` rather than as the literal
 *   strings "undefined" / "null".
 */
function getErrorCode(error: unknown): string | undefined {
  if (typeof error !== "object" || error === null || !("code" in error)) {
    return undefined;
  }
  const code = (error as { code?: unknown }).code;
  return code === undefined || code === null ? undefined : String(code);
}
2481
+
2482
/**
 * Validates an untrusted value as a `CacheStats` record.
 *
 * The value must be a record with a `YYYY-MM-DD` `day` string and
 * non-negative numeric counters. `cacheWriteInputTokens` is optional and
 * defaults to 0. Records whose counters contradict each other (more hits than
 * requests, more cached or written tokens than total tokens) are rejected.
 *
 * @param value - Raw value, e.g. from persisted JSON.
 * @returns A normalized `CacheStats`, or `undefined` when validation fails.
 */
function parseCacheStats(value: unknown): CacheStats | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;

  const day = raw.day;
  if (typeof day !== "string" || !/^\d{4}-\d{2}-\d{2}$/.test(day)) return undefined;

  const totalRequests = getNonNegativeNumber(raw, "totalRequests");
  const hitRequests = getNonNegativeNumber(raw, "hitRequests");
  const cachedInputTokens = getNonNegativeNumber(raw, "cachedInputTokens");
  // Optional field: a missing value counts as zero.
  const cacheWriteInputTokens = getNonNegativeNumber(raw, "cacheWriteInputTokens") ?? 0;
  const totalInputTokens = getNonNegativeNumber(raw, "totalInputTokens");

  // Required counters must all be present.
  if (totalRequests === undefined || hitRequests === undefined) return undefined;
  if (cachedInputTokens === undefined || totalInputTokens === undefined) return undefined;

  // Reject internally inconsistent records.
  if (hitRequests > totalRequests) return undefined;
  if (cachedInputTokens > totalInputTokens) return undefined;
  if (cacheWriteInputTokens > totalInputTokens) return undefined;

  return { day, totalRequests, hitRequests, cachedInputTokens, cacheWriteInputTokens, totalInputTokens };
}
2515
+
2516
+ function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
2517
+ const record = asRecord(value);
2518
+ if (!record) return undefined;
2519
+
2520
+ // version 3: model-scoped stats + legacy family fallback
2521
+ if (record.version === 3) {
2522
+ const statsByModel: Record<string, CacheStats> = {};
2523
+ const rawModelMap = asRecord(record.statsByModel);
2524
+ if (rawModelMap) {
1725
2525
  for (const [key, val] of Object.entries(rawModelMap)) {
1726
2526
  const parsed = parseCacheStats(val);
1727
2527
  if (parsed) statsByModel[key] = parsed;
@@ -1821,6 +2621,171 @@ function isCompatCheckApplicable(model: PiModel): boolean {
1821
2621
  return lower(model.api) === "openai-completions" && !isOfficialOpenAIBaseUrl(model);
1822
2622
  }
1823
2623
 
2624
/**
 * Reports whether `compat[routingKey]` already pins the router to specific
 * upstreams, i.e. it is an object carrying a truthy `only` or `order` entry.
 *
 * Every other shape (nullish compat, missing key, primitive value) counts as
 * "not pinned" instead of throwing.
 */
function hasPinnedRouterRouting(compat: unknown, routingKey: string): boolean {
  if (typeof compat !== "object" || compat === null) return false;
  const routing = (compat as Record<string, unknown>)[routingKey];
  if (typeof routing !== "object" || routing === null) return false;
  const { only, order } = routing as { only?: unknown; order?: unknown };
  return Boolean(only) || Boolean(order);
}

/**
 * Detect router / channel profiles from a PiModel and return diagnostic notes.
 *
 * This function is advisory only — it does NOT participate in adapter selection,
 * prompt_cache_key injection, or footer stats. It inspects provider, api, baseUrl,
 * and compat to identify common proxy/router patterns where cache performance may
 * be degraded due to multi-backend routing.
 *
 * Known profiles (checked in order; the first match wins):
 *   1. OpenRouter — baseUrl or provider id containing "openrouter"
 *   2. Vercel AI Gateway — baseUrl containing ai-gateway.vercel.sh, or provider
 *      containing "vercel"
 *   3. LiteLLM / OneAPI / NewAPI / VoAPI — baseUrl or provider containing litellm,
 *      oneapi, one-api, newapi, new-api, voapi, vo-api (self-hosted aggregation)
 *   4. Generic third-party OpenAI-compatible proxy — any openai-completions model
 *      with a non-empty, non-official base URL that matched none of the above.
 *
 * Models whose api is neither "openai-completions" nor "openai-responses", and
 * models on the official OpenAI base URL, do NOT produce notes.
 *
 * @param model - The model to inspect.
 * @returns Human-readable note lines; empty when no profile applies.
 */
function describeRouterChannelDiagnostics(model: PiModel): string[] {
  const notes: string[] = [];
  const api = lower(model.api);
  const baseUrl = lower(model.baseUrl || "");
  const provider = lower(model.provider);

  // Only OpenAI-compatible APIs are applicable for router/channel diagnostics.
  if (api !== "openai-completions" && api !== "openai-responses") {
    return notes;
  }

  // Official OpenAI bypass — no notes needed.
  if (isOfficialOpenAIBaseUrl(model)) {
    return notes;
  }

  // ── 1. OpenRouter ────────────────────────────────────────────────
  // "openrouter" also covers "openrouter.ai", so one substring test suffices.
  if (baseUrl.includes("openrouter") || provider.includes("openrouter")) {
    notes.push(
      "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
        "low cache hit rates are common when each turn lands on a different upstream provider.",
    );

    // Only suggest pinning when the user has not already configured it.
    if (!hasPinnedRouterRouting(getCompat(model), "openRouterRouting")) {
      notes.push(
        " Suggestion: Add an openRouterRouting config to fix the upstream provider. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
      );
      notes.push(
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"openRouterRouting": { "only": ["<provider-slug>"] } }`,
      );
      notes.push(
        ' Replace <provider-slug> with the actual OpenRouter provider slug (e.g. "openai", "anthropic").',
      );
      notes.push(
        " Alternatively, use openRouterRouting.order: [\"<provider-slug>\", \"...\"] for fallback order. " +
          "Only set supportsLongCacheRetention if your upstream supports long cache retention.",
      );
    }

    return notes;
  }

  // ── 2. Vercel AI Gateway ─────────────────────────────────────────
  // "vercel" also covers "vercel-ai-gateway".
  if (baseUrl.includes("ai-gateway.vercel.sh") || provider.includes("vercel")) {
    notes.push(
      "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
        "provider endpoints per request, reducing cache locality.",
    );

    if (!hasPinnedRouterRouting(getCompat(model), "vercelGatewayRouting")) {
      notes.push(
        " Suggestion: Add a vercelGatewayRouting config to fix the upstream. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
      );
      notes.push(
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
      );
      notes.push(
        " Replace <provider-id> with the actual Vercel provider ID (e.g. \"openai\").",
      );
      notes.push(
        " Only set supportsLongCacheRetention if your upstream supports it.",
      );
    }

    return notes;
  }

  // ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
  const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
  if (aggregationPatterns.some((p) => baseUrl.includes(p) || provider.includes(p))) {
    notes.push(
      "🔀 Router/channel: Self-hosted aggregation proxy detected (LiteLLM / OneAPI / NewAPI / VoAPI). " +
        "These proxies route to multiple upstream accounts or instances, which can split the cache.",
    );
    notes.push(
      " Suggestions:",
    );
    notes.push(
      " • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
    );
    notes.push(
      " • Forward prompt_cache_key and session-affinity headers to the upstream.",
    );
    notes.push(
      " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
    );
    notes.push(
      ` Example compat: { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true }`,
    );

    return notes;
  }

  // ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
  // Requires a non-empty base URL; openai-responses models get no generic note.
  if (api === "openai-completions" && baseUrl) {
    const missing = describeMissingOpenAICompatibleProxyCompat(model);
    notes.push(
      "🔀 Router/channel: Third-party OpenAI-compatible proxy. If cache hit rates are low:",
    );
    notes.push(
      " • Verify the proxy routes to the same upstream account/instance per session.",
    );
    notes.push(
      " • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
    );
    notes.push(
      " • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
    );
    if (missing.length > 0) {
      notes.push(
        ` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
      );
    }

    return notes;
  }

  return notes;
}
2788
+
1824
2789
  function buildDoctorDiagnosis(model: PiModel): string {
1825
2790
  const lines: string[] = [];
1826
2791
  lines.push(`Provider: ${model.provider}`);
@@ -1848,6 +2813,15 @@ function buildDoctorDiagnosis(model: PiModel): string {
1848
2813
  lines.push("ℹ️ Compat check not applicable for this model.");
1849
2814
  }
1850
2815
 
2816
+ // ── Router/channel diagnostics ──
2817
+ const routerNotes = describeRouterChannelDiagnostics(model);
2818
+ if (routerNotes.length > 0) {
2819
+ lines.push("");
2820
+ for (const note of routerNotes) {
2821
+ lines.push(note);
2822
+ }
2823
+ }
2824
+
1851
2825
  // ── Integrity diagnostics ──
1852
2826
  if (lastPromptIntegrityWarningAt > 0) {
1853
2827
  const ago = Date.now() - lastPromptIntegrityWarningAt;
@@ -1870,21 +2844,46 @@ function buildDoctorDiagnosis(model: PiModel): string {
1870
2844
 
1871
2845
/**
 * Builds the multi-line compat report for a model.
 *
 * The report has up to two sections:
 *   - when compat flags are missing: the active model, the missing flags, and
 *     a models.json snippet enabling them;
 *   - when router/channel notes exist: those notes, preceded by a one-line
 *     compat status if no flags are missing.
 *
 * @param model - The model to diagnose.
 * @returns The report joined with newlines, or `undefined` when there are
 *   neither missing flags nor router notes.
 */
function buildCompatDiagnosis(model: PiModel): string | undefined {
  const missing = describeMissingOpenAICompatibleProxyCompat(model);
  const routerNotes = describeRouterChannelDiagnostics(model);

  const hasMissing = missing.length > 0;
  const hasRouterNotes = routerNotes.length > 0;
  if (!hasMissing && !hasRouterNotes) return undefined;

  const key = modelKey(model);
  const report: string[] = [];

  if (hasMissing) {
    // The provider label is the part of the model key before the first "/".
    const slashIdx = key.indexOf("/");
    const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
    const suggestion: Record<string, boolean> = {};
    for (const flag of missing) {
      suggestion[flag] = true;
    }
    const modelsJsonPath = getModelsJsonDisplayPath();
    report.push(
      `Active model: ${key}`,
      `Missing: ${missing.join(", ")}`,
      "",
      `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`,
      `(at the same level as baseUrl/api/apiKey/models) and add:`,
      JSON.stringify(suggestion, null, 2),
      "",
      `Only enable if your endpoint supports them.`,
    );
  }

  if (hasRouterNotes) {
    if (hasMissing) {
      // Blank separator between the compat section and the router notes.
      report.push("");
    } else {
      // Compat has nothing to report, so state its status before the notes.
      const status = isCompatCheckApplicable(model)
        ? "✅ Compat fully configured."
        : "ℹ️ Compat check not applicable for this model.";
      report.push(status, "");
    }
    report.push(...routerNotes);
  }

  return report.join("\n");
}
1889
2888
 
1890
2889
  // Internal helpers exported only so the task verification script
@@ -1960,6 +2959,68 @@ export const __internals_for_tests = {
1960
2959
  isJambaLikeAssistantMessage,
1961
2960
  isSolarLikeModel,
1962
2961
  isSolarLikeAssistantMessage,
2962
+ // New OpenAI-compatible model detection (batch 3, 12 families)
2963
+ isPerplexityLikeModel,
2964
+ isPerplexityLikeAssistantMessage,
2965
+ isNovaLikeModel,
2966
+ isNovaLikeAssistantMessage,
2967
+ isRekaLikeModel,
2968
+ isRekaLikeAssistantMessage,
2969
+ isFalconLikeModel,
2970
+ isFalconLikeAssistantMessage,
2971
+ isDbrxLikeModel,
2972
+ isDbrxLikeAssistantMessage,
2973
+ isMptLikeModel,
2974
+ isMptLikeAssistantMessage,
2975
+ isStableLMLikeModel,
2976
+ isStableLMLikeAssistantMessage,
2977
+ isAquilaLikeModel,
2978
+ isAquilaLikeAssistantMessage,
2979
+ isExaoneLikeModel,
2980
+ isExaoneLikeAssistantMessage,
2981
+ isHyperCLOVALikeModel,
2982
+ isHyperCLOVALikeAssistantMessage,
2983
+ isLuminousLikeModel,
2984
+ isLuminousLikeAssistantMessage,
2985
+ isHermesLikeModel,
2986
+ isHermesLikeAssistantMessage,
2987
+ // More OpenAI-compatible model detection (batch 4, 18 families)
2988
+ isGraniteLikeModel,
2989
+ isGraniteLikeAssistantMessage,
2990
+ isArcticLikeModel,
2991
+ isArcticLikeAssistantMessage,
2992
+ isPanguLikeModel,
2993
+ isPanguLikeAssistantMessage,
2994
+ isSenseNovaLikeModel,
2995
+ isSenseNovaLikeAssistantMessage,
2996
+ isZhinaoLikeModel,
2997
+ isZhinaoLikeAssistantMessage,
2998
+ isMiniCPMLikeModel,
2999
+ isMiniCPMLikeAssistantMessage,
3000
+ isXVerseLikeModel,
3001
+ isXVerseLikeAssistantMessage,
3002
+ isOrionLikeModel,
3003
+ isOrionLikeAssistantMessage,
3004
+ isOpenChatLikeModel,
3005
+ isOpenChatLikeAssistantMessage,
3006
+ isVicunaLikeModel,
3007
+ isVicunaLikeAssistantMessage,
3008
+ isWizardLikeModel,
3009
+ isWizardLikeAssistantMessage,
3010
+ isZephyrLikeModel,
3011
+ isZephyrLikeAssistantMessage,
3012
+ isDolphinLikeModel,
3013
+ isDolphinLikeAssistantMessage,
3014
+ isOpenOrcaLikeModel,
3015
+ isOpenOrcaLikeAssistantMessage,
3016
+ isStarlingLikeModel,
3017
+ isStarlingLikeAssistantMessage,
3018
+ isBloomLikeModel,
3019
+ isBloomLikeAssistantMessage,
3020
+ isRwkvLikeModel,
3021
+ isRwkvLikeAssistantMessage,
3022
+ isAyaLikeModel,
3023
+ isAyaLikeAssistantMessage,
1963
3024
  buildOpenAIProxyCompatWarningText,
1964
3025
  getModelIdNameTokenValues,
1965
3026
  getAssistantMessageModelTokenValues,
@@ -1973,6 +3034,7 @@ export const __internals_for_tests = {
1973
3034
  isCompatCheckApplicable,
1974
3035
  buildDoctorDiagnosis,
1975
3036
  buildCompatDiagnosis,
3037
+ describeRouterChannelDiagnostics,
1976
3038
  // Cache stats helpers (module-level, usable from verify script)
1977
3039
  addUsageToCacheStats,
1978
3040
  formatCacheStats,