pi-cache-optimizer 2.4.6 → 2.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -4
- package/README.zh-CN.md +48 -1
- package/index.ts +1203 -141
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -91,6 +91,15 @@ const MIN_STABLE_CANDIDATE_LENGTH = 8;
|
|
|
91
91
|
/** Property names "model" and "name" (presumably the assistant-message keys scanned for model tokens; confirm at the use site). */
const ASSISTANT_MESSAGE_MODEL_TOKEN_KEYS = ["model", "name"];

// Boundary-delimited token patterns. Each one accepts its keyword only when it
// sits at the start of the string or after one of "/", whitespace, ":", "_",
// "-", and is followed by the end of the string or one of "-", "_", ".", ":",
// "/", whitespace. None of them uses the `g`/`y` flag, so `.test()` keeps no
// state between calls.

/** Matches "o1", "o3", "o4" or "o5" as a delimited token (case-sensitive). */
const OPENAI_REASONING_MODEL_PATTERN = /(^|[/\s:_-])o[1345]($|[-_.:/\s])/;
/** Matches "xai" as a delimited token (case-sensitive). */
const XAI_MODEL_PATTERN = /(^|[/\s:_-])xai($|[-_.:/\s])/;
/** Matches "pplx" as a delimited token, ignoring case. */
const PPLX_MODEL_PATTERN = /(^|[/\s:_-])pplx($|[-_.:/\s])/i;
/** Matches "nova" as a delimited token, ignoring case (so "sensenova" is rejected). */
const NOVA_MODEL_PATTERN = /(^|[/\s:_-])nova($|[-_.:/\s])/i;
/** Matches "mpt" as a delimited token, ignoring case (so "prompt" is rejected). */
const MPT_MODEL_PATTERN = /(^|[/\s:_-])mpt($|[-_.:/\s])/i;
/** Matches "aleph" as a delimited token, ignoring case. */
const ALEPH_MODEL_PATTERN = /(^|[/\s:_-])aleph($|[-_.:/\s])/i;

// Safe-boundary patterns for models with short or ambiguous tokens.
// Written without the redundant "\/" and "\-" escapes so that they read the
// same as the patterns above; the accepted strings are unchanged.
/** Matches "arctic" as a delimited token, ignoring case (so "antarctic" is rejected). */
const ARCTIC_MODEL_PATTERN = /(^|[/\s:_-])arctic($|[-_.:/\s])/i;
/** Matches "aya" as a delimited token, ignoring case (so "maya" is rejected). */
const AYA_MODEL_PATTERN = /(^|[/\s:_-])aya($|[-_.:/\s])/i;
/** Matches "orion" as a delimited token, ignoring case (so "orionstar" is rejected). */
const ORION_MODEL_PATTERN = /(^|[/\s:_-])orion($|[-_.:/\s])/i;
|
|
94
103
|
|
|
95
104
|
type CacheCompat = {
|
|
96
105
|
sendSessionAffinityHeaders?: boolean;
|
|
@@ -847,6 +856,285 @@ function isSolarLikeAssistantMessage(message: unknown, model: PiModel | undefine
|
|
|
847
856
|
return modelOrAssistantMessageHas(message, model, ["solar", "upstage"]);
|
|
848
857
|
}
|
|
849
858
|
|
|
859
|
+
// ── New OpenAI-compatible model detection (batch 3, 12 families) ──────
|
|
860
|
+
|
|
861
|
+
// Perplexity / Sonar
|
|
862
|
+
/**
 * Model-side detector for Perplexity / Sonar.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "sonar" or "perplexity", or when any token matches `PPLX_MODEL_PATTERN`.
 */
function isPerplexityLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["sonar", "perplexity"])) {
    return true;
  }
  return modelTokens.some((token) => PPLX_MODEL_PATTERN.test(token));
}
|
|
866
|
+
/**
 * Message-side detector for Perplexity / Sonar.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the same needle and `PPLX_MODEL_PATTERN` checks.
 */
function isPerplexityLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["sonar", "perplexity"])) {
    return true;
  }
  return candidates.some((token) => PPLX_MODEL_PATTERN.test(token));
}
|
|
873
|
+
|
|
874
|
+
// Amazon Nova
|
|
875
|
+
/**
 * Model-side detector for Amazon Nova.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "amazon-nova", or when any token matches `NOVA_MODEL_PATTERN`.
 */
function isNovaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["amazon-nova"])) {
    return true;
  }
  return modelTokens.some((token) => NOVA_MODEL_PATTERN.test(token));
}
|
|
879
|
+
/**
 * Message-side detector for Amazon Nova.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the "amazon-nova" needle and `NOVA_MODEL_PATTERN`.
 */
function isNovaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["amazon-nova"])) {
    return true;
  }
  return candidates.some((token) => NOVA_MODEL_PATTERN.test(token));
}
|
|
886
|
+
|
|
887
|
+
// Reka
|
|
888
|
+
/**
 * Model-side detector for Reka.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "reka".
 */
function isRekaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["reka"]);
}
|
|
891
|
+
/**
 * Message-side detector for Reka.
 * Delegates to `modelOrAssistantMessageHas` with the needle "reka".
 */
function isRekaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["reka"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
894
|
+
|
|
895
|
+
// Falcon / TII
|
|
896
|
+
/**
 * Model-side detector for Falcon / TII.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "falcon" and "tiiuae".
 */
function isFalconLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["falcon", "tiiuae"]);
}
|
|
899
|
+
/**
 * Message-side detector for Falcon / TII.
 * Delegates to `modelOrAssistantMessageHas` with the needles "falcon" and
 * "tiiuae".
 */
function isFalconLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["falcon", "tiiuae"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
902
|
+
|
|
903
|
+
// Databricks DBRX
|
|
904
|
+
/**
 * Model-side detector for Databricks DBRX.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "dbrx" and "databricks".
 */
function isDbrxLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["dbrx", "databricks"]);
}
|
|
907
|
+
/**
 * Message-side detector for Databricks DBRX.
 * Delegates to `modelOrAssistantMessageHas` with the needles "dbrx" and
 * "databricks".
 */
function isDbrxLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["dbrx", "databricks"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
910
|
+
|
|
911
|
+
// MosaicML MPT
|
|
912
|
+
/**
 * Model-side detector for MosaicML MPT.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "mosaicml", or when any token matches the boundary-delimited
 * `MPT_MODEL_PATTERN`.
 *
 * The bare "mpt-" needle is intentionally not used: `MPT_MODEL_PATTERN`
 * already accepts "mpt" followed by "-", and — assuming
 * `hasAnyTokenContaining` is a plain substring check (TODO confirm) — "mpt-"
 * would also hit unrelated tokens such as "prompt-guard", defeating the
 * boundary pattern.
 *
 * @param model - Model descriptor to inspect; may be undefined.
 * @returns Whether the model looks like an MPT model.
 */
function isMptLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["mosaicml"])) {
    return true;
  }
  return modelTokens.some((token) => MPT_MODEL_PATTERN.test(token));
}
|
|
916
|
+
/**
 * Message-side detector for MosaicML MPT.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then accepts when `hasAnyTokenContaining` finds "mosaicml" or any
 * token matches the boundary-delimited `MPT_MODEL_PATTERN`.
 *
 * The bare "mpt-" needle is intentionally not used: `MPT_MODEL_PATTERN`
 * already accepts "mpt" followed by "-", and — assuming
 * `hasAnyTokenContaining` is a plain substring check (TODO confirm) — "mpt-"
 * would also hit unrelated tokens such as "prompt-guard".
 *
 * @param message - Candidate assistant message (shape not validated here).
 * @param model - Model descriptor associated with the message, if any.
 * @returns Whether the message/model pair looks like MPT.
 */
function isMptLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const candidates = [
    ...getModelIdNameTokenValues(model),
    ...getAssistantMessageModelTokenValues(message),
  ];
  if (hasAnyTokenContaining(candidates, ["mosaicml"])) {
    return true;
  }
  return candidates.some((token) => MPT_MODEL_PATTERN.test(token));
}
|
|
923
|
+
|
|
924
|
+
// StableLM / Stability AI
|
|
925
|
+
/**
 * Model-side detector for StableLM / Stability AI.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "stablelm", "stable-lm" and "stability-ai".
 */
function isStableLMLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["stablelm", "stable-lm", "stability-ai"]);
}
|
|
928
|
+
/**
 * Message-side detector for StableLM / Stability AI.
 * Delegates to `modelOrAssistantMessageHas` with the needles "stablelm",
 * "stable-lm" and "stability-ai".
 */
function isStableLMLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["stablelm", "stable-lm", "stability-ai"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
931
|
+
|
|
932
|
+
// BAAI / Aquila
|
|
933
|
+
/**
 * Model-side detector for BAAI / Aquila.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "aquila" and "baai".
 */
function isAquilaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["aquila", "baai"]);
}
|
|
936
|
+
/**
 * Message-side detector for BAAI / Aquila.
 * Delegates to `modelOrAssistantMessageHas` with the needles "aquila" and
 * "baai".
 */
function isAquilaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["aquila", "baai"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
939
|
+
|
|
940
|
+
// LG EXAONE
|
|
941
|
+
/**
 * Model-side detector for LG EXAONE.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "exaone".
 */
function isExaoneLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["exaone"]);
}
|
|
944
|
+
/**
 * Message-side detector for LG EXAONE.
 * Delegates to `modelOrAssistantMessageHas` with the needle "exaone".
 */
function isExaoneLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["exaone"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
947
|
+
|
|
948
|
+
// Naver HyperCLOVA X (conservative: hyperclova, clova-x only)
|
|
949
|
+
/**
 * Model-side detector for Naver HyperCLOVA X.
 * Deliberately conservative: only the needles "hyperclova" and "clova-x" are
 * handed to `hasAnyTokenContaining` together with the model's id/name tokens.
 */
function isHyperCLOVALikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["hyperclova", "clova-x"]);
}
|
|
952
|
+
/**
 * Message-side detector for Naver HyperCLOVA X.
 * Delegates to `modelOrAssistantMessageHas` with the needles "hyperclova" and
 * "clova-x".
 */
function isHyperCLOVALikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["hyperclova", "clova-x"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
955
|
+
|
|
956
|
+
// Aleph Alpha Luminous
|
|
957
|
+
/**
 * Model-side detector for Aleph Alpha Luminous.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "luminous" or "aleph-alpha", or when any token matches
 * `ALEPH_MODEL_PATTERN`.
 */
function isLuminousLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["luminous", "aleph-alpha"])) {
    return true;
  }
  return modelTokens.some((token) => ALEPH_MODEL_PATTERN.test(token));
}
|
|
961
|
+
/**
 * Message-side detector for Aleph Alpha Luminous.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the "luminous" / "aleph-alpha" needles and
 * `ALEPH_MODEL_PATTERN`.
 */
function isLuminousLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["luminous", "aleph-alpha"])) {
    return true;
  }
  return candidates.some((token) => ALEPH_MODEL_PATTERN.test(token));
}
|
|
968
|
+
|
|
969
|
+
// Nous / Hermes / OpenHermes
|
|
970
|
+
/**
 * Matches "nous" only as a delimited token (start of string or after "/",
 * whitespace, ":", "_", "-"; followed by end of string or "-", "_", ".", ":",
 * "/", whitespace), ignoring case. Accepts "nous-hermes" and "nous: hermes";
 * rejects "luminous", "autonomous" and "nousresearch".
 */
const NOUS_MODEL_PATTERN = /(^|[/\s:_-])nous($|[-_.:/\s])/i;

/**
 * Model-side detector for Nous / Hermes / OpenHermes.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "nousresearch", "hermes" or "openhermes", or when any token matches
 * `NOUS_MODEL_PATTERN`.
 *
 * The bare "nous" needle is intentionally not used: assuming
 * `hasAnyTokenContaining` is a plain substring check (TODO confirm), it would
 * also accept tokens such as "luminous-base", a family this file detects
 * separately.
 *
 * @param model - Model descriptor to inspect; may be undefined.
 * @returns Whether the model looks like a Nous/Hermes model.
 */
function isHermesLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["nousresearch", "hermes", "openhermes"])) {
    return true;
  }
  return modelTokens.some((token) => NOUS_MODEL_PATTERN.test(token));
}

/**
 * Message-side detector for Nous / Hermes / OpenHermes.
 *
 * First asks `modelOrAssistantMessageHas` about the needles "nousresearch",
 * "hermes" and "openhermes"; otherwise pools the model's id/name tokens with
 * the tokens read from the assistant message and tests each one against
 * `NOUS_MODEL_PATTERN`.
 *
 * @param message - Candidate assistant message (shape not validated here).
 * @param model - Model descriptor associated with the message, if any.
 * @returns Whether the message/model pair looks like Nous/Hermes.
 */
function isHermesLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  if (modelOrAssistantMessageHas(message, model, ["nousresearch", "hermes", "openhermes"])) {
    return true;
  }
  const candidates = [
    ...getModelIdNameTokenValues(model),
    ...getAssistantMessageModelTokenValues(message),
  ];
  return candidates.some((token) => NOUS_MODEL_PATTERN.test(token));
}
|
|
976
|
+
|
|
977
|
+
// ── More OpenAI-compatible model detection (batch 4, 18 families) ──
|
|
978
|
+
|
|
979
|
+
// IBM Granite
|
|
980
|
+
/**
 * Model-side detector for IBM Granite.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "granite" and "ibm-granite".
 */
function isGraniteLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["granite", "ibm-granite"]);
}
|
|
983
|
+
/**
 * Message-side detector for IBM Granite.
 * Delegates to `modelOrAssistantMessageHas` with the needles "granite" and
 * "ibm-granite".
 */
function isGraniteLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["granite", "ibm-granite"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
986
|
+
|
|
987
|
+
// Snowflake Arctic
|
|
988
|
+
/**
 * Model-side detector for Snowflake Arctic.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "snowflake-arctic", or when any token matches `ARCTIC_MODEL_PATTERN`.
 */
function isArcticLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["snowflake-arctic"])) {
    return true;
  }
  return modelTokens.some((token) => ARCTIC_MODEL_PATTERN.test(token));
}
|
|
992
|
+
/**
 * Message-side detector for Snowflake Arctic.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the "snowflake-arctic" needle and
 * `ARCTIC_MODEL_PATTERN`.
 */
function isArcticLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["snowflake-arctic"])) {
    return true;
  }
  return candidates.some((token) => ARCTIC_MODEL_PATTERN.test(token));
}
|
|
999
|
+
|
|
1000
|
+
// Huawei Pangu / 盘古
|
|
1001
|
+
/**
 * Model-side detector for Huawei Pangu.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "pangu", "pan-gu", "盘古" and "huawei-pangu".
 */
function isPanguLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["pangu", "pan-gu", "盘古", "huawei-pangu"]);
}
|
|
1004
|
+
/**
 * Message-side detector for Huawei Pangu.
 * Delegates to `modelOrAssistantMessageHas` with the needles "pangu",
 * "pan-gu", "盘古" and "huawei-pangu".
 */
function isPanguLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["pangu", "pan-gu", "盘古", "huawei-pangu"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1007
|
+
|
|
1008
|
+
// SenseTime SenseNova / 商汤
|
|
1009
|
+
/**
 * Model-side detector for SenseTime SenseNova.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "sensenova", "sense-nova", "sensechat" and "商汤".
 */
function isSenseNovaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["sensenova", "sense-nova", "sensechat", "商汤"]);
}
|
|
1012
|
+
/**
 * Message-side detector for SenseTime SenseNova.
 * Delegates to `modelOrAssistantMessageHas` with the needles "sensenova",
 * "sense-nova", "sensechat" and "商汤".
 */
function isSenseNovaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["sensenova", "sense-nova", "sensechat", "商汤"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1015
|
+
|
|
1016
|
+
// 360 Zhinao / 智脑
|
|
1017
|
+
/**
 * Model-side detector for 360 Zhinao.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "360gpt", "360-gpt", "zhinao" and "智脑".
 */
function isZhinaoLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["360gpt", "360-gpt", "zhinao", "智脑"]);
}
|
|
1020
|
+
/**
 * Message-side detector for 360 Zhinao.
 * Delegates to `modelOrAssistantMessageHas` with the needles "360gpt",
 * "360-gpt", "zhinao" and "智脑".
 */
function isZhinaoLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["360gpt", "360-gpt", "zhinao", "智脑"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1023
|
+
|
|
1024
|
+
// OpenBMB MiniCPM
|
|
1025
|
+
/**
 * Model-side detector for OpenBMB MiniCPM.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "minicpm", "mini-cpm" and "openbmb".
 */
function isMiniCPMLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["minicpm", "mini-cpm", "openbmb"]);
}
|
|
1028
|
+
/**
 * Message-side detector for OpenBMB MiniCPM.
 * Delegates to `modelOrAssistantMessageHas` with the needles "minicpm",
 * "mini-cpm" and "openbmb".
 */
function isMiniCPMLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["minicpm", "mini-cpm", "openbmb"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1031
|
+
|
|
1032
|
+
// XVERSE
|
|
1033
|
+
/**
 * Model-side detector for XVERSE.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "xverse".
 */
function isXVerseLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["xverse"]);
}
|
|
1036
|
+
/**
 * Message-side detector for XVERSE.
 * Delegates to `modelOrAssistantMessageHas` with the needle "xverse".
 */
function isXVerseLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["xverse"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1039
|
+
|
|
1040
|
+
// OrionStar Orion
|
|
1041
|
+
/**
 * Model-side detector for OrionStar Orion.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "orionstar" or "orion-star", or when any token matches
 * `ORION_MODEL_PATTERN`.
 */
function isOrionLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["orionstar", "orion-star"])) {
    return true;
  }
  return modelTokens.some((token) => ORION_MODEL_PATTERN.test(token));
}
|
|
1045
|
+
/**
 * Message-side detector for OrionStar Orion.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the "orionstar" / "orion-star" needles and
 * `ORION_MODEL_PATTERN`.
 */
function isOrionLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["orionstar", "orion-star"])) {
    return true;
  }
  return candidates.some((token) => ORION_MODEL_PATTERN.test(token));
}
|
|
1052
|
+
|
|
1053
|
+
// OpenChat
|
|
1054
|
+
/**
 * Model-side detector for OpenChat.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "openchat".
 */
function isOpenChatLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["openchat"]);
}
|
|
1057
|
+
/**
 * Message-side detector for OpenChat.
 * Delegates to `modelOrAssistantMessageHas` with the needle "openchat".
 */
function isOpenChatLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["openchat"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1060
|
+
|
|
1061
|
+
// Vicuna
|
|
1062
|
+
/**
 * Model-side detector for Vicuna.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "vicuna".
 */
function isVicunaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["vicuna"]);
}
|
|
1065
|
+
/**
 * Message-side detector for Vicuna.
 * Delegates to `modelOrAssistantMessageHas` with the needle "vicuna".
 */
function isVicunaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["vicuna"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1068
|
+
|
|
1069
|
+
// WizardLM / WizardCoder
|
|
1070
|
+
/**
 * Model-side detector for WizardLM / WizardCoder.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "wizardlm", "wizard-lm", "wizardcoder" and
 * "wizard-coder".
 */
function isWizardLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["wizardlm", "wizard-lm", "wizardcoder", "wizard-coder"]);
}
|
|
1073
|
+
/**
 * Message-side detector for WizardLM / WizardCoder.
 * Delegates to `modelOrAssistantMessageHas` with the needles "wizardlm",
 * "wizard-lm", "wizardcoder" and "wizard-coder".
 */
function isWizardLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["wizardlm", "wizard-lm", "wizardcoder", "wizard-coder"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1076
|
+
|
|
1077
|
+
// Zephyr
|
|
1078
|
+
/**
 * Model-side detector for Zephyr.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "zephyr".
 */
function isZephyrLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["zephyr"]);
}
|
|
1081
|
+
/**
 * Message-side detector for Zephyr.
 * Delegates to `modelOrAssistantMessageHas` with the needle "zephyr".
 */
function isZephyrLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["zephyr"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1084
|
+
|
|
1085
|
+
// Dolphin
|
|
1086
|
+
/**
 * Model-side detector for Dolphin.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "dolphin".
 */
function isDolphinLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["dolphin"]);
}
|
|
1089
|
+
/**
 * Message-side detector for Dolphin.
 * Delegates to `modelOrAssistantMessageHas` with the needle "dolphin".
 */
function isDolphinLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["dolphin"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1092
|
+
|
|
1093
|
+
// OpenOrca
|
|
1094
|
+
/**
 * Model-side detector for OpenOrca.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "openorca" and "open-orca".
 */
function isOpenOrcaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["openorca", "open-orca"]);
}
|
|
1097
|
+
/**
 * Message-side detector for OpenOrca.
 * Delegates to `modelOrAssistantMessageHas` with the needles "openorca" and
 * "open-orca".
 */
function isOpenOrcaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["openorca", "open-orca"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1100
|
+
|
|
1101
|
+
// Starling
|
|
1102
|
+
/**
 * Model-side detector for Starling.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "starling".
 */
function isStarlingLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["starling"]);
}
|
|
1105
|
+
/**
 * Message-side detector for Starling.
 * Delegates to `modelOrAssistantMessageHas` with the needle "starling".
 */
function isStarlingLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["starling"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1108
|
+
|
|
1109
|
+
// BLOOM / BigScience
|
|
1110
|
+
/**
 * Model-side detector for BLOOM / BigScience.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needles "bloom" and "bigscience".
 */
function isBloomLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["bloom", "bigscience"]);
}
|
|
1113
|
+
/**
 * Message-side detector for BLOOM / BigScience.
 * Delegates to `modelOrAssistantMessageHas` with the needles "bloom" and
 * "bigscience".
 */
function isBloomLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["bloom", "bigscience"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1116
|
+
|
|
1117
|
+
// RWKV
|
|
1118
|
+
/**
 * Model-side detector for RWKV.
 * Returns what `hasAnyTokenContaining` reports for the model's id/name tokens
 * against the needle "rwkv".
 */
function isRwkvLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  return hasAnyTokenContaining(modelTokens, ["rwkv"]);
}
|
|
1121
|
+
/**
 * Message-side detector for RWKV.
 * Delegates to `modelOrAssistantMessageHas` with the needle "rwkv".
 */
function isRwkvLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const needles = ["rwkv"];
  return modelOrAssistantMessageHas(message, model, needles);
}
|
|
1124
|
+
|
|
1125
|
+
// Cohere Aya
|
|
1126
|
+
/**
 * Model-side detector for Cohere Aya.
 *
 * True when the model's id/name tokens pass `hasAnyTokenContaining` for
 * "aya-expanse", or when any token matches `AYA_MODEL_PATTERN`.
 */
function isAyaLikeModel(model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  if (hasAnyTokenContaining(modelTokens, ["aya-expanse"])) {
    return true;
  }
  return modelTokens.some((token) => AYA_MODEL_PATTERN.test(token));
}
|
|
1130
|
+
/**
 * Message-side detector for Cohere Aya.
 *
 * Pools the model's id/name tokens with the tokens read from the assistant
 * message, then applies the "aya-expanse" needle and `AYA_MODEL_PATTERN`.
 */
function isAyaLikeAssistantMessage(message: unknown, model: PiModel | undefined): boolean {
  const modelTokens = getModelIdNameTokenValues(model);
  const messageTokens = getAssistantMessageModelTokenValues(message);
  const candidates = [...modelTokens, ...messageTokens];
  if (hasAnyTokenContaining(candidates, ["aya-expanse"])) {
    return true;
  }
  return candidates.some((token) => AYA_MODEL_PATTERN.test(token));
}
|
|
1137
|
+
|
|
850
1138
|
// ── Model key ──────────────────────────────────────────────────────
|
|
851
1139
|
|
|
852
1140
|
function modelKey(model: PiModel): string {
|
|
@@ -1594,134 +1882,646 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
1594
1882
|
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1595
1883
|
},
|
|
1596
1884
|
},
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
}
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
)
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
}
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
}
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
: ""
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1885
|
+
// ── New OpenAI-compatible adapters (batch 3, 12 families) ────────
|
|
1886
|
+
{
|
|
1887
|
+
id: "openai" as CacheProviderId,
|
|
1888
|
+
label: "Sonar cache",
|
|
1889
|
+
matchesModel: isPerplexityLikeModel,
|
|
1890
|
+
matchesAssistantMessage(message, model) {
|
|
1891
|
+
if (!isAssistantMessage(message)) return false;
|
|
1892
|
+
return isPerplexityLikeAssistantMessage(message, model);
|
|
1893
|
+
},
|
|
1894
|
+
normalizeUsage(message) {
|
|
1895
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1896
|
+
},
|
|
1897
|
+
warningText(model) {
|
|
1898
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1899
|
+
if (missing.length === 0) return undefined;
|
|
1900
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1901
|
+
},
|
|
1902
|
+
},
|
|
1903
|
+
{
|
|
1904
|
+
id: "openai" as CacheProviderId,
|
|
1905
|
+
label: "Nova cache",
|
|
1906
|
+
matchesModel: isNovaLikeModel,
|
|
1907
|
+
matchesAssistantMessage(message, model) {
|
|
1908
|
+
if (!isAssistantMessage(message)) return false;
|
|
1909
|
+
return isNovaLikeAssistantMessage(message, model);
|
|
1910
|
+
},
|
|
1911
|
+
normalizeUsage(message) {
|
|
1912
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1913
|
+
},
|
|
1914
|
+
warningText(model) {
|
|
1915
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1916
|
+
if (missing.length === 0) return undefined;
|
|
1917
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1918
|
+
},
|
|
1919
|
+
},
|
|
1920
|
+
{
|
|
1921
|
+
id: "openai" as CacheProviderId,
|
|
1922
|
+
label: "Reka cache",
|
|
1923
|
+
matchesModel: isRekaLikeModel,
|
|
1924
|
+
matchesAssistantMessage(message, model) {
|
|
1925
|
+
if (!isAssistantMessage(message)) return false;
|
|
1926
|
+
return isRekaLikeAssistantMessage(message, model);
|
|
1927
|
+
},
|
|
1928
|
+
normalizeUsage(message) {
|
|
1929
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1930
|
+
},
|
|
1931
|
+
warningText(model) {
|
|
1932
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1933
|
+
if (missing.length === 0) return undefined;
|
|
1934
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1935
|
+
},
|
|
1936
|
+
},
|
|
1937
|
+
{
|
|
1938
|
+
id: "openai" as CacheProviderId,
|
|
1939
|
+
label: "Falcon cache",
|
|
1940
|
+
matchesModel: isFalconLikeModel,
|
|
1941
|
+
matchesAssistantMessage(message, model) {
|
|
1942
|
+
if (!isAssistantMessage(message)) return false;
|
|
1943
|
+
return isFalconLikeAssistantMessage(message, model);
|
|
1944
|
+
},
|
|
1945
|
+
normalizeUsage(message) {
|
|
1946
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1947
|
+
},
|
|
1948
|
+
warningText(model) {
|
|
1949
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1950
|
+
if (missing.length === 0) return undefined;
|
|
1951
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1952
|
+
},
|
|
1953
|
+
},
|
|
1954
|
+
{
|
|
1955
|
+
id: "openai" as CacheProviderId,
|
|
1956
|
+
label: "DBRX cache",
|
|
1957
|
+
matchesModel: isDbrxLikeModel,
|
|
1958
|
+
matchesAssistantMessage(message, model) {
|
|
1959
|
+
if (!isAssistantMessage(message)) return false;
|
|
1960
|
+
return isDbrxLikeAssistantMessage(message, model);
|
|
1961
|
+
},
|
|
1962
|
+
normalizeUsage(message) {
|
|
1963
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1964
|
+
},
|
|
1965
|
+
warningText(model) {
|
|
1966
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1967
|
+
if (missing.length === 0) return undefined;
|
|
1968
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1969
|
+
},
|
|
1970
|
+
},
|
|
1971
|
+
{
|
|
1972
|
+
id: "openai" as CacheProviderId,
|
|
1973
|
+
label: "MPT cache",
|
|
1974
|
+
matchesModel: isMptLikeModel,
|
|
1975
|
+
matchesAssistantMessage(message, model) {
|
|
1976
|
+
if (!isAssistantMessage(message)) return false;
|
|
1977
|
+
return isMptLikeAssistantMessage(message, model);
|
|
1978
|
+
},
|
|
1979
|
+
normalizeUsage(message) {
|
|
1980
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1981
|
+
},
|
|
1982
|
+
warningText(model) {
|
|
1983
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
1984
|
+
if (missing.length === 0) return undefined;
|
|
1985
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
1986
|
+
},
|
|
1987
|
+
},
|
|
1988
|
+
{
|
|
1989
|
+
id: "openai" as CacheProviderId,
|
|
1990
|
+
label: "StableLM cache",
|
|
1991
|
+
matchesModel: isStableLMLikeModel,
|
|
1992
|
+
matchesAssistantMessage(message, model) {
|
|
1993
|
+
if (!isAssistantMessage(message)) return false;
|
|
1994
|
+
return isStableLMLikeAssistantMessage(message, model);
|
|
1995
|
+
},
|
|
1996
|
+
normalizeUsage(message) {
|
|
1997
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
1998
|
+
},
|
|
1999
|
+
warningText(model) {
|
|
2000
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2001
|
+
if (missing.length === 0) return undefined;
|
|
2002
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2003
|
+
},
|
|
2004
|
+
},
|
|
2005
|
+
{
|
|
2006
|
+
id: "openai" as CacheProviderId,
|
|
2007
|
+
label: "Aquila cache",
|
|
2008
|
+
matchesModel: isAquilaLikeModel,
|
|
2009
|
+
matchesAssistantMessage(message, model) {
|
|
2010
|
+
if (!isAssistantMessage(message)) return false;
|
|
2011
|
+
return isAquilaLikeAssistantMessage(message, model);
|
|
2012
|
+
},
|
|
2013
|
+
normalizeUsage(message) {
|
|
2014
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2015
|
+
},
|
|
2016
|
+
warningText(model) {
|
|
2017
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2018
|
+
if (missing.length === 0) return undefined;
|
|
2019
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2020
|
+
},
|
|
2021
|
+
},
|
|
2022
|
+
{
|
|
2023
|
+
id: "openai" as CacheProviderId,
|
|
2024
|
+
label: "EXAONE cache",
|
|
2025
|
+
matchesModel: isExaoneLikeModel,
|
|
2026
|
+
matchesAssistantMessage(message, model) {
|
|
2027
|
+
if (!isAssistantMessage(message)) return false;
|
|
2028
|
+
return isExaoneLikeAssistantMessage(message, model);
|
|
2029
|
+
},
|
|
2030
|
+
normalizeUsage(message) {
|
|
2031
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2032
|
+
},
|
|
2033
|
+
warningText(model) {
|
|
2034
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2035
|
+
if (missing.length === 0) return undefined;
|
|
2036
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2037
|
+
},
|
|
2038
|
+
},
|
|
2039
|
+
{
|
|
2040
|
+
id: "openai" as CacheProviderId,
|
|
2041
|
+
label: "HyperCLOVA cache",
|
|
2042
|
+
matchesModel: isHyperCLOVALikeModel,
|
|
2043
|
+
matchesAssistantMessage(message, model) {
|
|
2044
|
+
if (!isAssistantMessage(message)) return false;
|
|
2045
|
+
return isHyperCLOVALikeAssistantMessage(message, model);
|
|
2046
|
+
},
|
|
2047
|
+
normalizeUsage(message) {
|
|
2048
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2049
|
+
},
|
|
2050
|
+
warningText(model) {
|
|
2051
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2052
|
+
if (missing.length === 0) return undefined;
|
|
2053
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2054
|
+
},
|
|
2055
|
+
},
|
|
2056
|
+
{
|
|
2057
|
+
id: "openai" as CacheProviderId,
|
|
2058
|
+
label: "Luminous cache",
|
|
2059
|
+
matchesModel: isLuminousLikeModel,
|
|
2060
|
+
matchesAssistantMessage(message, model) {
|
|
2061
|
+
if (!isAssistantMessage(message)) return false;
|
|
2062
|
+
return isLuminousLikeAssistantMessage(message, model);
|
|
2063
|
+
},
|
|
2064
|
+
normalizeUsage(message) {
|
|
2065
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2066
|
+
},
|
|
2067
|
+
warningText(model) {
|
|
2068
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2069
|
+
if (missing.length === 0) return undefined;
|
|
2070
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2071
|
+
},
|
|
2072
|
+
},
|
|
2073
|
+
{
|
|
2074
|
+
id: "openai" as CacheProviderId,
|
|
2075
|
+
label: "Hermes cache",
|
|
2076
|
+
matchesModel: isHermesLikeModel,
|
|
2077
|
+
matchesAssistantMessage(message, model) {
|
|
2078
|
+
if (!isAssistantMessage(message)) return false;
|
|
2079
|
+
return isHermesLikeAssistantMessage(message, model);
|
|
2080
|
+
},
|
|
2081
|
+
normalizeUsage(message) {
|
|
2082
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2083
|
+
},
|
|
2084
|
+
warningText(model) {
|
|
2085
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2086
|
+
if (missing.length === 0) return undefined;
|
|
2087
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2088
|
+
},
|
|
2089
|
+
},
|
|
2090
|
+
// ── More OpenAI-compatible adapters (batch 4, 18 families) ────────
|
|
2091
|
+
{
|
|
2092
|
+
id: "openai" as CacheProviderId,
|
|
2093
|
+
label: "Granite cache",
|
|
2094
|
+
matchesModel: isGraniteLikeModel,
|
|
2095
|
+
matchesAssistantMessage(message, model) {
|
|
2096
|
+
if (!isAssistantMessage(message)) return false;
|
|
2097
|
+
return isGraniteLikeAssistantMessage(message, model);
|
|
2098
|
+
},
|
|
2099
|
+
normalizeUsage(message) {
|
|
2100
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2101
|
+
},
|
|
2102
|
+
warningText(model) {
|
|
2103
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2104
|
+
if (missing.length === 0) return undefined;
|
|
2105
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2106
|
+
},
|
|
2107
|
+
},
|
|
2108
|
+
{
|
|
2109
|
+
id: "openai" as CacheProviderId,
|
|
2110
|
+
label: "Arctic cache",
|
|
2111
|
+
matchesModel: isArcticLikeModel,
|
|
2112
|
+
matchesAssistantMessage(message, model) {
|
|
2113
|
+
if (!isAssistantMessage(message)) return false;
|
|
2114
|
+
return isArcticLikeAssistantMessage(message, model);
|
|
2115
|
+
},
|
|
2116
|
+
normalizeUsage(message) {
|
|
2117
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2118
|
+
},
|
|
2119
|
+
warningText(model) {
|
|
2120
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2121
|
+
if (missing.length === 0) return undefined;
|
|
2122
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2123
|
+
},
|
|
2124
|
+
},
|
|
2125
|
+
{
|
|
2126
|
+
id: "openai" as CacheProviderId,
|
|
2127
|
+
label: "Pangu cache",
|
|
2128
|
+
matchesModel: isPanguLikeModel,
|
|
2129
|
+
matchesAssistantMessage(message, model) {
|
|
2130
|
+
if (!isAssistantMessage(message)) return false;
|
|
2131
|
+
return isPanguLikeAssistantMessage(message, model);
|
|
2132
|
+
},
|
|
2133
|
+
normalizeUsage(message) {
|
|
2134
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2135
|
+
},
|
|
2136
|
+
warningText(model) {
|
|
2137
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2138
|
+
if (missing.length === 0) return undefined;
|
|
2139
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2140
|
+
},
|
|
2141
|
+
},
|
|
2142
|
+
{
|
|
2143
|
+
id: "openai" as CacheProviderId,
|
|
2144
|
+
label: "SenseNova cache",
|
|
2145
|
+
matchesModel: isSenseNovaLikeModel,
|
|
2146
|
+
matchesAssistantMessage(message, model) {
|
|
2147
|
+
if (!isAssistantMessage(message)) return false;
|
|
2148
|
+
return isSenseNovaLikeAssistantMessage(message, model);
|
|
2149
|
+
},
|
|
2150
|
+
normalizeUsage(message) {
|
|
2151
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2152
|
+
},
|
|
2153
|
+
warningText(model) {
|
|
2154
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2155
|
+
if (missing.length === 0) return undefined;
|
|
2156
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2157
|
+
},
|
|
2158
|
+
},
|
|
2159
|
+
{
|
|
2160
|
+
id: "openai" as CacheProviderId,
|
|
2161
|
+
label: "Zhinao cache",
|
|
2162
|
+
matchesModel: isZhinaoLikeModel,
|
|
2163
|
+
matchesAssistantMessage(message, model) {
|
|
2164
|
+
if (!isAssistantMessage(message)) return false;
|
|
2165
|
+
return isZhinaoLikeAssistantMessage(message, model);
|
|
2166
|
+
},
|
|
2167
|
+
normalizeUsage(message) {
|
|
2168
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2169
|
+
},
|
|
2170
|
+
warningText(model) {
|
|
2171
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2172
|
+
if (missing.length === 0) return undefined;
|
|
2173
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2174
|
+
},
|
|
2175
|
+
},
|
|
2176
|
+
{
|
|
2177
|
+
id: "openai" as CacheProviderId,
|
|
2178
|
+
label: "MiniCPM cache",
|
|
2179
|
+
matchesModel: isMiniCPMLikeModel,
|
|
2180
|
+
matchesAssistantMessage(message, model) {
|
|
2181
|
+
if (!isAssistantMessage(message)) return false;
|
|
2182
|
+
return isMiniCPMLikeAssistantMessage(message, model);
|
|
2183
|
+
},
|
|
2184
|
+
normalizeUsage(message) {
|
|
2185
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2186
|
+
},
|
|
2187
|
+
warningText(model) {
|
|
2188
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2189
|
+
if (missing.length === 0) return undefined;
|
|
2190
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2191
|
+
},
|
|
2192
|
+
},
|
|
2193
|
+
{
|
|
2194
|
+
id: "openai" as CacheProviderId,
|
|
2195
|
+
label: "XVERSE cache",
|
|
2196
|
+
matchesModel: isXVerseLikeModel,
|
|
2197
|
+
matchesAssistantMessage(message, model) {
|
|
2198
|
+
if (!isAssistantMessage(message)) return false;
|
|
2199
|
+
return isXVerseLikeAssistantMessage(message, model);
|
|
2200
|
+
},
|
|
2201
|
+
normalizeUsage(message) {
|
|
2202
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2203
|
+
},
|
|
2204
|
+
warningText(model) {
|
|
2205
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2206
|
+
if (missing.length === 0) return undefined;
|
|
2207
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2208
|
+
},
|
|
2209
|
+
},
|
|
2210
|
+
{
|
|
2211
|
+
id: "openai" as CacheProviderId,
|
|
2212
|
+
label: "Orion cache",
|
|
2213
|
+
matchesModel: isOrionLikeModel,
|
|
2214
|
+
matchesAssistantMessage(message, model) {
|
|
2215
|
+
if (!isAssistantMessage(message)) return false;
|
|
2216
|
+
return isOrionLikeAssistantMessage(message, model);
|
|
2217
|
+
},
|
|
2218
|
+
normalizeUsage(message) {
|
|
2219
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2220
|
+
},
|
|
2221
|
+
warningText(model) {
|
|
2222
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2223
|
+
if (missing.length === 0) return undefined;
|
|
2224
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2225
|
+
},
|
|
2226
|
+
},
|
|
2227
|
+
{
|
|
2228
|
+
id: "openai" as CacheProviderId,
|
|
2229
|
+
label: "OpenChat cache",
|
|
2230
|
+
matchesModel: isOpenChatLikeModel,
|
|
2231
|
+
matchesAssistantMessage(message, model) {
|
|
2232
|
+
if (!isAssistantMessage(message)) return false;
|
|
2233
|
+
return isOpenChatLikeAssistantMessage(message, model);
|
|
2234
|
+
},
|
|
2235
|
+
normalizeUsage(message) {
|
|
2236
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2237
|
+
},
|
|
2238
|
+
warningText(model) {
|
|
2239
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2240
|
+
if (missing.length === 0) return undefined;
|
|
2241
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2242
|
+
},
|
|
2243
|
+
},
|
|
2244
|
+
{
|
|
2245
|
+
id: "openai" as CacheProviderId,
|
|
2246
|
+
label: "Vicuna cache",
|
|
2247
|
+
matchesModel: isVicunaLikeModel,
|
|
2248
|
+
matchesAssistantMessage(message, model) {
|
|
2249
|
+
if (!isAssistantMessage(message)) return false;
|
|
2250
|
+
return isVicunaLikeAssistantMessage(message, model);
|
|
2251
|
+
},
|
|
2252
|
+
normalizeUsage(message) {
|
|
2253
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2254
|
+
},
|
|
2255
|
+
warningText(model) {
|
|
2256
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2257
|
+
if (missing.length === 0) return undefined;
|
|
2258
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2259
|
+
},
|
|
2260
|
+
},
|
|
2261
|
+
{
|
|
2262
|
+
id: "openai" as CacheProviderId,
|
|
2263
|
+
label: "Wizard cache",
|
|
2264
|
+
matchesModel: isWizardLikeModel,
|
|
2265
|
+
matchesAssistantMessage(message, model) {
|
|
2266
|
+
if (!isAssistantMessage(message)) return false;
|
|
2267
|
+
return isWizardLikeAssistantMessage(message, model);
|
|
2268
|
+
},
|
|
2269
|
+
normalizeUsage(message) {
|
|
2270
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2271
|
+
},
|
|
2272
|
+
warningText(model) {
|
|
2273
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2274
|
+
if (missing.length === 0) return undefined;
|
|
2275
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2276
|
+
},
|
|
2277
|
+
},
|
|
2278
|
+
{
|
|
2279
|
+
id: "openai" as CacheProviderId,
|
|
2280
|
+
label: "Zephyr cache",
|
|
2281
|
+
matchesModel: isZephyrLikeModel,
|
|
2282
|
+
matchesAssistantMessage(message, model) {
|
|
2283
|
+
if (!isAssistantMessage(message)) return false;
|
|
2284
|
+
return isZephyrLikeAssistantMessage(message, model);
|
|
2285
|
+
},
|
|
2286
|
+
normalizeUsage(message) {
|
|
2287
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2288
|
+
},
|
|
2289
|
+
warningText(model) {
|
|
2290
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2291
|
+
if (missing.length === 0) return undefined;
|
|
2292
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2293
|
+
},
|
|
2294
|
+
},
|
|
2295
|
+
{
|
|
2296
|
+
id: "openai" as CacheProviderId,
|
|
2297
|
+
label: "Dolphin cache",
|
|
2298
|
+
matchesModel: isDolphinLikeModel,
|
|
2299
|
+
matchesAssistantMessage(message, model) {
|
|
2300
|
+
if (!isAssistantMessage(message)) return false;
|
|
2301
|
+
return isDolphinLikeAssistantMessage(message, model);
|
|
2302
|
+
},
|
|
2303
|
+
normalizeUsage(message) {
|
|
2304
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2305
|
+
},
|
|
2306
|
+
warningText(model) {
|
|
2307
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2308
|
+
if (missing.length === 0) return undefined;
|
|
2309
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2310
|
+
},
|
|
2311
|
+
},
|
|
2312
|
+
{
|
|
2313
|
+
id: "openai" as CacheProviderId,
|
|
2314
|
+
label: "OpenOrca cache",
|
|
2315
|
+
matchesModel: isOpenOrcaLikeModel,
|
|
2316
|
+
matchesAssistantMessage(message, model) {
|
|
2317
|
+
if (!isAssistantMessage(message)) return false;
|
|
2318
|
+
return isOpenOrcaLikeAssistantMessage(message, model);
|
|
2319
|
+
},
|
|
2320
|
+
normalizeUsage(message) {
|
|
2321
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2322
|
+
},
|
|
2323
|
+
warningText(model) {
|
|
2324
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2325
|
+
if (missing.length === 0) return undefined;
|
|
2326
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2327
|
+
},
|
|
2328
|
+
},
|
|
2329
|
+
{
|
|
2330
|
+
id: "openai" as CacheProviderId,
|
|
2331
|
+
label: "Starling cache",
|
|
2332
|
+
matchesModel: isStarlingLikeModel,
|
|
2333
|
+
matchesAssistantMessage(message, model) {
|
|
2334
|
+
if (!isAssistantMessage(message)) return false;
|
|
2335
|
+
return isStarlingLikeAssistantMessage(message, model);
|
|
2336
|
+
},
|
|
2337
|
+
normalizeUsage(message) {
|
|
2338
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2339
|
+
},
|
|
2340
|
+
warningText(model) {
|
|
2341
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2342
|
+
if (missing.length === 0) return undefined;
|
|
2343
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2344
|
+
},
|
|
2345
|
+
},
|
|
2346
|
+
{
|
|
2347
|
+
id: "openai" as CacheProviderId,
|
|
2348
|
+
label: "BLOOM cache",
|
|
2349
|
+
matchesModel: isBloomLikeModel,
|
|
2350
|
+
matchesAssistantMessage(message, model) {
|
|
2351
|
+
if (!isAssistantMessage(message)) return false;
|
|
2352
|
+
return isBloomLikeAssistantMessage(message, model);
|
|
2353
|
+
},
|
|
2354
|
+
normalizeUsage(message) {
|
|
2355
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2356
|
+
},
|
|
2357
|
+
warningText(model) {
|
|
2358
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2359
|
+
if (missing.length === 0) return undefined;
|
|
2360
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2361
|
+
},
|
|
2362
|
+
},
|
|
2363
|
+
{
|
|
2364
|
+
id: "openai" as CacheProviderId,
|
|
2365
|
+
label: "RWKV cache",
|
|
2366
|
+
matchesModel: isRwkvLikeModel,
|
|
2367
|
+
matchesAssistantMessage(message, model) {
|
|
2368
|
+
if (!isAssistantMessage(message)) return false;
|
|
2369
|
+
return isRwkvLikeAssistantMessage(message, model);
|
|
2370
|
+
},
|
|
2371
|
+
normalizeUsage(message) {
|
|
2372
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2373
|
+
},
|
|
2374
|
+
warningText(model) {
|
|
2375
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2376
|
+
if (missing.length === 0) return undefined;
|
|
2377
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2378
|
+
},
|
|
2379
|
+
},
|
|
2380
|
+
{
|
|
2381
|
+
id: "openai" as CacheProviderId,
|
|
2382
|
+
label: "Aya cache",
|
|
2383
|
+
matchesModel: isAyaLikeModel,
|
|
2384
|
+
matchesAssistantMessage(message, model) {
|
|
2385
|
+
if (!isAssistantMessage(message)) return false;
|
|
2386
|
+
return isAyaLikeAssistantMessage(message, model);
|
|
2387
|
+
},
|
|
2388
|
+
normalizeUsage(message) {
|
|
2389
|
+
return normalizeWithFallback(message, getOpenAIRawUsage);
|
|
2390
|
+
},
|
|
2391
|
+
warningText(model) {
|
|
2392
|
+
const missing = describeMissingOpenAICompatibleProxyCompat(model);
|
|
2393
|
+
if (missing.length === 0) return undefined;
|
|
2394
|
+
return buildOpenAIProxyCompatWarningText(modelKey(model), missing);
|
|
2395
|
+
},
|
|
2396
|
+
},
|
|
2397
|
+
];
|
|
2398
|
+
|
|
2399
|
+
/**
 * Picks the adapter responsible for a model.
 *
 * Walks CACHE_PROVIDER_ADAPTERS in declaration order and returns the first
 * entry whose `matchesModel` accepts `model`; `undefined` when none does.
 */
function selectAdapterForModel(model: PiModel | undefined): CacheProviderAdapter | undefined {
  for (const candidate of CACHE_PROVIDER_ADAPTERS) {
    if (candidate.matchesModel(model)) {
      return candidate;
    }
  }
  return undefined;
}
|
|
2402
|
+
|
|
2403
|
+
/**
 * Picks the adapter responsible for an assistant message.
 *
 * Walks CACHE_PROVIDER_ADAPTERS in declaration order and returns the first
 * entry whose `matchesAssistantMessage` accepts the (message, model) pair;
 * `undefined` when none does.
 */
function selectAdapterForAssistantMessage(message: unknown, model: PiModel | undefined): CacheProviderAdapter | undefined {
  for (const candidate of CACHE_PROVIDER_ADAPTERS) {
    if (candidate.matchesAssistantMessage(message, model)) {
      return candidate;
    }
  }
  return undefined;
}
|
|
2406
|
+
|
|
2407
|
+
/**
 * Shows the adapter's compat warning for `model` at most once per
 * `<adapter id>:<model key>` pair.
 *
 * @param model        Active model; nothing happens when it is missing.
 * @param ctx          Extension context whose `ui.notify` displays the warning.
 * @param warnedModels Set of keys already warned about; mutated when a new
 *                     warning is emitted.
 */
function notifyCacheCompatIfNeeded(
  model: PiModel | undefined,
  ctx: ExtensionContext,
  warnedModels: Set<string>,
): void {
  if (!model) return;

  const matched = selectAdapterForModel(model);
  if (!matched) return;

  // Adapters without a warningText hook, or with nothing to report, stay silent.
  const warning = matched.warningText?.(model);
  if (!warning) return;

  const dedupeKey = `${matched.id}:${modelKey(model)}`;
  if (!warnedModels.has(dedupeKey)) {
    warnedModels.add(dedupeKey);
    ctx.ui.notify(warning, "warning");
  }
}
|
|
2424
|
+
|
|
2425
|
+
/**
 * Returns today's date in the local time zone as `YYYY-MM-DD`
 * (month and day zero-padded to two digits).
 */
function currentLocalDay(): string {
  const today = new Date();
  const twoDigits = (part: number): string => String(part).padStart(2, "0");
  // getMonth() is zero-based, hence the +1.
  return [
    today.getFullYear(),
    twoDigits(today.getMonth() + 1),
    twoDigits(today.getDate()),
  ].join("-");
}
|
|
2432
|
+
|
|
2433
|
+
/**
 * Creates a zeroed CacheStats record for one day.
 *
 * @param day Day label stored on the record; defaults to `currentLocalDay()`.
 * @returns A fresh object with every counter set to 0.
 */
function emptyCacheStats(day = currentLocalDay()): CacheStats {
  const zeroed: CacheStats = {
    day,
    totalRequests: 0,
    hitRequests: 0,
    cachedInputTokens: 0,
    cacheWriteInputTokens: 0,
    totalInputTokens: 0,
  };
  return zeroed;
}
|
|
2443
|
+
|
|
2444
|
+
/**
 * Builds a map holding one zeroed CacheStats record per id listed in
 * CACHE_PROVIDER_IDS, all sharing the same day label.
 *
 * @param day Day label for every record; defaults to `currentLocalDay()`.
 */
function emptyAllCacheStats(day = currentLocalDay()): Partial<Record<CacheProviderId, CacheStats>> {
  const entries = CACHE_PROVIDER_IDS.map((providerId) => [providerId, emptyCacheStats(day)] as const);
  return Object.fromEntries(entries) as Partial<Record<CacheProviderId, CacheStats>>;
}
|
|
2447
|
+
|
|
2448
|
+
/**
 * Folds one request's usage into a running CacheStats record (in place).
 *
 * Every call counts as one request; it counts as a cache hit only when
 * `usage.cacheRead` is greater than zero.
 *
 * @param stats Accumulator to mutate.
 * @param usage Usage of a single request.
 */
function addUsageToCacheStats(stats: CacheStats, usage: UsageSnapshot): void {
  const { cacheRead, cacheWrite, totalInput } = usage;

  stats.totalRequests += 1;
  if (cacheRead > 0) {
    stats.hitRequests += 1;
  }

  stats.cachedInputTokens += cacheRead;
  stats.cacheWriteInputTokens += cacheWrite;
  stats.totalInputTokens += totalInput;
}
|
|
2455
|
+
|
|
2456
|
+
/**
 * Formats a token count as millions with an `M` suffix.
 *
 * The value is rounded to a whole number and clamped at zero first. Precision
 * depends on magnitude: exactly zero prints `0M`; below 0.001M uses four
 * decimals; below 0.01M three; 10M and above one; everything else two.
 */
function formatTokenCount(value: number): string {
  const wholeTokens = Math.max(0, Math.round(value));
  const inMillions = wholeTokens / 1_000_000;
  if (inMillions === 0) return "0M";

  let decimals = 2;
  if (inMillions < 0.001) {
    decimals = 4;
  } else if (inMillions < 0.01) {
    decimals = 3;
  } else if (inMillions >= 10) {
    decimals = 1;
  }

  return `${inMillions.toFixed(decimals)}M`;
}
|
|
2464
|
+
|
|
2465
|
+
/**
 * Renders one adapter's stats as a single status line:
 * `<label> <hits>/<requests> · <cached>/<total> tok[ (<pct>%)][ · write <n> tok]`.
 *
 * The percentage is included only when some input tokens were counted. The
 * write segment is included only when the adapter sets `showCacheWrite` and
 * cache-write tokens were recorded.
 */
function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
  const { hitRequests, totalRequests, cachedInputTokens, cacheWriteInputTokens, totalInputTokens } = stats;

  let hitShare = "";
  if (totalInputTokens > 0) {
    hitShare = ` (${Math.round((cachedInputTokens / totalInputTokens) * 100)}%)`;
  }

  let writeSuffix = "";
  if (adapter.showCacheWrite && cacheWriteInputTokens > 0) {
    writeSuffix = ` · write ${formatTokenCount(cacheWriteInputTokens)} tok`;
  }

  const requestPart = `${hitRequests}/${totalRequests}`;
  const tokenPart = `${formatTokenCount(cachedInputTokens)}/${formatTokenCount(totalInputTokens)} tok`;

  return `${adapter.label} ${requestPart} · ${tokenPart}${hitShare}${writeSuffix}`;
}
|
|
2475
|
+
|
|
2476
|
+
/**
 * Extracts a `code` property from an unknown thrown value.
 *
 * @param error Any value, typically whatever a `catch` clause received.
 * @returns The code converted with `String(...)`, or `undefined` when `error`
 *          is not an object, has no `code`, or its `code` is `null`/`undefined`.
 */
function getErrorCode(error: unknown): string | undefined {
  if (typeof error !== "object" || error === null) return undefined;

  const code = (error as { code?: unknown }).code;
  // A present-but-empty code must not become the literal string "undefined"
  // or "null"; report it as "no code" instead.
  if (code === undefined || code === null) return undefined;

  return String(code);
}
|
|
2481
|
+
|
|
2482
|
+
/**
 * Validates an untrusted value as a CacheStats record.
 *
 * Accepts the value only when it is a record with a `YYYY-MM-DD`-shaped `day`
 * string, when `getNonNegativeNumber` yields a value for every required
 * counter, and when the counters are consistent with each other. A missing
 * `cacheWriteInputTokens` is treated as 0.
 *
 * @returns A freshly built CacheStats, or `undefined` when validation fails.
 */
function parseCacheStats(value: unknown): CacheStats | undefined {
  const raw = asRecord(value);
  if (!raw) return undefined;

  const { day } = raw;
  if (typeof day !== "string" || !/^\d{4}-\d{2}-\d{2}$/.test(day)) {
    return undefined;
  }

  const totalRequests = getNonNegativeNumber(raw, "totalRequests");
  const hitRequests = getNonNegativeNumber(raw, "hitRequests");
  const cachedInputTokens = getNonNegativeNumber(raw, "cachedInputTokens");
  const cacheWriteInputTokens = getNonNegativeNumber(raw, "cacheWriteInputTokens") ?? 0;
  const totalInputTokens = getNonNegativeNumber(raw, "totalInputTokens");

  // Every counter except cacheWriteInputTokens is mandatory.
  if (
    totalRequests === undefined ||
    hitRequests === undefined ||
    cachedInputTokens === undefined ||
    totalInputTokens === undefined
  ) {
    return undefined;
  }

  // Hits cannot exceed requests; cached/written tokens cannot exceed total input.
  const exceedsBounds =
    hitRequests > totalRequests ||
    cachedInputTokens > totalInputTokens ||
    cacheWriteInputTokens > totalInputTokens;
  if (exceedsBounds) return undefined;

  return {
    day,
    totalRequests,
    hitRequests,
    cachedInputTokens,
    cacheWriteInputTokens,
    totalInputTokens,
  };
}
|
|
2515
|
+
|
|
2516
|
+
function parsePersistedCacheStats(value: unknown): CacheStatsState | undefined {
|
|
2517
|
+
const record = asRecord(value);
|
|
2518
|
+
if (!record) return undefined;
|
|
2519
|
+
|
|
2520
|
+
// version 3: model-scoped stats + legacy family fallback
|
|
2521
|
+
if (record.version === 3) {
|
|
2522
|
+
const statsByModel: Record<string, CacheStats> = {};
|
|
2523
|
+
const rawModelMap = asRecord(record.statsByModel);
|
|
2524
|
+
if (rawModelMap) {
|
|
1725
2525
|
for (const [key, val] of Object.entries(rawModelMap)) {
|
|
1726
2526
|
const parsed = parseCacheStats(val);
|
|
1727
2527
|
if (parsed) statsByModel[key] = parsed;
|
|
@@ -1821,6 +2621,171 @@ function isCompatCheckApplicable(model: PiModel): boolean {
|
|
|
1821
2621
|
return lower(model.api) === "openai-completions" && !isOfficialOpenAIBaseUrl(model);
|
|
1822
2622
|
}
|
|
1823
2623
|
|
|
2624
|
+
/**
 * Detect router / channel profiles from a PiModel and return diagnostic notes.
 *
 * This function is advisory only — it does NOT participate in adapter selection,
 * prompt_cache_key injection, or footer stats. It inspects provider, api, baseUrl,
 * and compat to identify common proxy/router patterns where cache performance may
 * be degraded due to multi-backend routing.
 *
 * Known profiles (checked in order; the first match wins):
 *   1. OpenRouter — baseUrl or provider containing "openrouter"
 *   2. Vercel AI Gateway — baseUrl containing ai-gateway.vercel.sh, or provider
 *      containing "vercel"
 *   3. LiteLLM / OneAPI / NewAPI / VoAPI — baseUrl or provider containing litellm,
 *      oneapi, one-api, newapi, new-api, voapi, vo-api (self-hosted aggregation)
 *   4. Generic third-party OpenAI-compatible proxy — any openai-completions model
 *      with a non-empty, non-official base URL that matched none of the above.
 *
 * Models whose api is neither "openai-completions" nor "openai-responses", and
 * models for which isOfficialOpenAIBaseUrl is true, produce no notes.
 *
 * @param model Model to inspect.
 * @returns Lines to display, in order; empty when no profile applies.
 */
function describeRouterChannelDiagnostics(model: PiModel): string[] {
  const notes: string[] = [];
  const api = lower(model.api);
  const baseUrl = lower(model.baseUrl || "");
  const provider = lower(model.provider);

  // True when compat[routingKey] is an object carrying a truthy `only` or `order`.
  // The compat value is narrowed step by step instead of being cast and indexed:
  // reading `.only` off an `unknown` does not type-check, and indexing a nullish
  // compat would throw.
  const hasPinnedUpstream = (routingKey: string): boolean => {
    const compat: unknown = getCompat(model);
    if (typeof compat !== "object" || compat === null) return false;
    const routing: unknown = (compat as Record<string, unknown>)[routingKey];
    if (typeof routing !== "object" || routing === null) return false;
    const { only, order } = routing as { only?: unknown; order?: unknown };
    return Boolean(only) || Boolean(order);
  };

  // Only OpenAI-compatible APIs are applicable for router/channel diagnostics.
  // Custom transports like kiro-api, anthropic-messages, bedrock-converse-stream
  // or non-OpenAI APIs are excluded.
  if (api !== "openai-completions" && api !== "openai-responses") {
    return notes;
  }

  // Official OpenAI bypass — no notes needed.
  if (isOfficialOpenAIBaseUrl(model)) {
    return notes;
  }

  // ── 1. OpenRouter ────────────────────────────────────────────────
  // "openrouter" already covers "openrouter.ai", so one substring test suffices.
  if (baseUrl.includes("openrouter") || provider.includes("openrouter")) {
    const pinned = hasPinnedUpstream("openRouterRouting");

    notes.push(
      "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
        "low cache hit rates are common when each turn lands on a different upstream provider.",
    );

    if (!pinned) {
      notes.push(
        " Suggestion: Add an openRouterRouting config to fix the upstream provider. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
      );
      notes.push(
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"openRouterRouting": { "only": ["<provider-slug>"] } }`,
      );
      notes.push(
        ' Replace <provider-slug> with the actual OpenRouter provider slug (e.g. "openai", "anthropic").',
      );
      notes.push(
        " Alternatively, use openRouterRouting.order: [\"<provider-slug>\", \"...\"] for fallback order. " +
          "Only set supportsLongCacheRetention if your upstream supports long cache retention.",
      );
    }

    return notes;
  }

  // ── 2. Vercel AI Gateway ─────────────────────────────────────────
  // "vercel" already covers "vercel-ai-gateway".
  if (baseUrl.includes("ai-gateway.vercel.sh") || provider.includes("vercel")) {
    const pinned = hasPinnedUpstream("vercelGatewayRouting");

    notes.push(
      "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
        "provider endpoints per request, reducing cache locality.",
    );

    if (!pinned) {
      notes.push(
        " Suggestion: Add a vercelGatewayRouting config to fix the upstream. " +
          "Example for models.json -> providers[\"<providerId>\"] -> compat:",
      );
      notes.push(
        ` { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true, ` +
          `"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
      );
      notes.push(
        " Replace <provider-id> with the actual Vercel provider ID (e.g. \"openai\").",
      );
      notes.push(
        " Only set supportsLongCacheRetention if your upstream supports it.",
      );
    }

    return notes;
  }

  // ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
  const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
  if (aggregationPatterns.some((p) => baseUrl.includes(p) || provider.includes(p))) {
    notes.push(
      "🔀 Router/channel: Self-hosted aggregation proxy detected (LiteLLM / OneAPI / NewAPI / VoAPI). " +
        "These proxies route to multiple upstream accounts or instances, which can split the cache.",
    );
    notes.push(
      " Suggestions:",
    );
    notes.push(
      " • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
    );
    notes.push(
      " • Forward prompt_cache_key and session-affinity headers to the upstream.",
    );
    notes.push(
      " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
    );
    notes.push(
      ` Example compat: { "sendSessionAffinityHeaders": true, "supportsLongCacheRetention": true }`,
    );

    return notes;
  }

  // ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
  if (api === "openai-completions" && baseUrl) {
    const missing = describeMissingOpenAICompatibleProxyCompat(model);
    notes.push(
      "🔀 Router/channel: Third-party OpenAI-compatible proxy. If cache hit rates are low:",
    );
    notes.push(
      " • Verify the proxy routes to the same upstream account/instance per session.",
    );
    notes.push(
      " • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
    );
    notes.push(
      " • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
    );
    if (missing.length > 0) {
      notes.push(
        ` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
      );
    }

    return notes;
  }

  return notes;
}
|
|
2788
|
+
|
|
1824
2789
|
function buildDoctorDiagnosis(model: PiModel): string {
|
|
1825
2790
|
const lines: string[] = [];
|
|
1826
2791
|
lines.push(`Provider: ${model.provider}`);
|
|
@@ -1848,6 +2813,15 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
1848
2813
|
lines.push("ℹ️ Compat check not applicable for this model.");
|
|
1849
2814
|
}
|
|
1850
2815
|
|
|
2816
|
+
// ── Router/channel diagnostics ──
|
|
2817
|
+
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
2818
|
+
if (routerNotes.length > 0) {
|
|
2819
|
+
lines.push("");
|
|
2820
|
+
for (const note of routerNotes) {
|
|
2821
|
+
lines.push(note);
|
|
2822
|
+
}
|
|
2823
|
+
}
|
|
2824
|
+
|
|
1851
2825
|
// ── Integrity diagnostics ──
|
|
1852
2826
|
if (lastPromptIntegrityWarningAt > 0) {
|
|
1853
2827
|
const ago = Date.now() - lastPromptIntegrityWarningAt;
|
|
@@ -1870,21 +2844,46 @@ function buildDoctorDiagnosis(model: PiModel): string {
|
|
|
1870
2844
|
|
|
1871
2845
|
/**
 * Builds the compat diagnosis text for a model.
 *
 * Combines two sources: the compat flags reported missing by
 * describeMissingOpenAICompatibleProxyCompat, and the advisory notes from
 * describeRouterChannelDiagnostics.
 *
 * @returns Newline-joined text, or `undefined` when there are neither missing
 *          flags nor router notes.
 */
function buildCompatDiagnosis(model: PiModel): string | undefined {
  const missing = describeMissingOpenAICompatibleProxyCompat(model);
  const routerNotes = describeRouterChannelDiagnostics(model);

  const hasMissing = missing.length > 0;
  const hasNotes = routerNotes.length > 0;
  if (!hasMissing && !hasNotes) return undefined;

  const key = modelKey(model);
  const out: string[] = [];

  if (hasMissing) {
    // The provider label is the part of the model key before the first "/".
    const slashAt = key.indexOf("/");
    const providerLabel = slashAt > 0 ? key.slice(0, slashAt) : key;
    const suggestion = Object.fromEntries(missing.map((flag) => [flag, true]));
    const modelsJsonPath = getModelsJsonDisplayPath();

    out.push(
      `Active model: ${key}`,
      `Missing: ${missing.join(", ")}`,
      "",
      `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`,
      `(at the same level as baseUrl/api/apiKey/models) and add:`,
      JSON.stringify(suggestion, null, 2),
      "",
      `Only enable if your endpoint supports them.`,
    );
  } else {
    // Reaching here means router notes exist while nothing is missing:
    // prefix the notes with the compat status.
    const status = isCompatCheckApplicable(model)
      ? "✅ Compat fully configured."
      : "ℹ️ Compat check not applicable for this model.";
    out.push(status, "");
  }

  if (hasNotes) {
    // Separate the notes from the missing-flags section with a blank line.
    if (hasMissing) out.push("");
    out.push(...routerNotes);
  }

  return out.join("\n");
}
|
|
1889
2888
|
|
|
1890
2889
|
// Internal helpers exported only so the task verification script
|
|
@@ -1960,6 +2959,68 @@ export const __internals_for_tests = {
|
|
|
1960
2959
|
isJambaLikeAssistantMessage,
|
|
1961
2960
|
isSolarLikeModel,
|
|
1962
2961
|
isSolarLikeAssistantMessage,
|
|
2962
|
+
// New OpenAI-compatible model detection (batch 3, 12 families)
|
|
2963
|
+
isPerplexityLikeModel,
|
|
2964
|
+
isPerplexityLikeAssistantMessage,
|
|
2965
|
+
isNovaLikeModel,
|
|
2966
|
+
isNovaLikeAssistantMessage,
|
|
2967
|
+
isRekaLikeModel,
|
|
2968
|
+
isRekaLikeAssistantMessage,
|
|
2969
|
+
isFalconLikeModel,
|
|
2970
|
+
isFalconLikeAssistantMessage,
|
|
2971
|
+
isDbrxLikeModel,
|
|
2972
|
+
isDbrxLikeAssistantMessage,
|
|
2973
|
+
isMptLikeModel,
|
|
2974
|
+
isMptLikeAssistantMessage,
|
|
2975
|
+
isStableLMLikeModel,
|
|
2976
|
+
isStableLMLikeAssistantMessage,
|
|
2977
|
+
isAquilaLikeModel,
|
|
2978
|
+
isAquilaLikeAssistantMessage,
|
|
2979
|
+
isExaoneLikeModel,
|
|
2980
|
+
isExaoneLikeAssistantMessage,
|
|
2981
|
+
isHyperCLOVALikeModel,
|
|
2982
|
+
isHyperCLOVALikeAssistantMessage,
|
|
2983
|
+
isLuminousLikeModel,
|
|
2984
|
+
isLuminousLikeAssistantMessage,
|
|
2985
|
+
isHermesLikeModel,
|
|
2986
|
+
isHermesLikeAssistantMessage,
|
|
2987
|
+
// More OpenAI-compatible model detection (batch 4, 18 families)
|
|
2988
|
+
isGraniteLikeModel,
|
|
2989
|
+
isGraniteLikeAssistantMessage,
|
|
2990
|
+
isArcticLikeModel,
|
|
2991
|
+
isArcticLikeAssistantMessage,
|
|
2992
|
+
isPanguLikeModel,
|
|
2993
|
+
isPanguLikeAssistantMessage,
|
|
2994
|
+
isSenseNovaLikeModel,
|
|
2995
|
+
isSenseNovaLikeAssistantMessage,
|
|
2996
|
+
isZhinaoLikeModel,
|
|
2997
|
+
isZhinaoLikeAssistantMessage,
|
|
2998
|
+
isMiniCPMLikeModel,
|
|
2999
|
+
isMiniCPMLikeAssistantMessage,
|
|
3000
|
+
isXVerseLikeModel,
|
|
3001
|
+
isXVerseLikeAssistantMessage,
|
|
3002
|
+
isOrionLikeModel,
|
|
3003
|
+
isOrionLikeAssistantMessage,
|
|
3004
|
+
isOpenChatLikeModel,
|
|
3005
|
+
isOpenChatLikeAssistantMessage,
|
|
3006
|
+
isVicunaLikeModel,
|
|
3007
|
+
isVicunaLikeAssistantMessage,
|
|
3008
|
+
isWizardLikeModel,
|
|
3009
|
+
isWizardLikeAssistantMessage,
|
|
3010
|
+
isZephyrLikeModel,
|
|
3011
|
+
isZephyrLikeAssistantMessage,
|
|
3012
|
+
isDolphinLikeModel,
|
|
3013
|
+
isDolphinLikeAssistantMessage,
|
|
3014
|
+
isOpenOrcaLikeModel,
|
|
3015
|
+
isOpenOrcaLikeAssistantMessage,
|
|
3016
|
+
isStarlingLikeModel,
|
|
3017
|
+
isStarlingLikeAssistantMessage,
|
|
3018
|
+
isBloomLikeModel,
|
|
3019
|
+
isBloomLikeAssistantMessage,
|
|
3020
|
+
isRwkvLikeModel,
|
|
3021
|
+
isRwkvLikeAssistantMessage,
|
|
3022
|
+
isAyaLikeModel,
|
|
3023
|
+
isAyaLikeAssistantMessage,
|
|
1963
3024
|
buildOpenAIProxyCompatWarningText,
|
|
1964
3025
|
getModelIdNameTokenValues,
|
|
1965
3026
|
getAssistantMessageModelTokenValues,
|
|
@@ -1973,6 +3034,7 @@ export const __internals_for_tests = {
|
|
|
1973
3034
|
isCompatCheckApplicable,
|
|
1974
3035
|
buildDoctorDiagnosis,
|
|
1975
3036
|
buildCompatDiagnosis,
|
|
3037
|
+
describeRouterChannelDiagnostics,
|
|
1976
3038
|
// Cache stats helpers (module-level, usable from verify script)
|
|
1977
3039
|
addUsageToCacheStats,
|
|
1978
3040
|
formatCacheStats,
|