llm-simple-router 0.10.13 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/model-directory.json +1 -0
- package/config/recommended-providers.json +6 -5
- package/dist/admin/groups.js +25 -0
- package/dist/admin/monitor.js +15 -6
- package/dist/admin/providers.js +22 -3
- package/dist/admin/recommended.js +13 -1
- package/dist/config/model-context.d.ts +12 -0
- package/dist/config/model-context.js +96 -2
- package/dist/config/model-directory.json +1 -0
- package/dist/config/recommended-providers.json +355 -0
- package/dist/config/recommended-retry-rules.json +12 -0
- package/dist/config/recommended.d.ts +2 -0
- package/dist/config/version.json +1 -0
- package/dist/core/monitor/request-tracker.d.ts +1 -1
- package/dist/core/monitor/request-tracker.js +2 -1
- package/dist/core/monitor/types.d.ts +1 -0
- package/dist/core/types.d.ts +1 -0
- package/dist/index.js +17 -1
- package/dist/metrics/metrics-extractor.js +3 -0
- package/dist/proxy/handler/create-proxy-handler.js +15 -0
- package/dist/proxy/handler/failover-loop.js +88 -63
- package/dist/proxy/pipeline-snapshot.d.ts +9 -1
- package/dist/proxy/proxy-logging.js +2 -2
- package/dist/proxy/routing/modality-redirect.d.ts +22 -0
- package/dist/proxy/routing/modality-redirect.js +252 -0
- package/dist/proxy/routing/overflow.d.ts +11 -0
- package/dist/proxy/routing/overflow.js +24 -0
- package/dist/proxy/transform/plugin-registry.js +1 -1
- package/dist/proxy/transform/stream-oa2ant.js +3 -0
- package/dist/proxy/transport/http.js +6 -0
- package/dist/proxy/transport/proxy-agent.js +20 -8
- package/dist/proxy/transport/stream.js +8 -1
- package/dist/proxy/transport/transport-fn.js +12 -0
- package/frontend-dist/assets/CardContent-yiYaxAko.js +1 -0
- package/frontend-dist/assets/CardTitle-CzqSlrtn.js +1 -0
- package/frontend-dist/assets/Checkbox-2voapLgE.js +1 -0
- package/frontend-dist/assets/CollapsibleContent-DHkVSWt2.js +1 -0
- package/frontend-dist/assets/CollapsibleTrigger-DbVCeTdD.js +1 -0
- package/frontend-dist/assets/Dashboard-xT1CEwOR.js +3 -0
- package/frontend-dist/assets/{Input-Ey_q_5_r.js → Input-DEfnoFS3.js} +1 -1
- package/frontend-dist/assets/Label-CjUuzGNQ.js +1 -0
- package/frontend-dist/assets/Login-CJDEk-tO.js +1 -0
- package/frontend-dist/assets/Logs-CzdPCIYV.js +1 -0
- package/frontend-dist/assets/MappingEntryEditor-GejG6FYv.js +1 -0
- package/frontend-dist/assets/ModelCard-DdQtySPM.js +1 -0
- package/frontend-dist/assets/ModelMappings-DffY7Izx.js +1 -0
- package/frontend-dist/assets/Monitor-y6d6LInm.js +1 -0
- package/frontend-dist/assets/Providers-Cb-CB1yf.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-CywRxDop.js +1 -0
- package/frontend-dist/assets/QuickSetup-Nj_ysAdc.js +1 -0
- package/frontend-dist/assets/RetryRules-DRdeZUPt.js +1 -0
- package/frontend-dist/assets/RouterKeys-BHOhDgXZ.js +1 -0
- package/frontend-dist/assets/RovingFocusItem-NxZWBEpr.js +1 -0
- package/frontend-dist/assets/Schedules-C4jRCbnI.js +1 -0
- package/frontend-dist/assets/Settings-Cn0qnqMY.js +6 -0
- package/frontend-dist/assets/Setup-BjN6KU0y.js +1 -0
- package/frontend-dist/assets/Switch-bk3eQSZ_.js +1 -0
- package/frontend-dist/assets/TooltipTrigger-DmYucHtv.js +1 -0
- package/frontend-dist/assets/TransformRulesForm-Bo-zFABv.js +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-5-vBmVMH.js +3 -0
- package/frontend-dist/assets/VisuallyHiddenInput-BflIWQCW.js +1 -0
- package/frontend-dist/assets/{button-C7HO6Dyb.js → button-DZwflOXO.js} +2 -2
- package/frontend-dist/assets/{copy-DxwFlq2A.js → copy-zQQvOqam.js} +1 -1
- package/frontend-dist/assets/dialog-C7v6Gaak.js +1 -0
- package/frontend-dist/assets/index-ClQS69Or.css +1 -0
- package/frontend-dist/assets/index-PMAQyWJb.js +3 -0
- package/frontend-dist/assets/mappings-BpkOqnsu.js +1 -0
- package/frontend-dist/assets/mappings-D7Qy46v_.js +1 -0
- package/frontend-dist/assets/{providers-Bcea72GK.js → providers-BI5dO-j0.js} +1 -1
- package/frontend-dist/assets/{providers-DNICB6Kg.js → providers-BzxbZ85B.js} +1 -1
- package/frontend-dist/assets/{trash-2-D2SrfECO.js → trash-2-CrcHK-G_.js} +1 -1
- package/frontend-dist/assets/{useClipboard-CttzUerj.js → useClipboard-B4K3eogm.js} +1 -1
- package/frontend-dist/assets/{useLogRetention-Dv0deAan.js → useLogRetention-BNbFXLBO.js} +1 -1
- package/frontend-dist/index.html +3 -3
- package/package.json +2 -2
- package/frontend-dist/assets/CardContent-DfVo-N85.js +0 -1
- package/frontend-dist/assets/CardTitle-npwJSAlz.js +0 -1
- package/frontend-dist/assets/Checkbox-Ddnzkh_i.js +0 -1
- package/frontend-dist/assets/CollapsibleContent-BTVazeoQ.js +0 -1
- package/frontend-dist/assets/CollapsibleTrigger-DCQeyHrt.js +0 -1
- package/frontend-dist/assets/Dashboard-DjnImtwH.js +0 -3
- package/frontend-dist/assets/Label-Dw5HcYsL.js +0 -1
- package/frontend-dist/assets/Login-CSrfhhm9.js +0 -1
- package/frontend-dist/assets/Logs-HR1DZs1M.js +0 -1
- package/frontend-dist/assets/MappingEntryEditor-C9pgNL0Q.js +0 -1
- package/frontend-dist/assets/ModelCard-IQMwlnCm.js +0 -1
- package/frontend-dist/assets/ModelMappings-kRx-GL_7.js +0 -1
- package/frontend-dist/assets/Monitor-y1ofDNK7.js +0 -1
- package/frontend-dist/assets/Providers-C1bP2PoM.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-DQx4coxn.js +0 -1
- package/frontend-dist/assets/QuickSetup-DHX9-CnO.js +0 -1
- package/frontend-dist/assets/RetryRules-zdJE0bFL.js +0 -1
- package/frontend-dist/assets/RouterKeys-CD0rI4kv.js +0 -1
- package/frontend-dist/assets/RovingFocusItem-CFmjbm49.js +0 -1
- package/frontend-dist/assets/Schedules-BUm3cC6w.js +0 -1
- package/frontend-dist/assets/Settings-D7z5IRkY.js +0 -6
- package/frontend-dist/assets/Setup-i9inmgjB.js +0 -1
- package/frontend-dist/assets/Switch-C9DeYAnK.js +0 -1
- package/frontend-dist/assets/TooltipTrigger-Dr6kqGSH.js +0 -1
- package/frontend-dist/assets/TransformRulesForm-CyXh4jHa.js +0 -1
- package/frontend-dist/assets/UnifiedRequestDialog-6ZRBfjko.js +0 -3
- package/frontend-dist/assets/VisuallyHiddenInput-CwE9jREu.js +0 -1
- package/frontend-dist/assets/constants-yM0YwP2s.js +0 -1
- package/frontend-dist/assets/dialog-BWB1aLcT.js +0 -1
- package/frontend-dist/assets/index-DeeDpH_W.css +0 -1
- package/frontend-dist/assets/index-itL9--Q_.js +0 -3
- package/frontend-dist/assets/mappings-6w7mc8YK.js +0 -1
- package/frontend-dist/assets/mappings-C1fK_e70.js +0 -1
- /package/frontend-dist/assets/{common-D96jEq-h.js → common-Bvxev9Ev.js} +0 -0
- /package/frontend-dist/assets/{common-BpwAv-lj.js → common-Cn0QcrnY.js} +0 -0
- /package/frontend-dist/assets/{dashboard-DjgmcUG5.js → dashboard-Cejt1wVQ.js} +0 -0
- /package/frontend-dist/assets/{dashboard-COCyp2p_.js → dashboard-DLTOR0fN.js} +0 -0
- /package/frontend-dist/assets/{login-BTNL5nN5.js → login-BkOvA7gg.js} +0 -0
- /package/frontend-dist/assets/{login-Sef1i0de.js → login-DWRFsEu3.js} +0 -0
- /package/frontend-dist/assets/{logs-CBRLywRw.js → logs-CA8USnXG.js} +0 -0
- /package/frontend-dist/assets/{logs-B-6cgV12.js → logs-QPt2Ybwy.js} +0 -0
- /package/frontend-dist/assets/{monitor-CaDMr_KG.js → monitor-CcPZdXUM.js} +0 -0
- /package/frontend-dist/assets/{monitor-C9j7ppMj.js → monitor-D-0KOVTC.js} +0 -0
- /package/frontend-dist/assets/{proxyEnhancement-DpIVSv-g.js → proxyEnhancement-B6vdsMeK.js} +0 -0
- /package/frontend-dist/assets/{proxyEnhancement-rSM6KhbN.js → proxyEnhancement-UuPFs4M3.js} +0 -0
- /package/frontend-dist/assets/{quickSetup-CCxaqY3U.js → quickSetup-CSpWmAy-.js} +0 -0
- /package/frontend-dist/assets/{quickSetup-DgDENHE4.js → quickSetup-D8ruRelW.js} +0 -0
- /package/frontend-dist/assets/{requestDetail-DZ55ph4h.js → requestDetail-8Sp9tWNb.js} +0 -0
- /package/frontend-dist/assets/{requestDetail-3KCtYe1N.js → requestDetail-CcHzzKYr.js} +0 -0
- /package/frontend-dist/assets/{retryRules-BXrRL52J.js → retryRules-C--dd-y8.js} +0 -0
- /package/frontend-dist/assets/{retryRules-CToGC6cR.js → retryRules-CzLnagW_.js} +0 -0
- /package/frontend-dist/assets/{routerKeys-DbTg4OP1.js → routerKeys-CB2l_V7w.js} +0 -0
- /package/frontend-dist/assets/{routerKeys-Be7OZCn0.js → routerKeys-p_ioAckE.js} +0 -0
- /package/frontend-dist/assets/{schedules-Bd66RL7P.js → schedules-Cz_-Wfa_.js} +0 -0
- /package/frontend-dist/assets/{schedules-HDwMuDgX.js → schedules-DTgk603B.js} +0 -0
- /package/frontend-dist/assets/{settings-DCS-RTKl.js → settings-B5Mq1HN8.js} +0 -0
- /package/frontend-dist/assets/{settings-C4zZB9GY.js → settings-j3dzVXzy.js} +0 -0
- /package/frontend-dist/assets/{setup-CrjgRrYP.js → setup-DaeEG9ll.js} +0 -0
- /package/frontend-dist/assets/{setup-DmgXvgkY.js → setup-Dryg-9wL.js} +0 -0
- /package/frontend-dist/assets/{sidebar-3c8D7l60.js → sidebar-BQWT-QZb.js} +0 -0
- /package/frontend-dist/assets/{sidebar-vj4kQ6t1.js → sidebar-DYwEKca3.js} +0 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"group": "DeepSeek",
|
|
4
|
+
"presets": [
|
|
5
|
+
{
|
|
6
|
+
"plan": "Anthropic",
|
|
7
|
+
"presetName": "deepseek",
|
|
8
|
+
"apiType": "anthropic",
|
|
9
|
+
"baseUrl": "https://api.deepseek.com/anthropic",
|
|
10
|
+
"modelsEndpoint": "/v1/models",
|
|
11
|
+
"models": [
|
|
12
|
+
"deepseek-v4-flash",
|
|
13
|
+
"deepseek-v4-pro"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"plan": "OpenAI",
|
|
18
|
+
"presetName": "deepseek-openai",
|
|
19
|
+
"apiType": "openai",
|
|
20
|
+
"baseUrl": "https://api.deepseek.com",
|
|
21
|
+
"modelsEndpoint": "/v1/models",
|
|
22
|
+
"models": [
|
|
23
|
+
"deepseek-v4-flash",
|
|
24
|
+
"deepseek-v4-pro"
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"group": "百度千帆",
|
|
31
|
+
"presets": [
|
|
32
|
+
{
|
|
33
|
+
"plan": "API",
|
|
34
|
+
"presetName": "qianfan",
|
|
35
|
+
"apiType": "openai",
|
|
36
|
+
"baseUrl": "https://qianfan.baidubce.com/v2",
|
|
37
|
+
"modelsEndpoint": "/v1/models",
|
|
38
|
+
"models": [
|
|
39
|
+
"ernie-4.0-8k",
|
|
40
|
+
"ernie-4.0-turbo-8k",
|
|
41
|
+
"ernie-3.5-8k",
|
|
42
|
+
"ernie-speed-8k",
|
|
43
|
+
"ernie-lite-8k",
|
|
44
|
+
"ernie-x1-32k-preview",
|
|
45
|
+
"deepseek-v3",
|
|
46
|
+
"deepseek-r1"
|
|
47
|
+
],
|
|
48
|
+
"upstreamPath": "/chat/completions"
|
|
49
|
+
}
|
|
50
|
+
]
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"group": "科大讯飞",
|
|
54
|
+
"presets": [
|
|
55
|
+
{
|
|
56
|
+
"plan": "API",
|
|
57
|
+
"presetName": "iflytek-spark",
|
|
58
|
+
"apiType": "openai",
|
|
59
|
+
"baseUrl": "https://spark-api-open.xf-yun.com",
|
|
60
|
+
"modelsEndpoint": "/v1/models",
|
|
61
|
+
"models": [
|
|
62
|
+
"4.0Ultra",
|
|
63
|
+
"generalv3.5",
|
|
64
|
+
"max-32k",
|
|
65
|
+
"generalv3",
|
|
66
|
+
"pro-128k",
|
|
67
|
+
"lite"
|
|
68
|
+
]
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"group": "硅基流动",
|
|
74
|
+
"presets": [
|
|
75
|
+
{
|
|
76
|
+
"plan": "API",
|
|
77
|
+
"presetName": "siliconflow",
|
|
78
|
+
"apiType": "openai",
|
|
79
|
+
"baseUrl": "https://api.siliconflow.cn",
|
|
80
|
+
"modelsEndpoint": "/v1/models",
|
|
81
|
+
"models": [
|
|
82
|
+
"deepseek-ai/DeepSeek-V3.2-Exp",
|
|
83
|
+
"deepseek-ai/DeepSeek-R1",
|
|
84
|
+
"Qwen/Qwen3-8B",
|
|
85
|
+
"Qwen/Qwen2.5-72B-Instruct",
|
|
86
|
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
|
87
|
+
"moonshotai/Kimi-K2-Instruct",
|
|
88
|
+
"moonshotai/Kimi-K2.5"
|
|
89
|
+
]
|
|
90
|
+
}
|
|
91
|
+
]
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"group": "智谱",
|
|
95
|
+
"presets": [
|
|
96
|
+
{
|
|
97
|
+
"plan": "Coding Plan",
|
|
98
|
+
"presetName": "zhipu-coding-plan",
|
|
99
|
+
"apiType": "openai",
|
|
100
|
+
"baseUrl": "https://open.bigmodel.cn",
|
|
101
|
+
"upstreamPath": "/api/coding/paas/v4/chat/completions",
|
|
102
|
+
"modelsEndpoint": "/v1/models",
|
|
103
|
+
"models": [
|
|
104
|
+
"glm-5.1",
|
|
105
|
+
"glm-5",
|
|
106
|
+
"glm-5-turbo",
|
|
107
|
+
"glm-4.7",
|
|
108
|
+
"glm-4.5-air"
|
|
109
|
+
]
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"plan": "API",
|
|
113
|
+
"presetName": "zhipu",
|
|
114
|
+
"apiType": "openai",
|
|
115
|
+
"baseUrl": "https://open.bigmodel.cn/api/paas/v4",
|
|
116
|
+
"modelsEndpoint": "/models",
|
|
117
|
+
"models": [
|
|
118
|
+
"glm-5.1",
|
|
119
|
+
"glm-5",
|
|
120
|
+
"glm-5-turbo",
|
|
121
|
+
"glm-4.7",
|
|
122
|
+
"glm-4.7-flash",
|
|
123
|
+
"glm-4.6"
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
]
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"group": "月之暗面",
|
|
130
|
+
"presets": [
|
|
131
|
+
{
|
|
132
|
+
"plan": "Coding Plan",
|
|
133
|
+
"presetName": "kimi-coding-plan",
|
|
134
|
+
"apiType": "anthropic",
|
|
135
|
+
"baseUrl": "https://api.kimi.com/coding",
|
|
136
|
+
"modelsEndpoint": "/v1/models",
|
|
137
|
+
"models": [
|
|
138
|
+
"kimi-for-coding",
|
|
139
|
+
"kimi-k2.5"
|
|
140
|
+
]
|
|
141
|
+
},
|
|
142
|
+
{
|
|
143
|
+
"plan": "API",
|
|
144
|
+
"presetName": "kimi",
|
|
145
|
+
"apiType": "openai",
|
|
146
|
+
"baseUrl": "https://api.moonshot.cn",
|
|
147
|
+
"modelsEndpoint": "/v1/models",
|
|
148
|
+
"models": [
|
|
149
|
+
"kimi-k2.6",
|
|
150
|
+
"kimi-k2.5",
|
|
151
|
+
"kimi-k2-turbo-preview",
|
|
152
|
+
"kimi-k2-thinking",
|
|
153
|
+
"moonshot-v1-128k"
|
|
154
|
+
]
|
|
155
|
+
}
|
|
156
|
+
]
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
"group": "Minimax",
|
|
160
|
+
"presets": [
|
|
161
|
+
{
|
|
162
|
+
"plan": "Token Plan",
|
|
163
|
+
"presetName": "minimax-token-plan",
|
|
164
|
+
"apiType": "anthropic",
|
|
165
|
+
"baseUrl": "https://api.minimaxi.com/anthropic",
|
|
166
|
+
"modelsEndpoint": "/v1/models",
|
|
167
|
+
"models": [
|
|
168
|
+
"MiniMax-M2.7"
|
|
169
|
+
]
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"plan": "API",
|
|
173
|
+
"presetName": "minimax",
|
|
174
|
+
"apiType": "openai",
|
|
175
|
+
"baseUrl": "https://api.minimax.chat",
|
|
176
|
+
"modelsEndpoint": "/v1/models",
|
|
177
|
+
"models": [
|
|
178
|
+
"MiniMax-M2.7",
|
|
179
|
+
"MiniMax-M2.7-highspeed",
|
|
180
|
+
"MiniMax-M2.5",
|
|
181
|
+
"MiniMax-M2.5-highspeed",
|
|
182
|
+
"MiniMax-M2.1",
|
|
183
|
+
"MiniMax-M2"
|
|
184
|
+
]
|
|
185
|
+
}
|
|
186
|
+
]
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
"group": "火山引擎",
|
|
190
|
+
"presets": [
|
|
191
|
+
{
|
|
192
|
+
"plan": "Coding Plan",
|
|
193
|
+
"presetName": "volcengine-coding-plan",
|
|
194
|
+
"apiType": "anthropic",
|
|
195
|
+
"baseUrl": "https://ark.cn-beijing.volces.com/api/coding",
|
|
196
|
+
"modelsEndpoint": "/v1/models",
|
|
197
|
+
"models": [
|
|
198
|
+
"ark-code-latest",
|
|
199
|
+
"doubao-seed-2.0-code",
|
|
200
|
+
"kimi-k2.5",
|
|
201
|
+
"glm-4.7",
|
|
202
|
+
"deepseek-v3.2"
|
|
203
|
+
]
|
|
204
|
+
},
|
|
205
|
+
{
|
|
206
|
+
"plan": "API",
|
|
207
|
+
"presetName": "volcengine",
|
|
208
|
+
"apiType": "openai",
|
|
209
|
+
"baseUrl": "https://ark.cn-beijing.volces.com/api/v3",
|
|
210
|
+
"modelsEndpoint": "/models",
|
|
211
|
+
"models": [
|
|
212
|
+
"doubao-seed-2-0-pro-260215",
|
|
213
|
+
"doubao-seed-1-8-251228",
|
|
214
|
+
"doubao-seed-code-preview-251028"
|
|
215
|
+
]
|
|
216
|
+
}
|
|
217
|
+
]
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
"group": "阿里云",
|
|
221
|
+
"presets": [
|
|
222
|
+
{
|
|
223
|
+
"plan": "Coding Plan",
|
|
224
|
+
"presetName": "aliyun-coding-plan",
|
|
225
|
+
"apiType": "anthropic",
|
|
226
|
+
"baseUrl": "https://coding.dashscope.aliyuncs.com/apps/anthropic",
|
|
227
|
+
"models": [
|
|
228
|
+
"qwen3.6-plus",
|
|
229
|
+
"qwen3-coder-next",
|
|
230
|
+
"qwen3-coder-plus",
|
|
231
|
+
"kimi-k2.5",
|
|
232
|
+
"glm-5",
|
|
233
|
+
"MiniMax-M2.5"
|
|
234
|
+
]
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
"plan": "API",
|
|
238
|
+
"presetName": "aliyun",
|
|
239
|
+
"apiType": "openai",
|
|
240
|
+
"baseUrl": "https://dashscope.aliyuncs.com/compatible-mode",
|
|
241
|
+
"modelsEndpoint": "/v1/models",
|
|
242
|
+
"models": [
|
|
243
|
+
"qwen3.6-plus",
|
|
244
|
+
"qwen3.5-plus",
|
|
245
|
+
"qwen3-max",
|
|
246
|
+
"qwen3.5-flash",
|
|
247
|
+
"qwen3-coder-plus",
|
|
248
|
+
"qwen3-coder-next"
|
|
249
|
+
]
|
|
250
|
+
}
|
|
251
|
+
]
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
"group": "腾讯云",
|
|
255
|
+
"presets": [
|
|
256
|
+
{
|
|
257
|
+
"plan": "Coding Plan",
|
|
258
|
+
"presetName": "tencent-coding-plan",
|
|
259
|
+
"apiType": "anthropic",
|
|
260
|
+
"baseUrl": "https://api.lkeap.cloud.tencent.com/coding/anthropic",
|
|
261
|
+
"modelsEndpoint": "/v1/models",
|
|
262
|
+
"models": [
|
|
263
|
+
"tc-code-latest",
|
|
264
|
+
"hunyuan-2.0-instruct",
|
|
265
|
+
"hunyuan-2.0-thinking",
|
|
266
|
+
"hunyuan-turbos",
|
|
267
|
+
"hunyuan-t1",
|
|
268
|
+
"glm-5",
|
|
269
|
+
"kimi-k2.5"
|
|
270
|
+
]
|
|
271
|
+
},
|
|
272
|
+
{
|
|
273
|
+
"plan": "API",
|
|
274
|
+
"presetName": "tencent",
|
|
275
|
+
"apiType": "openai",
|
|
276
|
+
"baseUrl": "https://api.hunyuan.cloud.tencent.com",
|
|
277
|
+
"modelsEndpoint": "/v1/models",
|
|
278
|
+
"models": [
|
|
279
|
+
"hunyuan-2.0-thinking",
|
|
280
|
+
"hunyuan-2.0-instruct",
|
|
281
|
+
"hunyuan-t1-latest",
|
|
282
|
+
"hunyuan-a13b",
|
|
283
|
+
"hunyuan-turbos-latest"
|
|
284
|
+
]
|
|
285
|
+
}
|
|
286
|
+
]
|
|
287
|
+
},
|
|
288
|
+
{
|
|
289
|
+
"group": "OpenCode",
|
|
290
|
+
"presets": [
|
|
291
|
+
{
|
|
292
|
+
"plan": "Go OpenAI",
|
|
293
|
+
"presetName": "opencode-go-openai",
|
|
294
|
+
"apiType": "openai",
|
|
295
|
+
"baseUrl": "https://opencode.ai/zen/go/v1/chat/completions",
|
|
296
|
+
"modelsEndpoint": "/models",
|
|
297
|
+
"models": [
|
|
298
|
+
"glm-5.1",
|
|
299
|
+
"glm-5",
|
|
300
|
+
"kimi-k2.5",
|
|
301
|
+
"kimi-k2.6",
|
|
302
|
+
"deepseek-v4-pro",
|
|
303
|
+
"deepseek-v4-flash",
|
|
304
|
+
"mimo-v2-pro",
|
|
305
|
+
"mimo-v2-omni",
|
|
306
|
+
"mimo-v2.5-pro",
|
|
307
|
+
"mimo-v2.5",
|
|
308
|
+
"qwen3.6-plus",
|
|
309
|
+
"qwen3.5-plus"
|
|
310
|
+
]
|
|
311
|
+
},
|
|
312
|
+
{
|
|
313
|
+
"plan": "Go Anthropic",
|
|
314
|
+
"presetName": "opencode-go-anthropic",
|
|
315
|
+
"apiType": "anthropic",
|
|
316
|
+
"baseUrl": "https://opencode.ai/zen/go/v1/messages",
|
|
317
|
+
"models": [
|
|
318
|
+
"minimax-m2.7",
|
|
319
|
+
"minimax-m2.5"
|
|
320
|
+
]
|
|
321
|
+
}
|
|
322
|
+
]
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
"group": "阶跃星辰",
|
|
326
|
+
"presets": [
|
|
327
|
+
{
|
|
328
|
+
"plan": "Step Plan",
|
|
329
|
+
"presetName": "stepfun-step-plan",
|
|
330
|
+
"apiType": "anthropic",
|
|
331
|
+
"baseUrl": "https://api.stepfun.com/step_plan",
|
|
332
|
+
"modelsEndpoint": "/v1/models",
|
|
333
|
+
"models": [
|
|
334
|
+
"step-3.5-flash-2603",
|
|
335
|
+
"step-3.5-flash"
|
|
336
|
+
]
|
|
337
|
+
},
|
|
338
|
+
{
|
|
339
|
+
"plan": "API",
|
|
340
|
+
"presetName": "stepfun",
|
|
341
|
+
"apiType": "openai",
|
|
342
|
+
"baseUrl": "https://api.stepfun.com",
|
|
343
|
+
"modelsEndpoint": "/v1/models",
|
|
344
|
+
"models": [
|
|
345
|
+
"step-3.5-flash",
|
|
346
|
+
"step-3",
|
|
347
|
+
"step-2-mini",
|
|
348
|
+
"step-2-16k",
|
|
349
|
+
"step-1-8k",
|
|
350
|
+
"step-1-32k"
|
|
351
|
+
]
|
|
352
|
+
}
|
|
353
|
+
]
|
|
354
|
+
}
|
|
355
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
[
|
|
2
|
+
{ "name": "429 Too Many Requests", "status_code": 429, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
|
|
3
|
+
{ "name": "503 Service Unavailable", "status_code": 503, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
|
|
4
|
+
{ "name": "ZAI 网络错误 (code 1234)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
5
|
+
{ "name": "ZAI 临时不可用", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*请稍后重试", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
6
|
+
{ "name": "ZAI 操作失败 (code 500)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
7
|
+
{ "name": "ZAI 速率限制 (HTTP 200, code 1302)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1302\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
8
|
+
{ "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
9
|
+
{ "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
10
|
+
{ "name": "ZAI 模型过载 (HTTP 200, code 1305)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1305\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
|
|
11
|
+
{ "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] }
|
|
12
|
+
]
|
|
@@ -7,6 +7,8 @@ export interface ProviderPreset {
|
|
|
7
7
|
/** 上游模型列表端点路径,如 /v1/models 或 /models;拼接在 baseUrl 后 */
|
|
8
8
|
modelsEndpoint?: string;
|
|
9
9
|
models: string[];
|
|
10
|
+
/** 由 API handler 补充:模型名 → capabilities 映射 */
|
|
11
|
+
modelCapabilities?: Record<string, string[]>;
|
|
10
12
|
}
|
|
11
13
|
export interface ProviderGroup {
|
|
12
14
|
group: string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"providers":1,"retryRules":1}
|
|
@@ -53,7 +53,7 @@ export declare class RequestTracker {
|
|
|
53
53
|
attempts?: AttemptSnapshot[];
|
|
54
54
|
}): void;
|
|
55
55
|
/** Update stream metrics for a completed request (e.g., after cache estimation) */
|
|
56
|
-
updateCompletedMetrics(id: string, cacheReadTokens: number): void;
|
|
56
|
+
updateCompletedMetrics(id: string, cacheReadTokens: number, cacheReadTokensEstimated?: boolean): void;
|
|
57
57
|
getActive(): ActiveRequest[];
|
|
58
58
|
getRecent(limit?: number): ActiveRequest[];
|
|
59
59
|
get(id: string): ActiveRequest | undefined;
|
|
@@ -144,13 +144,14 @@ export class RequestTracker {
|
|
|
144
144
|
this.broadcast("request_complete", completed);
|
|
145
145
|
}
|
|
146
146
|
/** Update stream metrics for a completed request (e.g., after cache estimation) */
|
|
147
|
-
updateCompletedMetrics(id, cacheReadTokens) {
|
|
147
|
+
updateCompletedMetrics(id, cacheReadTokens, cacheReadTokensEstimated) {
|
|
148
148
|
const req = this.recentCompleted.find(r => r.id === id);
|
|
149
149
|
if (!req || !req.streamMetrics)
|
|
150
150
|
return;
|
|
151
151
|
req.streamMetrics = {
|
|
152
152
|
...req.streamMetrics,
|
|
153
153
|
cacheReadTokens,
|
|
154
|
+
cacheReadTokensEstimated: cacheReadTokensEstimated ? 1 : 0,
|
|
154
155
|
};
|
|
155
156
|
this.broadcast("request_complete", req);
|
|
156
157
|
}
|
|
@@ -51,6 +51,7 @@ export interface StreamMetricsSnapshot {
|
|
|
51
51
|
inputTokens: number | null;
|
|
52
52
|
outputTokens: number | null;
|
|
53
53
|
cacheReadTokens: number | null;
|
|
54
|
+
cacheReadTokensEstimated?: number;
|
|
54
55
|
ttftMs: number | null;
|
|
55
56
|
tokensPerSecond: number | null;
|
|
56
57
|
stopReason: string | null;
|
package/dist/core/types.d.ts
CHANGED
|
@@ -37,6 +37,7 @@ export interface MetricsResult {
|
|
|
37
37
|
output_tokens: number | null;
|
|
38
38
|
cache_creation_tokens: number | null;
|
|
39
39
|
cache_read_tokens: number | null;
|
|
40
|
+
cache_read_tokens_estimated?: number;
|
|
40
41
|
ttft_ms: number | null;
|
|
41
42
|
/** T6 - T0: proxy end-to-end streaming duration */
|
|
42
43
|
total_duration_ms: number | null;
|
package/dist/index.js
CHANGED
|
@@ -6,6 +6,7 @@ import { randomUUID } from "crypto";
|
|
|
6
6
|
import Fastify from "fastify";
|
|
7
7
|
import { insertRequestLog } from "./db/logs.js";
|
|
8
8
|
import { HTTP_NOT_FOUND, HTTP_INTERNAL_ERROR, getProxyApiType } from "./core/constants.js";
|
|
9
|
+
import { loadModelDirectory } from "./config/model-context.js";
|
|
9
10
|
import { API_CODE, apiError, isAdminApiResponse, statusToApiCode } from "./admin/api-response.js";
|
|
10
11
|
const PROVIDER_DEFAULT_QUEUE_TIMEOUT_MS = 5000;
|
|
11
12
|
const PROVIDER_DEFAULT_MAX_QUEUE_SIZE = 100;
|
|
@@ -82,6 +83,8 @@ export async function buildApp(options) {
|
|
|
82
83
|
else {
|
|
83
84
|
db = initDatabase(config.DB_PATH);
|
|
84
85
|
}
|
|
86
|
+
// 加载外部模型目录(ai-model-directory),fallback 到硬编码白名单
|
|
87
|
+
loadModelDirectory();
|
|
85
88
|
const isDev = process.env.NODE_ENV !== "production";
|
|
86
89
|
const MAX_BODY_SIZE_MB = 50;
|
|
87
90
|
const KB = 1024;
|
|
@@ -120,8 +123,21 @@ export async function buildApp(options) {
|
|
|
120
123
|
return new Error(message);
|
|
121
124
|
});
|
|
122
125
|
// 记录请求到达时间,供全局错误处理计算延迟
|
|
123
|
-
app.addHook("onRequest", (request,
|
|
126
|
+
app.addHook("onRequest", (request, reply, done) => {
|
|
124
127
|
request.receivedAt = Date.now();
|
|
128
|
+
// 全局 EPIPE 防护:ServerResponse 的 write 异步完成失败时,
|
|
129
|
+
// 内部 socketErrorListener → response.destroy(err) → response.emit('error')。
|
|
130
|
+
// 若无 listener 则该 error 成为 uncaught exception。
|
|
131
|
+
// 代理路由在 create-proxy-handler.ts 中已有额外监听,此处覆盖所有路由。
|
|
132
|
+
reply.raw.on("error", (err) => {
|
|
133
|
+
const code = err.code;
|
|
134
|
+
if (code === 'EPIPE') {
|
|
135
|
+
request.log.debug({ err }, "client disconnected (EPIPE)");
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
request.log.warn({ err }, "response stream error");
|
|
139
|
+
}
|
|
140
|
+
});
|
|
125
141
|
done();
|
|
126
142
|
});
|
|
127
143
|
// 统一错误处理:代理路由保持 {error:{message}},Admin API 使用信封格式
|
|
@@ -99,6 +99,7 @@ export class MetricsExtractor {
|
|
|
99
99
|
output_tokens: this.outputTokens,
|
|
100
100
|
cache_creation_tokens: this.cacheCreationTokens,
|
|
101
101
|
cache_read_tokens: this.cacheReadTokens,
|
|
102
|
+
cache_read_tokens_estimated: this.cacheReadTokens !== null ? 0 : undefined,
|
|
102
103
|
ttft_ms: this.ttftMs,
|
|
103
104
|
total_duration_ms: totalDurationMs,
|
|
104
105
|
tokens_per_second: totalTps,
|
|
@@ -306,6 +307,7 @@ function extractOpenAINonStream(parsed) {
|
|
|
306
307
|
output_tokens: usage?.completion_tokens ?? null,
|
|
307
308
|
cache_creation_tokens: null,
|
|
308
309
|
cache_read_tokens: details?.cached_tokens ?? null,
|
|
310
|
+
cache_read_tokens_estimated: details?.cached_tokens != null ? 0 : undefined,
|
|
309
311
|
ttft_ms: null,
|
|
310
312
|
total_duration_ms: null,
|
|
311
313
|
tokens_per_second: null,
|
|
@@ -321,6 +323,7 @@ function extractAnthropicNonStream(parsed) {
|
|
|
321
323
|
output_tokens: usage?.output_tokens ?? null,
|
|
322
324
|
cache_creation_tokens: usage?.cache_creation_input_tokens ?? null,
|
|
323
325
|
cache_read_tokens: usage?.cache_read_input_tokens ?? null,
|
|
326
|
+
cache_read_tokens_estimated: usage?.cache_read_input_tokens != null ? 0 : undefined,
|
|
324
327
|
ttft_ms: null,
|
|
325
328
|
total_duration_ms: null,
|
|
326
329
|
tokens_per_second: null,
|
|
@@ -192,8 +192,23 @@ export function createProxyHandler(config) {
|
|
|
192
192
|
// Socket error handling
|
|
193
193
|
const socketErrorHandler = (err) => request.log.debug({ err }, "client socket error");
|
|
194
194
|
request.raw.socket.on("error", socketErrorHandler);
|
|
195
|
+
// reply.raw (ServerResponse) error handling
|
|
196
|
+
// Node.js 中,TCP socket write 异步完成失败时(如 EPIPE),
|
|
197
|
+
// 内部 socketErrorListener → response.destroy(err) → response.emit('error')。
|
|
198
|
+
// 若无 listener,该 error 成为 uncaught exception 导致进程退出。
|
|
199
|
+
const replyErrorHandler = (err) => {
|
|
200
|
+
const code = err.code;
|
|
201
|
+
if (code === 'EPIPE') {
|
|
202
|
+
request.log.debug({ err }, "client disconnected (EPIPE)");
|
|
203
|
+
}
|
|
204
|
+
else {
|
|
205
|
+
request.log.warn({ err }, "response stream error");
|
|
206
|
+
}
|
|
207
|
+
};
|
|
208
|
+
reply.raw.on("error", replyErrorHandler);
|
|
195
209
|
reply.raw.on("close", () => {
|
|
196
210
|
request.raw.socket.removeListener("error", socketErrorHandler);
|
|
211
|
+
reply.raw.removeListener("error", replyErrorHandler);
|
|
197
212
|
});
|
|
198
213
|
// 创建 pipeline context
|
|
199
214
|
const ctx = createPipelineContext(request, reply, apiType);
|