llm-simple-router 0.10.13 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/config/model-directory.json +1 -0
  2. package/config/recommended-providers.json +6 -5
  3. package/dist/admin/groups.js +25 -0
  4. package/dist/admin/monitor.js +15 -6
  5. package/dist/admin/providers.js +22 -3
  6. package/dist/admin/recommended.js +13 -1
  7. package/dist/config/model-context.d.ts +12 -0
  8. package/dist/config/model-context.js +96 -2
  9. package/dist/config/model-directory.json +1 -0
  10. package/dist/config/recommended-providers.json +355 -0
  11. package/dist/config/recommended-retry-rules.json +12 -0
  12. package/dist/config/recommended.d.ts +2 -0
  13. package/dist/config/version.json +1 -0
  14. package/dist/core/monitor/request-tracker.d.ts +1 -1
  15. package/dist/core/monitor/request-tracker.js +2 -1
  16. package/dist/core/monitor/types.d.ts +1 -0
  17. package/dist/core/types.d.ts +1 -0
  18. package/dist/index.js +17 -1
  19. package/dist/metrics/metrics-extractor.js +3 -0
  20. package/dist/proxy/handler/create-proxy-handler.js +15 -0
  21. package/dist/proxy/handler/failover-loop.js +88 -63
  22. package/dist/proxy/pipeline-snapshot.d.ts +9 -1
  23. package/dist/proxy/proxy-logging.js +2 -2
  24. package/dist/proxy/routing/modality-redirect.d.ts +22 -0
  25. package/dist/proxy/routing/modality-redirect.js +252 -0
  26. package/dist/proxy/routing/overflow.d.ts +11 -0
  27. package/dist/proxy/routing/overflow.js +24 -0
  28. package/dist/proxy/transform/plugin-registry.js +1 -1
  29. package/dist/proxy/transform/stream-oa2ant.js +3 -0
  30. package/dist/proxy/transport/http.js +6 -0
  31. package/dist/proxy/transport/proxy-agent.js +20 -8
  32. package/dist/proxy/transport/stream.js +8 -1
  33. package/dist/proxy/transport/transport-fn.js +12 -0
  34. package/frontend-dist/assets/CardContent-yiYaxAko.js +1 -0
  35. package/frontend-dist/assets/CardTitle-CzqSlrtn.js +1 -0
  36. package/frontend-dist/assets/Checkbox-2voapLgE.js +1 -0
  37. package/frontend-dist/assets/CollapsibleContent-DHkVSWt2.js +1 -0
  38. package/frontend-dist/assets/CollapsibleTrigger-DbVCeTdD.js +1 -0
  39. package/frontend-dist/assets/Dashboard-xT1CEwOR.js +3 -0
  40. package/frontend-dist/assets/{Input-Ey_q_5_r.js → Input-DEfnoFS3.js} +1 -1
  41. package/frontend-dist/assets/Label-CjUuzGNQ.js +1 -0
  42. package/frontend-dist/assets/Login-CJDEk-tO.js +1 -0
  43. package/frontend-dist/assets/Logs-CzdPCIYV.js +1 -0
  44. package/frontend-dist/assets/MappingEntryEditor-GejG6FYv.js +1 -0
  45. package/frontend-dist/assets/ModelCard-DdQtySPM.js +1 -0
  46. package/frontend-dist/assets/ModelMappings-DffY7Izx.js +1 -0
  47. package/frontend-dist/assets/Monitor-y6d6LInm.js +1 -0
  48. package/frontend-dist/assets/Providers-Cb-CB1yf.js +1 -0
  49. package/frontend-dist/assets/ProxyEnhancement-CywRxDop.js +1 -0
  50. package/frontend-dist/assets/QuickSetup-Nj_ysAdc.js +1 -0
  51. package/frontend-dist/assets/RetryRules-DRdeZUPt.js +1 -0
  52. package/frontend-dist/assets/RouterKeys-BHOhDgXZ.js +1 -0
  53. package/frontend-dist/assets/RovingFocusItem-NxZWBEpr.js +1 -0
  54. package/frontend-dist/assets/Schedules-C4jRCbnI.js +1 -0
  55. package/frontend-dist/assets/Settings-Cn0qnqMY.js +6 -0
  56. package/frontend-dist/assets/Setup-BjN6KU0y.js +1 -0
  57. package/frontend-dist/assets/Switch-bk3eQSZ_.js +1 -0
  58. package/frontend-dist/assets/TooltipTrigger-DmYucHtv.js +1 -0
  59. package/frontend-dist/assets/TransformRulesForm-Bo-zFABv.js +1 -0
  60. package/frontend-dist/assets/UnifiedRequestDialog-5-vBmVMH.js +3 -0
  61. package/frontend-dist/assets/VisuallyHiddenInput-BflIWQCW.js +1 -0
  62. package/frontend-dist/assets/{button-C7HO6Dyb.js → button-DZwflOXO.js} +2 -2
  63. package/frontend-dist/assets/{copy-DxwFlq2A.js → copy-zQQvOqam.js} +1 -1
  64. package/frontend-dist/assets/dialog-C7v6Gaak.js +1 -0
  65. package/frontend-dist/assets/index-ClQS69Or.css +1 -0
  66. package/frontend-dist/assets/index-PMAQyWJb.js +3 -0
  67. package/frontend-dist/assets/mappings-BpkOqnsu.js +1 -0
  68. package/frontend-dist/assets/mappings-D7Qy46v_.js +1 -0
  69. package/frontend-dist/assets/{providers-Bcea72GK.js → providers-BI5dO-j0.js} +1 -1
  70. package/frontend-dist/assets/{providers-DNICB6Kg.js → providers-BzxbZ85B.js} +1 -1
  71. package/frontend-dist/assets/{trash-2-D2SrfECO.js → trash-2-CrcHK-G_.js} +1 -1
  72. package/frontend-dist/assets/{useClipboard-CttzUerj.js → useClipboard-B4K3eogm.js} +1 -1
  73. package/frontend-dist/assets/{useLogRetention-Dv0deAan.js → useLogRetention-BNbFXLBO.js} +1 -1
  74. package/frontend-dist/index.html +3 -3
  75. package/package.json +2 -2
  76. package/frontend-dist/assets/CardContent-DfVo-N85.js +0 -1
  77. package/frontend-dist/assets/CardTitle-npwJSAlz.js +0 -1
  78. package/frontend-dist/assets/Checkbox-Ddnzkh_i.js +0 -1
  79. package/frontend-dist/assets/CollapsibleContent-BTVazeoQ.js +0 -1
  80. package/frontend-dist/assets/CollapsibleTrigger-DCQeyHrt.js +0 -1
  81. package/frontend-dist/assets/Dashboard-DjnImtwH.js +0 -3
  82. package/frontend-dist/assets/Label-Dw5HcYsL.js +0 -1
  83. package/frontend-dist/assets/Login-CSrfhhm9.js +0 -1
  84. package/frontend-dist/assets/Logs-HR1DZs1M.js +0 -1
  85. package/frontend-dist/assets/MappingEntryEditor-C9pgNL0Q.js +0 -1
  86. package/frontend-dist/assets/ModelCard-IQMwlnCm.js +0 -1
  87. package/frontend-dist/assets/ModelMappings-kRx-GL_7.js +0 -1
  88. package/frontend-dist/assets/Monitor-y1ofDNK7.js +0 -1
  89. package/frontend-dist/assets/Providers-C1bP2PoM.js +0 -1
  90. package/frontend-dist/assets/ProxyEnhancement-DQx4coxn.js +0 -1
  91. package/frontend-dist/assets/QuickSetup-DHX9-CnO.js +0 -1
  92. package/frontend-dist/assets/RetryRules-zdJE0bFL.js +0 -1
  93. package/frontend-dist/assets/RouterKeys-CD0rI4kv.js +0 -1
  94. package/frontend-dist/assets/RovingFocusItem-CFmjbm49.js +0 -1
  95. package/frontend-dist/assets/Schedules-BUm3cC6w.js +0 -1
  96. package/frontend-dist/assets/Settings-D7z5IRkY.js +0 -6
  97. package/frontend-dist/assets/Setup-i9inmgjB.js +0 -1
  98. package/frontend-dist/assets/Switch-C9DeYAnK.js +0 -1
  99. package/frontend-dist/assets/TooltipTrigger-Dr6kqGSH.js +0 -1
  100. package/frontend-dist/assets/TransformRulesForm-CyXh4jHa.js +0 -1
  101. package/frontend-dist/assets/UnifiedRequestDialog-6ZRBfjko.js +0 -3
  102. package/frontend-dist/assets/VisuallyHiddenInput-CwE9jREu.js +0 -1
  103. package/frontend-dist/assets/constants-yM0YwP2s.js +0 -1
  104. package/frontend-dist/assets/dialog-BWB1aLcT.js +0 -1
  105. package/frontend-dist/assets/index-DeeDpH_W.css +0 -1
  106. package/frontend-dist/assets/index-itL9--Q_.js +0 -3
  107. package/frontend-dist/assets/mappings-6w7mc8YK.js +0 -1
  108. package/frontend-dist/assets/mappings-C1fK_e70.js +0 -1
  109. /package/frontend-dist/assets/{common-D96jEq-h.js → common-Bvxev9Ev.js} +0 -0
  110. /package/frontend-dist/assets/{common-BpwAv-lj.js → common-Cn0QcrnY.js} +0 -0
  111. /package/frontend-dist/assets/{dashboard-DjgmcUG5.js → dashboard-Cejt1wVQ.js} +0 -0
  112. /package/frontend-dist/assets/{dashboard-COCyp2p_.js → dashboard-DLTOR0fN.js} +0 -0
  113. /package/frontend-dist/assets/{login-BTNL5nN5.js → login-BkOvA7gg.js} +0 -0
  114. /package/frontend-dist/assets/{login-Sef1i0de.js → login-DWRFsEu3.js} +0 -0
  115. /package/frontend-dist/assets/{logs-CBRLywRw.js → logs-CA8USnXG.js} +0 -0
  116. /package/frontend-dist/assets/{logs-B-6cgV12.js → logs-QPt2Ybwy.js} +0 -0
  117. /package/frontend-dist/assets/{monitor-CaDMr_KG.js → monitor-CcPZdXUM.js} +0 -0
  118. /package/frontend-dist/assets/{monitor-C9j7ppMj.js → monitor-D-0KOVTC.js} +0 -0
  119. /package/frontend-dist/assets/{proxyEnhancement-DpIVSv-g.js → proxyEnhancement-B6vdsMeK.js} +0 -0
  120. /package/frontend-dist/assets/{proxyEnhancement-rSM6KhbN.js → proxyEnhancement-UuPFs4M3.js} +0 -0
  121. /package/frontend-dist/assets/{quickSetup-CCxaqY3U.js → quickSetup-CSpWmAy-.js} +0 -0
  122. /package/frontend-dist/assets/{quickSetup-DgDENHE4.js → quickSetup-D8ruRelW.js} +0 -0
  123. /package/frontend-dist/assets/{requestDetail-DZ55ph4h.js → requestDetail-8Sp9tWNb.js} +0 -0
  124. /package/frontend-dist/assets/{requestDetail-3KCtYe1N.js → requestDetail-CcHzzKYr.js} +0 -0
  125. /package/frontend-dist/assets/{retryRules-BXrRL52J.js → retryRules-C--dd-y8.js} +0 -0
  126. /package/frontend-dist/assets/{retryRules-CToGC6cR.js → retryRules-CzLnagW_.js} +0 -0
  127. /package/frontend-dist/assets/{routerKeys-DbTg4OP1.js → routerKeys-CB2l_V7w.js} +0 -0
  128. /package/frontend-dist/assets/{routerKeys-Be7OZCn0.js → routerKeys-p_ioAckE.js} +0 -0
  129. /package/frontend-dist/assets/{schedules-Bd66RL7P.js → schedules-Cz_-Wfa_.js} +0 -0
  130. /package/frontend-dist/assets/{schedules-HDwMuDgX.js → schedules-DTgk603B.js} +0 -0
  131. /package/frontend-dist/assets/{settings-DCS-RTKl.js → settings-B5Mq1HN8.js} +0 -0
  132. /package/frontend-dist/assets/{settings-C4zZB9GY.js → settings-j3dzVXzy.js} +0 -0
  133. /package/frontend-dist/assets/{setup-CrjgRrYP.js → setup-DaeEG9ll.js} +0 -0
  134. /package/frontend-dist/assets/{setup-DmgXvgkY.js → setup-Dryg-9wL.js} +0 -0
  135. /package/frontend-dist/assets/{sidebar-3c8D7l60.js → sidebar-BQWT-QZb.js} +0 -0
  136. /package/frontend-dist/assets/{sidebar-vj4kQ6t1.js → sidebar-DYwEKca3.js} +0 -0
@@ -0,0 +1,355 @@
1
+ [
2
+ {
3
+ "group": "DeepSeek",
4
+ "presets": [
5
+ {
6
+ "plan": "Anthropic",
7
+ "presetName": "deepseek",
8
+ "apiType": "anthropic",
9
+ "baseUrl": "https://api.deepseek.com/anthropic",
10
+ "modelsEndpoint": "/v1/models",
11
+ "models": [
12
+ "deepseek-v4-flash",
13
+ "deepseek-v4-pro"
14
+ ]
15
+ },
16
+ {
17
+ "plan": "OpenAI",
18
+ "presetName": "deepseek-openai",
19
+ "apiType": "openai",
20
+ "baseUrl": "https://api.deepseek.com",
21
+ "modelsEndpoint": "/v1/models",
22
+ "models": [
23
+ "deepseek-v4-flash",
24
+ "deepseek-v4-pro"
25
+ ]
26
+ }
27
+ ]
28
+ },
29
+ {
30
+ "group": "百度千帆",
31
+ "presets": [
32
+ {
33
+ "plan": "API",
34
+ "presetName": "qianfan",
35
+ "apiType": "openai",
36
+ "baseUrl": "https://qianfan.baidubce.com/v2",
37
+ "modelsEndpoint": "/v1/models",
38
+ "models": [
39
+ "ernie-4.0-8k",
40
+ "ernie-4.0-turbo-8k",
41
+ "ernie-3.5-8k",
42
+ "ernie-speed-8k",
43
+ "ernie-lite-8k",
44
+ "ernie-x1-32k-preview",
45
+ "deepseek-v3",
46
+ "deepseek-r1"
47
+ ],
48
+ "upstreamPath": "/chat/completions"
49
+ }
50
+ ]
51
+ },
52
+ {
53
+ "group": "科大讯飞",
54
+ "presets": [
55
+ {
56
+ "plan": "API",
57
+ "presetName": "iflytek-spark",
58
+ "apiType": "openai",
59
+ "baseUrl": "https://spark-api-open.xf-yun.com",
60
+ "modelsEndpoint": "/v1/models",
61
+ "models": [
62
+ "4.0Ultra",
63
+ "generalv3.5",
64
+ "max-32k",
65
+ "generalv3",
66
+ "pro-128k",
67
+ "lite"
68
+ ]
69
+ }
70
+ ]
71
+ },
72
+ {
73
+ "group": "硅基流动",
74
+ "presets": [
75
+ {
76
+ "plan": "API",
77
+ "presetName": "siliconflow",
78
+ "apiType": "openai",
79
+ "baseUrl": "https://api.siliconflow.cn",
80
+ "modelsEndpoint": "/v1/models",
81
+ "models": [
82
+ "deepseek-ai/DeepSeek-V3.2-Exp",
83
+ "deepseek-ai/DeepSeek-R1",
84
+ "Qwen/Qwen3-8B",
85
+ "Qwen/Qwen2.5-72B-Instruct",
86
+ "Qwen/Qwen2.5-Coder-32B-Instruct",
87
+ "moonshotai/Kimi-K2-Instruct",
88
+ "moonshotai/Kimi-K2.5"
89
+ ]
90
+ }
91
+ ]
92
+ },
93
+ {
94
+ "group": "智谱",
95
+ "presets": [
96
+ {
97
+ "plan": "Coding Plan",
98
+ "presetName": "zhipu-coding-plan",
99
+ "apiType": "openai",
100
+ "baseUrl": "https://open.bigmodel.cn",
101
+ "upstreamPath": "/api/coding/paas/v4/chat/completions",
102
+ "modelsEndpoint": "/v1/models",
103
+ "models": [
104
+ "glm-5.1",
105
+ "glm-5",
106
+ "glm-5-turbo",
107
+ "glm-4.7",
108
+ "glm-4.5-air"
109
+ ]
110
+ },
111
+ {
112
+ "plan": "API",
113
+ "presetName": "zhipu",
114
+ "apiType": "openai",
115
+ "baseUrl": "https://open.bigmodel.cn/api/paas/v4",
116
+ "modelsEndpoint": "/models",
117
+ "models": [
118
+ "glm-5.1",
119
+ "glm-5",
120
+ "glm-5-turbo",
121
+ "glm-4.7",
122
+ "glm-4.7-flash",
123
+ "glm-4.6"
124
+ ]
125
+ }
126
+ ]
127
+ },
128
+ {
129
+ "group": "月之暗面",
130
+ "presets": [
131
+ {
132
+ "plan": "Coding Plan",
133
+ "presetName": "kimi-coding-plan",
134
+ "apiType": "anthropic",
135
+ "baseUrl": "https://api.kimi.com/coding",
136
+ "modelsEndpoint": "/v1/models",
137
+ "models": [
138
+ "kimi-for-coding",
139
+ "kimi-k2.5"
140
+ ]
141
+ },
142
+ {
143
+ "plan": "API",
144
+ "presetName": "kimi",
145
+ "apiType": "openai",
146
+ "baseUrl": "https://api.moonshot.cn",
147
+ "modelsEndpoint": "/v1/models",
148
+ "models": [
149
+ "kimi-k2.6",
150
+ "kimi-k2.5",
151
+ "kimi-k2-turbo-preview",
152
+ "kimi-k2-thinking",
153
+ "moonshot-v1-128k"
154
+ ]
155
+ }
156
+ ]
157
+ },
158
+ {
159
+ "group": "Minimax",
160
+ "presets": [
161
+ {
162
+ "plan": "Token Plan",
163
+ "presetName": "minimax-token-plan",
164
+ "apiType": "anthropic",
165
+ "baseUrl": "https://api.minimaxi.com/anthropic",
166
+ "modelsEndpoint": "/v1/models",
167
+ "models": [
168
+ "MiniMax-M2.7"
169
+ ]
170
+ },
171
+ {
172
+ "plan": "API",
173
+ "presetName": "minimax",
174
+ "apiType": "openai",
175
+ "baseUrl": "https://api.minimax.chat",
176
+ "modelsEndpoint": "/v1/models",
177
+ "models": [
178
+ "MiniMax-M2.7",
179
+ "MiniMax-M2.7-highspeed",
180
+ "MiniMax-M2.5",
181
+ "MiniMax-M2.5-highspeed",
182
+ "MiniMax-M2.1",
183
+ "MiniMax-M2"
184
+ ]
185
+ }
186
+ ]
187
+ },
188
+ {
189
+ "group": "火山引擎",
190
+ "presets": [
191
+ {
192
+ "plan": "Coding Plan",
193
+ "presetName": "volcengine-coding-plan",
194
+ "apiType": "anthropic",
195
+ "baseUrl": "https://ark.cn-beijing.volces.com/api/coding",
196
+ "modelsEndpoint": "/v1/models",
197
+ "models": [
198
+ "ark-code-latest",
199
+ "doubao-seed-2.0-code",
200
+ "kimi-k2.5",
201
+ "glm-4.7",
202
+ "deepseek-v3.2"
203
+ ]
204
+ },
205
+ {
206
+ "plan": "API",
207
+ "presetName": "volcengine",
208
+ "apiType": "openai",
209
+ "baseUrl": "https://ark.cn-beijing.volces.com/api/v3",
210
+ "modelsEndpoint": "/models",
211
+ "models": [
212
+ "doubao-seed-2-0-pro-260215",
213
+ "doubao-seed-1-8-251228",
214
+ "doubao-seed-code-preview-251028"
215
+ ]
216
+ }
217
+ ]
218
+ },
219
+ {
220
+ "group": "阿里云",
221
+ "presets": [
222
+ {
223
+ "plan": "Coding Plan",
224
+ "presetName": "aliyun-coding-plan",
225
+ "apiType": "anthropic",
226
+ "baseUrl": "https://coding.dashscope.aliyuncs.com/apps/anthropic",
227
+ "models": [
228
+ "qwen3.6-plus",
229
+ "qwen3-coder-next",
230
+ "qwen3-coder-plus",
231
+ "kimi-k2.5",
232
+ "glm-5",
233
+ "MiniMax-M2.5"
234
+ ]
235
+ },
236
+ {
237
+ "plan": "API",
238
+ "presetName": "aliyun",
239
+ "apiType": "openai",
240
+ "baseUrl": "https://dashscope.aliyuncs.com/compatible-mode",
241
+ "modelsEndpoint": "/v1/models",
242
+ "models": [
243
+ "qwen3.6-plus",
244
+ "qwen3.5-plus",
245
+ "qwen3-max",
246
+ "qwen3.5-flash",
247
+ "qwen3-coder-plus",
248
+ "qwen3-coder-next"
249
+ ]
250
+ }
251
+ ]
252
+ },
253
+ {
254
+ "group": "腾讯云",
255
+ "presets": [
256
+ {
257
+ "plan": "Coding Plan",
258
+ "presetName": "tencent-coding-plan",
259
+ "apiType": "anthropic",
260
+ "baseUrl": "https://api.lkeap.cloud.tencent.com/coding/anthropic",
261
+ "modelsEndpoint": "/v1/models",
262
+ "models": [
263
+ "tc-code-latest",
264
+ "hunyuan-2.0-instruct",
265
+ "hunyuan-2.0-thinking",
266
+ "hunyuan-turbos",
267
+ "hunyuan-t1",
268
+ "glm-5",
269
+ "kimi-k2.5"
270
+ ]
271
+ },
272
+ {
273
+ "plan": "API",
274
+ "presetName": "tencent",
275
+ "apiType": "openai",
276
+ "baseUrl": "https://api.hunyuan.cloud.tencent.com",
277
+ "modelsEndpoint": "/v1/models",
278
+ "models": [
279
+ "hunyuan-2.0-thinking",
280
+ "hunyuan-2.0-instruct",
281
+ "hunyuan-t1-latest",
282
+ "hunyuan-a13b",
283
+ "hunyuan-turbos-latest"
284
+ ]
285
+ }
286
+ ]
287
+ },
288
+ {
289
+ "group": "OpenCode",
290
+ "presets": [
291
+ {
292
+ "plan": "Go OpenAI",
293
+ "presetName": "opencode-go-openai",
294
+ "apiType": "openai",
295
+ "baseUrl": "https://opencode.ai/zen/go/v1/chat/completions",
296
+ "modelsEndpoint": "/models",
297
+ "models": [
298
+ "glm-5.1",
299
+ "glm-5",
300
+ "kimi-k2.5",
301
+ "kimi-k2.6",
302
+ "deepseek-v4-pro",
303
+ "deepseek-v4-flash",
304
+ "mimo-v2-pro",
305
+ "mimo-v2-omni",
306
+ "mimo-v2.5-pro",
307
+ "mimo-v2.5",
308
+ "qwen3.6-plus",
309
+ "qwen3.5-plus"
310
+ ]
311
+ },
312
+ {
313
+ "plan": "Go Anthropic",
314
+ "presetName": "opencode-go-anthropic",
315
+ "apiType": "anthropic",
316
+ "baseUrl": "https://opencode.ai/zen/go/v1/messages",
317
+ "models": [
318
+ "minimax-m2.7",
319
+ "minimax-m2.5"
320
+ ]
321
+ }
322
+ ]
323
+ },
324
+ {
325
+ "group": "阶跃星辰",
326
+ "presets": [
327
+ {
328
+ "plan": "Step Plan",
329
+ "presetName": "stepfun-step-plan",
330
+ "apiType": "anthropic",
331
+ "baseUrl": "https://api.stepfun.com/step_plan",
332
+ "modelsEndpoint": "/v1/models",
333
+ "models": [
334
+ "step-3.5-flash-2603",
335
+ "step-3.5-flash"
336
+ ]
337
+ },
338
+ {
339
+ "plan": "API",
340
+ "presetName": "stepfun",
341
+ "apiType": "openai",
342
+ "baseUrl": "https://api.stepfun.com",
343
+ "modelsEndpoint": "/v1/models",
344
+ "models": [
345
+ "step-3.5-flash",
346
+ "step-3",
347
+ "step-2-mini",
348
+ "step-2-16k",
349
+ "step-1-8k",
350
+ "step-1-32k"
351
+ ]
352
+ }
353
+ ]
354
+ }
355
+ ]
@@ -0,0 +1,12 @@
1
+ [
2
+ { "name": "429 Too Many Requests", "status_code": 429, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
3
+ { "name": "503 Service Unavailable", "status_code": 503, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
4
+ { "name": "ZAI 网络错误 (code 1234)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
5
+ { "name": "ZAI 临时不可用", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*请稍后重试", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
6
+ { "name": "ZAI 操作失败 (code 500)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
7
+ { "name": "ZAI 速率限制 (HTTP 200, code 1302)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1302\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
8
+ { "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
9
+ { "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
10
+ { "name": "ZAI 模型过载 (HTTP 200, code 1305)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1305\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
11
+ { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] }
12
+ ]
@@ -7,6 +7,8 @@ export interface ProviderPreset {
7
7
  /** 上游模型列表端点路径,如 /v1/models 或 /models;拼接在 baseUrl 后 */
8
8
  modelsEndpoint?: string;
9
9
  models: string[];
10
+ /** 由 API handler 补充:模型名 → capabilities 映射 */
11
+ modelCapabilities?: Record<string, string[]>;
10
12
  }
11
13
  export interface ProviderGroup {
12
14
  group: string;
@@ -0,0 +1 @@
1
+ {"providers":1,"retryRules":1}
@@ -53,7 +53,7 @@ export declare class RequestTracker {
53
53
  attempts?: AttemptSnapshot[];
54
54
  }): void;
55
55
  /** Update stream metrics for a completed request (e.g., after cache estimation) */
56
- updateCompletedMetrics(id: string, cacheReadTokens: number): void;
56
+ updateCompletedMetrics(id: string, cacheReadTokens: number, cacheReadTokensEstimated?: boolean): void;
57
57
  getActive(): ActiveRequest[];
58
58
  getRecent(limit?: number): ActiveRequest[];
59
59
  get(id: string): ActiveRequest | undefined;
@@ -144,13 +144,14 @@ export class RequestTracker {
144
144
  this.broadcast("request_complete", completed);
145
145
  }
146
146
  /** Update stream metrics for a completed request (e.g., after cache estimation) */
147
- updateCompletedMetrics(id, cacheReadTokens) {
147
+ updateCompletedMetrics(id, cacheReadTokens, cacheReadTokensEstimated) {
148
148
  const req = this.recentCompleted.find(r => r.id === id);
149
149
  if (!req || !req.streamMetrics)
150
150
  return;
151
151
  req.streamMetrics = {
152
152
  ...req.streamMetrics,
153
153
  cacheReadTokens,
154
+ cacheReadTokensEstimated: cacheReadTokensEstimated ? 1 : 0,
154
155
  };
155
156
  this.broadcast("request_complete", req);
156
157
  }
@@ -51,6 +51,7 @@ export interface StreamMetricsSnapshot {
51
51
  inputTokens: number | null;
52
52
  outputTokens: number | null;
53
53
  cacheReadTokens: number | null;
54
+ cacheReadTokensEstimated?: number;
54
55
  ttftMs: number | null;
55
56
  tokensPerSecond: number | null;
56
57
  stopReason: string | null;
@@ -37,6 +37,7 @@ export interface MetricsResult {
37
37
  output_tokens: number | null;
38
38
  cache_creation_tokens: number | null;
39
39
  cache_read_tokens: number | null;
40
+ cache_read_tokens_estimated?: number;
40
41
  ttft_ms: number | null;
41
42
  /** T6 - T0: proxy end-to-end streaming duration */
42
43
  total_duration_ms: number | null;
package/dist/index.js CHANGED
@@ -6,6 +6,7 @@ import { randomUUID } from "crypto";
6
6
  import Fastify from "fastify";
7
7
  import { insertRequestLog } from "./db/logs.js";
8
8
  import { HTTP_NOT_FOUND, HTTP_INTERNAL_ERROR, getProxyApiType } from "./core/constants.js";
9
+ import { loadModelDirectory } from "./config/model-context.js";
9
10
  import { API_CODE, apiError, isAdminApiResponse, statusToApiCode } from "./admin/api-response.js";
10
11
  const PROVIDER_DEFAULT_QUEUE_TIMEOUT_MS = 5000;
11
12
  const PROVIDER_DEFAULT_MAX_QUEUE_SIZE = 100;
@@ -82,6 +83,8 @@ export async function buildApp(options) {
82
83
  else {
83
84
  db = initDatabase(config.DB_PATH);
84
85
  }
86
+ // 加载外部模型目录(ai-model-directory),fallback 到硬编码白名单
87
+ loadModelDirectory();
85
88
  const isDev = process.env.NODE_ENV !== "production";
86
89
  const MAX_BODY_SIZE_MB = 50;
87
90
  const KB = 1024;
@@ -120,8 +123,21 @@ export async function buildApp(options) {
120
123
  return new Error(message);
121
124
  });
122
125
  // 记录请求到达时间,供全局错误处理计算延迟
123
- app.addHook("onRequest", (request, _reply, done) => {
126
+ app.addHook("onRequest", (request, reply, done) => {
124
127
  request.receivedAt = Date.now();
128
+ // 全局 EPIPE 防护:ServerResponse 的 write 异步完成失败时,
129
+ // 内部 socketErrorListener → response.destroy(err) → response.emit('error')。
130
+ // 若无 listener 则该 error 成为 uncaught exception。
131
+ // 代理路由在 create-proxy-handler.ts 中已有额外监听,此处覆盖所有路由。
132
+ reply.raw.on("error", (err) => {
133
+ const code = err.code;
134
+ if (code === 'EPIPE') {
135
+ request.log.debug({ err }, "client disconnected (EPIPE)");
136
+ }
137
+ else {
138
+ request.log.warn({ err }, "response stream error");
139
+ }
140
+ });
125
141
  done();
126
142
  });
127
143
  // 统一错误处理:代理路由保持 {error:{message}},Admin API 使用信封格式
@@ -99,6 +99,7 @@ export class MetricsExtractor {
99
99
  output_tokens: this.outputTokens,
100
100
  cache_creation_tokens: this.cacheCreationTokens,
101
101
  cache_read_tokens: this.cacheReadTokens,
102
+ cache_read_tokens_estimated: this.cacheReadTokens !== null ? 0 : undefined,
102
103
  ttft_ms: this.ttftMs,
103
104
  total_duration_ms: totalDurationMs,
104
105
  tokens_per_second: totalTps,
@@ -306,6 +307,7 @@ function extractOpenAINonStream(parsed) {
306
307
  output_tokens: usage?.completion_tokens ?? null,
307
308
  cache_creation_tokens: null,
308
309
  cache_read_tokens: details?.cached_tokens ?? null,
310
+ cache_read_tokens_estimated: details?.cached_tokens != null ? 0 : undefined,
309
311
  ttft_ms: null,
310
312
  total_duration_ms: null,
311
313
  tokens_per_second: null,
@@ -321,6 +323,7 @@ function extractAnthropicNonStream(parsed) {
321
323
  output_tokens: usage?.output_tokens ?? null,
322
324
  cache_creation_tokens: usage?.cache_creation_input_tokens ?? null,
323
325
  cache_read_tokens: usage?.cache_read_input_tokens ?? null,
326
+ cache_read_tokens_estimated: usage?.cache_read_input_tokens != null ? 0 : undefined,
324
327
  ttft_ms: null,
325
328
  total_duration_ms: null,
326
329
  tokens_per_second: null,
@@ -192,8 +192,23 @@ export function createProxyHandler(config) {
192
192
  // Socket error handling
193
193
  const socketErrorHandler = (err) => request.log.debug({ err }, "client socket error");
194
194
  request.raw.socket.on("error", socketErrorHandler);
195
+ // reply.raw (ServerResponse) error handling
196
+ // Node.js 中,TCP socket write 异步完成失败时(如 EPIPE),
197
+ // 内部 socketErrorListener → response.destroy(err) → response.emit('error')。
198
+ // 若无 listener,该 error 成为 uncaught exception 导致进程退出。
199
+ const replyErrorHandler = (err) => {
200
+ const code = err.code;
201
+ if (code === 'EPIPE') {
202
+ request.log.debug({ err }, "client disconnected (EPIPE)");
203
+ }
204
+ else {
205
+ request.log.warn({ err }, "response stream error");
206
+ }
207
+ };
208
+ reply.raw.on("error", replyErrorHandler);
195
209
  reply.raw.on("close", () => {
196
210
  request.raw.socket.removeListener("error", socketErrorHandler);
211
+ reply.raw.removeListener("error", replyErrorHandler);
197
212
  });
198
213
  // 创建 pipeline context
199
214
  const ctx = createPipelineContext(request, reply, apiType);