llm-simple-router 0.8.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/README.en.md +319 -0
  2. package/README.md +2 -0
  3. package/config/recommended-providers.json +33 -9
  4. package/config/recommended-retry-rules.json +9 -8
  5. package/dist/admin/providers.js +11 -9
  6. package/dist/admin/quick-setup.d.ts +13 -0
  7. package/dist/admin/quick-setup.js +169 -0
  8. package/dist/admin/recommended.js +5 -1
  9. package/dist/admin/routes.js +2 -0
  10. package/dist/config/model-context.d.ts +8 -2
  11. package/dist/config/model-context.js +17 -5
  12. package/dist/config/recommended.d.ts +2 -1
  13. package/dist/config/recommended.js +5 -9
  14. package/dist/core/constants.js +2 -0
  15. package/dist/db/index.js +5 -0
  16. package/dist/db/migrations/033_add_adaptive_concurrency.sql +3 -0
  17. package/dist/db/migrations/036_add_openai_responses_api_type.sql +68 -0
  18. package/dist/db/migrations/037_fix_035_data_corruption.sql +54 -0
  19. package/dist/db/providers.d.ts +3 -3
  20. package/dist/index.js +7 -3
  21. package/dist/metrics/metrics-extractor.d.ts +3 -2
  22. package/dist/metrics/metrics-extractor.js +45 -0
  23. package/dist/metrics/sse-metrics-transform.d.ts +1 -1
  24. package/dist/metrics/sse-metrics-transform.js +10 -0
  25. package/dist/monitor/request-tracker.d.ts +1 -1
  26. package/dist/monitor/stream-content-accumulator.d.ts +1 -1
  27. package/dist/monitor/stream-extractor.d.ts +1 -1
  28. package/dist/monitor/stream-extractor.js +21 -0
  29. package/dist/monitor/types.d.ts +1 -1
  30. package/dist/proxy/handler/proxy-handler-utils.d.ts +1 -1
  31. package/dist/proxy/handler/proxy-handler.d.ts +1 -1
  32. package/dist/proxy/handler/proxy-handler.js +8 -2
  33. package/dist/proxy/handler/responses.d.ts +7 -0
  34. package/dist/proxy/handler/responses.js +48 -0
  35. package/dist/proxy/loop-prevention/tool-loop-guard.d.ts +1 -1
  36. package/dist/proxy/loop-prevention/tool-loop-guard.js +10 -0
  37. package/dist/proxy/orchestration/orchestrator.d.ts +1 -1
  38. package/dist/proxy/orchestration/semaphore.js +6 -0
  39. package/dist/proxy/patch/deepseek/index.d.ts +1 -1
  40. package/dist/proxy/patch/deepseek/patch-thinking-param.d.ts +1 -1
  41. package/dist/proxy/patch/index.d.ts +3 -0
  42. package/dist/proxy/patch/index.js +28 -0
  43. package/dist/proxy/patch/tool-round-limiter.d.ts +1 -1
  44. package/dist/proxy/patch/tool-round-limiter.js +16 -0
  45. package/dist/proxy/proxy-core.d.ts +1 -1
  46. package/dist/proxy/proxy-logging.d.ts +3 -3
  47. package/dist/proxy/response-transform.js +13 -0
  48. package/dist/proxy/transform/id-utils.d.ts +1 -0
  49. package/dist/proxy/transform/id-utils.js +3 -0
  50. package/dist/proxy/transform/plugin-types.d.ts +5 -5
  51. package/dist/proxy/transform/request-bridge-responses.d.ts +19 -0
  52. package/dist/proxy/transform/request-bridge-responses.js +311 -0
  53. package/dist/proxy/transform/request-transform-responses.d.ts +2 -0
  54. package/dist/proxy/transform/request-transform-responses.js +350 -0
  55. package/dist/proxy/transform/response-bridge-responses.d.ts +23 -0
  56. package/dist/proxy/transform/response-bridge-responses.js +173 -0
  57. package/dist/proxy/transform/response-transform-responses.d.ts +2 -0
  58. package/dist/proxy/transform/response-transform-responses.js +137 -0
  59. package/dist/proxy/transform/stream-ant2resp.d.ts +26 -0
  60. package/dist/proxy/transform/stream-ant2resp.js +322 -0
  61. package/dist/proxy/transform/stream-bridge-chat2resp.d.ts +40 -0
  62. package/dist/proxy/transform/stream-bridge-chat2resp.js +382 -0
  63. package/dist/proxy/transform/stream-bridge-resp2chat.d.ts +24 -0
  64. package/dist/proxy/transform/stream-bridge-resp2chat.js +237 -0
  65. package/dist/proxy/transform/stream-resp2ant.d.ts +21 -0
  66. package/dist/proxy/transform/stream-resp2ant.js +238 -0
  67. package/dist/proxy/transform/stream-transform-base.d.ts +1 -0
  68. package/dist/proxy/transform/stream-transform-base.js +3 -0
  69. package/dist/proxy/transform/transform-coordinator.d.ts +1 -0
  70. package/dist/proxy/transform/transform-coordinator.js +127 -8
  71. package/dist/proxy/transform/types-responses.d.ts +177 -0
  72. package/dist/proxy/transform/types-responses.js +27 -0
  73. package/dist/proxy/transform/types.d.ts +3 -1
  74. package/dist/proxy/transport/transport-fn.d.ts +1 -1
  75. package/frontend-dist/assets/CardContent-BhMXx-JD.js +1 -0
  76. package/frontend-dist/assets/CardTitle-DQDjTee3.js +1 -0
  77. package/frontend-dist/assets/CascadingModelSelect-JBQq3JJt.js +1 -0
  78. package/frontend-dist/assets/Checkbox-ByxbKP_C.js +1 -0
  79. package/frontend-dist/assets/CollapsibleContent-GecW2Jk_.js +1 -0
  80. package/frontend-dist/assets/CollapsibleTrigger-Cib3-OsK.js +1 -0
  81. package/frontend-dist/assets/Collection-Dbvdpa0m.js +1 -0
  82. package/frontend-dist/assets/Dashboard-3MJPLflT.js +3 -0
  83. package/frontend-dist/assets/DialogTitle-Ej_rtfV1.js +1 -0
  84. package/frontend-dist/assets/{Input-CAnKUBBK.js → Input-tcnrMp1v.js} +1 -1
  85. package/frontend-dist/assets/Label-BwzPFyL-.js +1 -0
  86. package/frontend-dist/assets/Login-Cdsw2pWC.js +1 -0
  87. package/frontend-dist/assets/Logs-5_CWiws5.js +1 -0
  88. package/frontend-dist/assets/MappingList-D8HRph05.js +1 -0
  89. package/frontend-dist/assets/ModelCard-CZbQcYNn.js +1 -0
  90. package/frontend-dist/assets/ModelMappings-CJqgl7O8.js +1 -0
  91. package/frontend-dist/assets/Monitor-B8v5a8fB.js +1 -0
  92. package/frontend-dist/assets/PopoverTrigger-C88SpJNZ.js +1 -0
  93. package/frontend-dist/assets/PopperContent-6BXua_FZ.js +1 -0
  94. package/frontend-dist/assets/Providers-DH0nvlGn.js +1 -0
  95. package/frontend-dist/assets/ProxyEnhancement-CAH-44W-.js +5 -0
  96. package/frontend-dist/assets/QuickSetup-CsDO-ZGP.js +1 -0
  97. package/frontend-dist/assets/RetryRules-8iT9fLsH.js +1 -0
  98. package/frontend-dist/assets/RouterKeys-BFoEmWgj.js +1 -0
  99. package/frontend-dist/assets/RovingFocusItem-DdPUFQHC.js +1 -0
  100. package/frontend-dist/assets/Schedules-B8Se31u4.js +1 -0
  101. package/frontend-dist/assets/SelectValue-CT2z_-6j.js +1 -0
  102. package/frontend-dist/assets/Settings-BHvtsJKD.js +6 -0
  103. package/frontend-dist/assets/Setup-k-l9KDC0.js +1 -0
  104. package/frontend-dist/assets/Switch-D1NdA4ax.js +1 -0
  105. package/frontend-dist/assets/TableHeader-CcMyOsUB.js +1 -0
  106. package/frontend-dist/assets/Teleport-Bmeh33lB.js +3 -0
  107. package/frontend-dist/assets/TooltipTrigger-LegC_Uvp.js +1 -0
  108. package/frontend-dist/assets/UnifiedRequestDialog-BVw6W2pk.js +3 -0
  109. package/frontend-dist/assets/UnifiedRequestDialog-C4MTxb25.css +1 -0
  110. package/frontend-dist/assets/VisuallyHidden-ogESfc9X.js +1 -0
  111. package/frontend-dist/assets/VisuallyHiddenInput-BQemVGau.js +1 -0
  112. package/frontend-dist/assets/alert-dialog-DzKCAoYJ.js +1 -0
  113. package/frontend-dist/assets/badge-C-9zPTgw.js +1 -0
  114. package/frontend-dist/assets/button-D27ClX8J.js +14 -0
  115. package/frontend-dist/assets/check-yTAivq1h.js +1 -0
  116. package/frontend-dist/assets/common-CWCbKHOK.js +1 -0
  117. package/frontend-dist/assets/common-D4xnnaqi.js +1 -0
  118. package/frontend-dist/assets/constants-B-VELBjk.js +1 -0
  119. package/frontend-dist/assets/copy-DWG9cQPR.js +1 -0
  120. package/frontend-dist/assets/dashboard-B8eI-t8c.js +1 -0
  121. package/frontend-dist/assets/dashboard-Dbe6A2lu.js +1 -0
  122. package/frontend-dist/assets/dialog-BnYR6_dh.js +1 -0
  123. package/frontend-dist/assets/file-text-D33FJAPX.js +1 -0
  124. package/frontend-dist/assets/format-BhxQSgt6.js +1 -0
  125. package/frontend-dist/assets/i18n-CwUfS0tE.js +1 -0
  126. package/frontend-dist/assets/index-B348nt-T.css +1 -0
  127. package/frontend-dist/assets/index-DPRxBo3N.js +1 -0
  128. package/frontend-dist/assets/lib-D0Ek2pPZ.js +1 -0
  129. package/frontend-dist/assets/loader-circle-EpKC006I.js +1 -0
  130. package/frontend-dist/assets/login-BTolYxVI.js +1 -0
  131. package/frontend-dist/assets/login-w_ICpiU5.js +1 -0
  132. package/frontend-dist/assets/logs-7dT2uyMa.js +1 -0
  133. package/frontend-dist/assets/logs-_3w8tDQa.js +1 -0
  134. package/frontend-dist/assets/mappings-Bbn3r2uJ.js +1 -0
  135. package/frontend-dist/assets/mappings-CTZ-zb1x.js +1 -0
  136. package/frontend-dist/assets/monitor-DN5m5n_x.js +1 -0
  137. package/frontend-dist/assets/monitor-DysWEOtt.js +1 -0
  138. package/frontend-dist/assets/providers-C1gQGzwa.js +1 -0
  139. package/frontend-dist/assets/providers-CCfko___.js +1 -0
  140. package/frontend-dist/assets/proxyEnhancement-BItabyLo.js +1 -0
  141. package/frontend-dist/assets/proxyEnhancement-DeMb7wIE.js +1 -0
  142. package/frontend-dist/assets/quickSetup-C75HMC_z.js +1 -0
  143. package/frontend-dist/assets/quickSetup-DStZWiuf.js +1 -0
  144. package/frontend-dist/assets/requestDetail-BoaPEQs-.js +1 -0
  145. package/frontend-dist/assets/requestDetail-CM5kFgy6.js +1 -0
  146. package/frontend-dist/assets/retryRules-CIF37gOl.js +1 -0
  147. package/frontend-dist/assets/retryRules-o_D8S5gy.js +1 -0
  148. package/frontend-dist/assets/routerKeys-BAvjW0V8.js +1 -0
  149. package/frontend-dist/assets/routerKeys-mQt2YPuE.js +1 -0
  150. package/frontend-dist/assets/schedules-BCV2rxK-.js +1 -0
  151. package/frontend-dist/assets/schedules-Qte9b7b_.js +1 -0
  152. package/frontend-dist/assets/settings-Bgu2lJfy.js +1 -0
  153. package/frontend-dist/assets/settings-UCmMSq_F.js +1 -0
  154. package/frontend-dist/assets/setup-B_fAfMoV.js +1 -0
  155. package/frontend-dist/assets/setup-Chc246Zi.js +1 -0
  156. package/frontend-dist/assets/sidebar-B7rejnZA.js +1 -0
  157. package/frontend-dist/assets/sidebar-CBMItLst.js +1 -0
  158. package/frontend-dist/assets/sun-BylRZIWt.js +1 -0
  159. package/frontend-dist/assets/trash-2-QNFff7V4.js +1 -0
  160. package/frontend-dist/assets/{useClipboard-BmmsNSGV.js → useClipboard-BFt5f-_-.js} +1 -1
  161. package/frontend-dist/assets/{useFocusGuards-A-9V2Y-b.js → useFocusGuards-DQBZKWnu.js} +1 -1
  162. package/frontend-dist/assets/useFormControl-T2RQNBqs.js +1 -0
  163. package/frontend-dist/assets/useLogRetention-NrrZrpPE.js +1 -0
  164. package/frontend-dist/assets/useNonce-DR38uny5.js +1 -0
  165. package/frontend-dist/assets/useTheme-CpTI547G.js +1 -0
  166. package/frontend-dist/assets/x-DSgLgKC_.js +1 -0
  167. package/frontend-dist/index.html +25 -22
  168. package/package.json +1 -1
  169. package/dist/db/migrations/033_add_pipeline_snapshot.sql +0 -1
  170. package/frontend-dist/assets/CardContent-BVMQ2_pg.js +0 -1
  171. package/frontend-dist/assets/CardTitle-GLv7QyIY.js +0 -1
  172. package/frontend-dist/assets/CascadingModelSelect-CBhqKFDX.js +0 -1
  173. package/frontend-dist/assets/Checkbox-HPVDmEdV.js +0 -1
  174. package/frontend-dist/assets/CollapsibleTrigger-DhxD9tpM.js +0 -1
  175. package/frontend-dist/assets/Collection-BRt7YxN8.js +0 -1
  176. package/frontend-dist/assets/Dashboard-D1Ys8Zog.js +0 -3
  177. package/frontend-dist/assets/DialogTitle-23q73lwF.js +0 -1
  178. package/frontend-dist/assets/Label-DWdYtVMI.js +0 -1
  179. package/frontend-dist/assets/Login-w5WFOinP.js +0 -1
  180. package/frontend-dist/assets/Logs-C1F1ZmWF.js +0 -1
  181. package/frontend-dist/assets/ModelMappings-BzmecWEH.js +0 -1
  182. package/frontend-dist/assets/Monitor-DrAZFTKR.js +0 -1
  183. package/frontend-dist/assets/PopoverTrigger-Bj65uUbv.js +0 -1
  184. package/frontend-dist/assets/PopperContent-gzzf1XHe.js +0 -1
  185. package/frontend-dist/assets/Providers-DSgf4mb6.js +0 -1
  186. package/frontend-dist/assets/ProxyEnhancement-Bb1cCP6d.js +0 -5
  187. package/frontend-dist/assets/RetryRules-BwPfEZtm.js +0 -1
  188. package/frontend-dist/assets/RouterKeys-CzTSq1Mx.js +0 -1
  189. package/frontend-dist/assets/RovingFocusItem-CXM_Yfkm.js +0 -1
  190. package/frontend-dist/assets/Schedules-DVilCXrC.js +0 -1
  191. package/frontend-dist/assets/SelectValue-C0-LzGQY.js +0 -1
  192. package/frontend-dist/assets/Settings-Bpk53zVX.js +0 -6
  193. package/frontend-dist/assets/Setup-Dn7EgC49.js +0 -1
  194. package/frontend-dist/assets/Switch-BO8Ooae6.js +0 -1
  195. package/frontend-dist/assets/TableHeader-Bded9VTC.js +0 -1
  196. package/frontend-dist/assets/TabsTrigger-BzKMi9AF.js +0 -1
  197. package/frontend-dist/assets/Teleport-DizRK5O3.js +0 -3
  198. package/frontend-dist/assets/TooltipTrigger-EiIy2zn8.js +0 -1
  199. package/frontend-dist/assets/UnifiedRequestDialog-BABsTaGb.js +0 -3
  200. package/frontend-dist/assets/UnifiedRequestDialog-BjEigSaR.css +0 -1
  201. package/frontend-dist/assets/VisuallyHidden-5AozJQza.js +0 -1
  202. package/frontend-dist/assets/VisuallyHiddenInput-DdiZrV2i.js +0 -1
  203. package/frontend-dist/assets/alert-dialog-DlKUuTPe.js +0 -1
  204. package/frontend-dist/assets/arrow-down-CxWKmZ2I.js +0 -1
  205. package/frontend-dist/assets/badge-9KJEMa53.js +0 -1
  206. package/frontend-dist/assets/button-Ul8WlrM5.js +0 -12
  207. package/frontend-dist/assets/check-7ahK--N4.js +0 -1
  208. package/frontend-dist/assets/constants-D_0jiLjw.js +0 -1
  209. package/frontend-dist/assets/copy-DzU2pAMG.js +0 -1
  210. package/frontend-dist/assets/dialog-B9j-FMrd.js +0 -1
  211. package/frontend-dist/assets/file-text-Bj3ZIo-E.js +0 -1
  212. package/frontend-dist/assets/format-Dln15Luw.js +0 -1
  213. package/frontend-dist/assets/index-Bz_ZaXNn.css +0 -1
  214. package/frontend-dist/assets/index-MedWZMHB.js +0 -1
  215. package/frontend-dist/assets/lib-Hhs3NqfD.js +0 -1
  216. package/frontend-dist/assets/loader-circle-5TJUukEe.js +0 -1
  217. package/frontend-dist/assets/useFormControl-DEO19lRe.js +0 -1
  218. package/frontend-dist/assets/useLogRetention-BfnBFZ5K.js +0 -1
  219. package/frontend-dist/assets/useNonce-BfwUJ1Ci.js +0 -1
  220. package/frontend-dist/assets/x-Cfopt3QL.js +0 -1
  221. /package/dist/db/migrations/{034_drop_redundant_log_columns.sql → 035_drop_redundant_log_columns.sql} +0 -0
  222. /package/frontend-dist/assets/{ohash.D__AXeF1-D5e5Wyzx.js → ohash.D__AXeF1-CTo5WcIm.js} +0 -0
package/README.en.md ADDED
@@ -0,0 +1,319 @@
1
+ **[English](README.en.md)** | **[中文](README.md)**
2
+
3
+ # LLM Simple Router
4
+
5
+ An LLM API proxy router that receives requests from clients like Claude Code and Cursor, forwards them to configured backend Providers through model mapping and routing strategies, supporting both streaming (SSE) and non-streaming proxying.
6
+
7
+ **Core problem it solves**: Chinese domestic models have frequent rate limits, switching between multiple providers is cumbersome, and concurrency control is missing.
8
+
9
+ ## Who Is This For
10
+
11
+ - Developers using Claude Code with Chinese domestic models (Zhipu, Moonshot, Minimax, etc.)
12
+ - Those who want automatic retries for rate-limit errors, time-based model switching, and concurrency queue management
13
+ - Anyone looking for a turnkey solution without the hassle
14
+
15
+ ## Feature Overview
16
+
17
+ | Feature | Description |
18
+ |---------|-------------|
19
+ | Automatic retries | Exponential backoff retries for 429/400/network timeouts, pre-configured for Zhipu models by default |
20
+ | Multi-provider support | Zhipu, Moonshot, Minimax, Volcano Engine, Alibaba Cloud, Tencent Cloud, etc. Base URL is auto-filled when you select a Coding Plan |
21
+ | Time-based model mapping | Automatically switch backend models by time period (e.g., switch to Kimi during peak hours, back to GLM during off-peak) |
22
+ | Concurrency queue | Per-Provider concurrency limits with queueing for excess requests |
23
+ | Failover | Multiple Providers as backups; automatically switches to the next on failure |
24
+ | Real-time monitoring | SSE-based live view of active requests, queue status, and streaming output |
25
+ | Multi-key management | Independent API keys + model whitelists for multi-user/multi-project setups |
26
+ | Request logs | Full four-stage tracing (client request / upstream request / upstream response / client response) |
27
+ | Performance metrics | TTFT, TPS, Token usage, cache hit rate |
28
+
29
+ > **API Compatibility:** The Anthropic-compatible API is supported (adapted for Claude Code). The OpenAI-compatible API (`/v1/chat/completions`) is not yet fully tested.
30
+
31
+ ## Admin Dashboard
32
+
33
+ | Provider Management + Concurrency Control | Real-time Monitoring |
34
+ |---|---|
35
+ | ![Provider](docs/screenshot/provider_concurrency.png) | ![Monitor](docs/screenshot/monitor.png) |
36
+
37
+ | Model Mapping | Retry Rules |
38
+ |---|---|
39
+ | ![Mapping](docs/screenshot/model_mapping.png) | ![Retry](docs/screenshot/retry.png) |
40
+
41
+ | Dashboard | Request Logs |
42
+ |---|---|
43
+ | ![Dashboard](docs/screenshot/dashboard.png) | ![Logs](docs/screenshot/log.png) |
44
+
45
+ | Proxy Enhancement (Experimental) |
46
+ |----------------------------------|
47
+ | ![Proxy Enhancement](docs/screenshot/proxy_enhance.png) |
48
+
49
+ ## Quick Start
50
+
51
+ ### 1. Start the Router
52
+
53
+ ```bash
54
+ npx llm-simple-router
55
+ ```
56
+
57
+ Visit http://localhost:9981/admin — on first access you'll see the Setup page to set an admin password. Data is stored in `~/.llm-simple-router/`.
58
+
59
+ ### 2. Configure a Provider
60
+
61
+ Go to Admin Dashboard > Provider page > Add Provider. Select a Coding Plan and the Base URL will be auto-filled — you only need to provide the API Key.
62
+
63
+ ### 3. Configure Model Mapping
64
+
65
+ Go to Admin Dashboard > Model Mapping page.
66
+
67
+ **Core concept:** The client sends a request with model name A. The Router replaces it with model name B (supported by the backend Provider) based on mapping rules, then forwards the request:
68
+
69
+ ```
70
+ Claude Code (model A) → Router (A → B) → Provider API (model B)
71
+ ```
72
+
73
+ Simply configure "client model = A, backend model = B, select provider" in the mapping table.
74
+
75
+ #### Claude Code Default Model Names
76
+
77
+ When no environment variables are set, Claude Code uses the following default model names: `opus`, `sonnet`, `haiku`. If the backend is Zhipu Coding Plan, the mapping configuration would be:
78
+
79
+ | Client Model | Backend Model | Provider | Time Window |
80
+ |-------------|---------------|----------|-------------|
81
+ | opus | glm-5.1 | Zhipu Coding Plan | All day |
82
+ | sonnet | glm-5.1 | Zhipu Coding Plan | All day |
83
+ | haiku | glm-5-turbo | Zhipu Coding Plan | All day |
84
+
85
+ You can also use time-based mapping to auto-switch during peak hours:
86
+
87
+ | Client Model | Backend Model | Provider | Time Window |
88
+ |-------------|---------------|----------|-------------|
89
+ | sonnet | glm-5.1 | Zhipu Coding Plan | 00:00-14:00 |
90
+ | sonnet | kimi-for-coding | Moonshot | 14:00-18:00 |
91
+ | sonnet | glm-5.1 | Zhipu Coding Plan | 18:00-24:00 |
92
+
93
+ ### 4. Configure Claude Code
94
+
95
+ Create a Router API key in the admin dashboard, then choose one of the following methods. **You only need one of the two.**
96
+
97
+ **Method 1: Shell alias (recommended)**
98
+
99
+ Minimal configuration — Claude Code uses default model names (opus / sonnet / haiku), and the Router converts them via the mapping table:
100
+
101
+ ```bash
102
+ alias clode='\
103
+ export ANTHROPIC_AUTH_TOKEN="<your-router-key>" && \
104
+ export ANTHROPIC_BASE_URL="http://127.0.0.1:9981" && \
105
+ claude'
106
+ ```
107
+
108
+ You can also specify model names directly via environment variables, bypassing Router mapping:
109
+
110
+ ```bash
111
+ alias clode='\
112
+ export ANTHROPIC_AUTH_TOKEN="sk-router-xxxxxxxx" && \
113
+ export ANTHROPIC_BASE_URL="http://192.168.1.111:9981" && \
114
+ export ANTHROPIC_MODEL="glm-5" && \
115
+ export ANTHROPIC_DEFAULT_OPUS_MODEL="glm-5.1" && \
116
+ export ANTHROPIC_DEFAULT_SONNET_MODEL="glm-5" && \
117
+ export ANTHROPIC_DEFAULT_HAIKU_MODEL="glm-5-turbo" && \
118
+ export ANTHROPIC_SMALL_FAST_MODEL="glm-5-turbo" && \
119
+ claude'
120
+ ```
121
+
122
+ > For debugging, add flags: `claude --dangerously-skip-permissions --verbose --debug`, or set `export DEBUG=claude:*` for detailed logs.
123
+
124
+ **Method 2: ~/.claude/settings.json**
125
+
126
+ Configure in the `env` field of `~/.claude/settings.json` — same effect as exporting environment variables:
127
+
128
+ Minimal configuration:
129
+
130
+ ```json
131
+ {
132
+ "env": {
133
+ "ANTHROPIC_AUTH_TOKEN": "<your-router-key>",
134
+ "ANTHROPIC_BASE_URL": "http://127.0.0.1:9981"
135
+ }
136
+ }
137
+ ```
138
+
139
+ Override model names:
140
+
141
+ ```json
142
+ {
143
+ "env": {
144
+ "ANTHROPIC_AUTH_TOKEN": "sk-router-xxxxxxxx",
145
+ "ANTHROPIC_BASE_URL": "http://192.168.1.111:9981",
146
+ "ANTHROPIC_MODEL": "glm-5",
147
+ "ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5.1",
148
+ "ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-5",
149
+ "ANTHROPIC_DEFAULT_HAIKU_MODEL": "glm-5-turbo",
150
+ "ANTHROPIC_SMALL_FAST_MODEL": "glm-5-turbo"
151
+ }
152
+ }
153
+ ```
154
+
155
+ > Environment variables in `~/.claude/settings.json` apply to all projects. To apply them only to the current project, place them in `.claude/settings.json` in the project root instead.
156
+
157
+ ### 5. Usage
158
+
159
+ ```bash
160
+ # Method 1 (shell alias)
161
+ clode
162
+
163
+ # Method 2 (settings.json)
164
+ claude
165
+ ```
166
+
167
+ ## Docker Deployment
168
+
169
+ ```bash
170
+ docker compose up -d
171
+ ```
172
+
173
+ Environment variables are configured through the Setup page — no `.env` file needed.
174
+
175
+ ## Process Management
176
+
177
+ After upgrading via the Web UI, the service must be restarted for the new version to take effect. Use one of the following deployment methods to ensure automatic recovery after crashes or upgrade restarts.
178
+
179
+ ### PM2 (Recommended)
180
+
181
+ ```bash
182
+ # Install PM2
183
+ npm install -g pm2
184
+
185
+ # Install Router globally
186
+ npm install -g llm-simple-router
187
+
188
+ # Start (PM2 auto-restarts crashed processes)
189
+ pm2 start llm-simple-router --name llm-router
190
+
191
+ # View logs
192
+ pm2 logs llm-router
193
+
194
+ # Enable startup on boot
195
+ pm2 startup
196
+ pm2 save
197
+ ```
198
+
199
+ Upgrade flow: Web UI one-click upgrade → click restart → PM2 auto-spawns new process (< 1s downtime).
200
+
201
+ ### systemd (Linux Servers)
202
+
203
+ Create a service file at `/etc/systemd/system/llm-simple-router.service`:
204
+
205
+ ```ini
206
+ [Unit]
207
+ Description=LLM Simple Router
208
+ After=network.target
209
+
210
+ [Service]
211
+ Type=simple
212
+ ExecStart=/usr/local/bin/llm-simple-router
213
+ Restart=always
214
+ RestartSec=3
215
+ Environment=PORT=9981
216
+ Environment=LOG_LEVEL=info
217
+ # Configure other environment variables as needed
218
+ # Environment=DB_PATH=/var/lib/llm-simple-router/router.db
219
+
220
+ [Install]
221
+ WantedBy=multi-user.target
222
+ ```
223
+
224
+ > **Note:** The `ExecStart` path depends on how Node.js is installed. Use `which llm-simple-router` to confirm the actual path.
225
+
226
+ ```bash
227
+ # Enable and start
228
+ sudo systemctl enable llm-simple-router
229
+ sudo systemctl start llm-simple-router
230
+
231
+ # View status and logs
232
+ sudo systemctl status llm-simple-router
233
+ journalctl -u llm-simple-router -f
234
+ ```
235
+
236
+ Upgrade flow: Web UI one-click upgrade → click restart → systemd auto-restarts (< 1s downtime).
237
+
238
+ ### npx / Manual Start
239
+
240
+ No extra configuration needed. After upgrading via Web UI and clicking restart, the Router automatically spawns a new process and exits the old one. Expect a brief interruption of about 1-2 seconds.
241
+
242
+ > **Note:** If you press `Ctrl+C` or close the terminal, the service won't auto-recover. For production, use PM2 or systemd.
243
+
244
+ ## How It Works
245
+
246
+ ```
247
+ Claude Code → Router (model mapping + auto-retry + concurrency control) → Zhipu GLM / Kimi / Other Providers
248
+ ```
249
+
250
+ The Router finds the backend provider via model mapping → forwards the request → auto-retries failed requests → logs and records performance metrics → returns the response.
251
+
252
+ ### Architecture Diagram
253
+
254
+ **System Context** ([detailed description](docs/system-context.md)):
255
+
256
+ ```mermaid
257
+ graph LR
258
+ Clients["Claude Code / Cursor / Other Clients"]
259
+ Admin["Administrator"]
260
+ Router>"LLM Simple Router"]
261
+ Providers>"Zhipu / Moonshot / OpenAI / Anthropic / ..."]
262
+
263
+ Clients -->|"API Request<br/>Bearer Token"| Router
264
+ Admin -->|"Admin Dashboard<br/>/admin/"| Router
265
+ Router -->|"Forwarded Request<br/>SSE Streaming"| Providers
266
+ ```
267
+
268
+ **Request Processing Pipeline** ([detailed description](docs/request-pipeline.md)):
269
+
270
+ ```mermaid
271
+ flowchart LR
272
+ A[Client Request] --> B[Authentication]
273
+ B --> C[Model Mapping<br/>+ Routing Strategy]
274
+ C --> D[Concurrency Queue]
275
+ D --> E[Call Upstream<br/>Auto-retry on Failure]
276
+ E --> F[Log Request<br/>+ Metrics]
277
+ F --> G[Return Response]
278
+
279
+ E -.->|Failure| C
280
+ ```
281
+
282
+ When the Router receives a request: Authentication → find backend Provider via mapping rules → queue for concurrency control → forward to upstream (auto-retry on failure; under Failover strategy, switches Provider) → log and record metrics → return response.
283
+
284
+ ## Environment Variables
285
+
286
+ All secrets are configured through the Setup page. The following are optional configurations:
287
+
288
+ | Variable | Default | Description |
289
+ |----------|---------|-------------|
290
+ | `PORT` | `9981` | Service port |
291
+ | `DB_PATH` | `~/.llm-simple-router/router.db` | SQLite database path |
292
+ | `LOG_LEVEL` | `info` | Log level |
293
+ | `TZ` | `Asia/Shanghai` | Timezone setting |
294
+ | `STREAM_TIMEOUT_MS` | `3000000` | Streaming proxy idle timeout (ms) |
295
+ | `RETRY_MAX_ATTEMPTS` | `3` | Maximum retry attempts |
296
+ | `RETRY_BASE_DELAY_MS` | `1000` | Retry base delay (ms) |
297
+
298
+ ## Development
299
+
300
+ ```bash
301
+ # Backend (hot reload)
302
+ npm run dev
303
+
304
+ # Frontend (hot reload, proxies API to backend :9980)
305
+ cd frontend && npm run dev
306
+
307
+ # Build
308
+ npm run build:full
309
+
310
+ # Test
311
+ npm test
312
+
313
+ # Lint
314
+ npm run lint
315
+ ```
316
+
317
+ ## License
318
+
319
+ MIT
package/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ **[English](README.en.md)** | **[中文](README.md)**
2
+
1
3
  # LLM Simple Router
2
4
 
3
5
  LLM API 代理路由器。接收 Claude Code / Cursor 等客户端请求,通过模型映射和路由策略转发到配置的后端 Provider,支持流式(SSE)和非流式代理。
@@ -7,14 +7,20 @@
7
7
  "presetName": "deepseek",
8
8
  "apiType": "anthropic",
9
9
  "baseUrl": "https://api.deepseek.com/anthropic",
10
- "models": ["deepseek-chat", "deepseek-reasoner"]
10
+ "models": [
11
+ "deepseek-v4-flash",
12
+ "deepseek-v4-pro"
13
+ ]
11
14
  },
12
15
  {
13
16
  "plan": "OpenAI",
14
17
  "presetName": "deepseek-openai",
15
18
  "apiType": "openai",
16
19
  "baseUrl": "https://api.deepseek.com",
17
- "models": ["deepseek-chat", "deepseek-reasoner"]
20
+ "models": [
21
+ "deepseek-v4-flash",
22
+ "deepseek-v4-pro"
23
+ ]
18
24
  }
19
25
  ]
20
26
  },
@@ -86,7 +92,12 @@
86
92
  "presetName": "zhipu-coding-plan",
87
93
  "apiType": "anthropic",
88
94
  "baseUrl": "https://open.bigmodel.cn/api/anthropic",
89
- "models": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5-air"]
95
+ "models": [
96
+ "glm-5.1",
97
+ "glm-5",
98
+ "glm-4.7",
99
+ "glm-4.5-air"
100
+ ]
90
101
  },
91
102
  {
92
103
  "plan": "API",
@@ -105,14 +116,17 @@
105
116
  ]
106
117
  },
107
118
  {
108
- "group": "KIMI",
119
+ "group": "月之暗面",
109
120
  "presets": [
110
121
  {
111
122
  "plan": "Coding Plan",
112
123
  "presetName": "kimi-coding-plan",
113
124
  "apiType": "anthropic",
114
125
  "baseUrl": "https://api.kimi.com/coding",
115
- "models": ["kimi-for-coding", "kimi-k2.5"]
126
+ "models": [
127
+ "kimi-for-coding",
128
+ "kimi-k2.5"
129
+ ]
116
130
  },
117
131
  {
118
132
  "plan": "API",
@@ -137,7 +151,9 @@
137
151
  "presetName": "minimax-token-plan",
138
152
  "apiType": "anthropic",
139
153
  "baseUrl": "https://api.minimaxi.com/anthropic",
140
- "models": ["MiniMax-M2.7"]
154
+ "models": [
155
+ "MiniMax-M2.7"
156
+ ]
141
157
  },
142
158
  {
143
159
  "plan": "API",
@@ -258,7 +274,12 @@
258
274
  "presetName": "opencode-go-anthropic",
259
275
  "apiType": "anthropic",
260
276
  "baseUrl": "https://opencode.ai/zen/go/v1/messages",
261
- "models": ["deepseek-v4-pro", "deepseek-v4-flash", "minimax-m2.7", "minimax-m2.5"]
277
+ "models": [
278
+ "deepseek-v4-pro",
279
+ "deepseek-v4-flash",
280
+ "minimax-m2.7",
281
+ "minimax-m2.5"
282
+ ]
262
283
  }
263
284
  ]
264
285
  },
@@ -270,7 +291,10 @@
270
291
  "presetName": "stepfun-step-plan",
271
292
  "apiType": "anthropic",
272
293
  "baseUrl": "https://api.stepfun.com/step_plan",
273
- "models": ["step-3.5-flash-2603", "step-3.5-flash"]
294
+ "models": [
295
+ "step-3.5-flash-2603",
296
+ "step-3.5-flash"
297
+ ]
274
298
  },
275
299
  {
276
300
  "plan": "API",
@@ -288,4 +312,4 @@
288
312
  }
289
313
  ]
290
314
  }
291
- ]
315
+ ]
@@ -1,10 +1,11 @@
1
1
  [
2
- { "name": "429 Too Many Requests", "status_code": 429, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
3
- { "name": "503 Service Unavailable", "status_code": 503, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
4
- { "name": "ZAI 网络错误 (code 1234)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
5
- { "name": "ZAI 临时不可用", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*请稍后重试", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
6
- { "name": "ZAI 操作失败 (code 500)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
7
- { "name": "ZAI 速率限制 (HTTP 200, code 1302)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1302\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
8
- { "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
9
- { "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 }
2
+ { "name": "429 Too Many Requests", "status_code": 429, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
3
+ { "name": "503 Service Unavailable", "status_code": 503, "body_pattern": ".*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": [] },
4
+ { "name": "ZAI 网络错误 (code 1234)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
5
+ { "name": "ZAI 临时不可用", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*请稍后重试", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
6
+ { "name": "ZAI 操作失败 (code 500)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
7
+ { "name": "ZAI 速率限制 (HTTP 200, code 1302)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1302\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
8
+ { "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
9
+ { "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
10
+ { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] }
10
11
  ]
@@ -58,9 +58,11 @@ function cascadeProviderDisable(db, providerId) {
58
58
  return result;
59
59
  }
60
60
  function extractModelOverrides(models) {
61
- const names = models.map(m => typeof m === "string" ? m : m.name);
61
+ const entries = models.map(m => typeof m === "string"
62
+ ? { name: m, patches: [] }
63
+ : { name: m.name, context_window: m.context_window, patches: m.patches ?? [] });
62
64
  const overrides = models.filter((m) => typeof m !== "string" && m.context_window != null);
63
- return { names, overrides };
65
+ return { entries, overrides };
64
66
  }
65
67
  const API_KEY_PREVIEW_PREFIX_LEN = 4;
66
68
  const PROVIDER_NAME_RE = /^[a-zA-Z0-9_-]+$/;
@@ -71,7 +73,7 @@ const CreateProviderSchema = Type.Object({
71
73
  api_key: Type.String({ minLength: 1 }),
72
74
  models: Type.Optional(Type.Array(Type.Union([
73
75
  Type.String(),
74
- Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()) })
76
+ Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())) })
75
77
  ]))),
76
78
  is_active: Type.Optional(Type.Number()),
77
79
  max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
@@ -86,7 +88,7 @@ const UpdateProviderSchema = Type.Object({
86
88
  api_key: Type.Optional(Type.String({ minLength: 1 })),
87
89
  models: Type.Optional(Type.Array(Type.Union([
88
90
  Type.String(),
89
- Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()) })
91
+ Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())) })
90
92
  ]))),
91
93
  is_active: Type.Optional(Type.Number()),
92
94
  max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
@@ -100,7 +102,7 @@ export const adminProviderRoutes = (app, options, done) => {
100
102
  const encryptionKey = getSetting(db, "encryption_key");
101
103
  const providers = getAllProviders(db);
102
104
  return reply.send(providers.map((s) => {
103
- const modelNames = parseModels(s.models || "[]");
105
+ const modelEntries = parseModels(s.models || "[]");
104
106
  const overrides = new Map(getModelInfoForProvider(db, s.id).map(m => [m.model_name, m.context_window]));
105
107
  return {
106
108
  id: s.id,
@@ -108,7 +110,7 @@ export const adminProviderRoutes = (app, options, done) => {
108
110
  api_type: s.api_type,
109
111
  base_url: s.base_url,
110
112
  api_key: s.api_key ? decrypt(s.api_key, encryptionKey) : "",
111
- models: buildModelInfoList(modelNames, overrides),
113
+ models: buildModelInfoList(modelEntries, overrides),
112
114
  is_active: s.is_active,
113
115
  max_concurrency: s.max_concurrency,
114
116
  queue_timeout_ms: s.queue_timeout_ms,
@@ -130,7 +132,7 @@ export const adminProviderRoutes = (app, options, done) => {
130
132
  return reply.code(HTTP_CONFLICT).send(apiError(API_CODE.CONFLICT_NAME, `Provider 名称 '${body.name}' 已存在`));
131
133
  }
132
134
  const encryptedKey = encrypt(body.api_key, getSetting(db, "encryption_key"));
133
- const { names: normalizedModels, overrides: contextOverrides } = extractModelOverrides((body.models ?? []));
135
+ const { entries: normalizedModels, overrides: contextOverrides } = extractModelOverrides((body.models ?? []));
134
136
  const isAdaptiveEnabled = body.adaptive_enabled ?? 0;
135
137
  const id = createProvider(db, {
136
138
  name: body.name,
@@ -190,8 +192,8 @@ export const adminProviderRoutes = (app, options, done) => {
190
192
  if (body.is_active !== undefined)
191
193
  fields.is_active = body.is_active;
192
194
  if (body.models !== undefined) {
193
- const { names, overrides } = extractModelOverrides(body.models);
194
- fields.models = JSON.stringify(names);
195
+ const { entries, overrides } = extractModelOverrides(body.models);
196
+ fields.models = JSON.stringify(entries);
195
197
  if (overrides.length > 0) {
196
198
  setModelInfoForProvider(db, id, overrides.map(o => ({ model_name: o.name, context_window: o.context_window })));
197
199
  }
@@ -0,0 +1,13 @@
1
+ import { FastifyPluginCallback } from "fastify";
2
+ import Database from "better-sqlite3";
3
+ import type { StateRegistry } from "../core/registry.js";
4
+ import type { RequestTracker } from "../monitor/request-tracker.js";
5
+ import type { AdaptiveConcurrencyController } from "../proxy/adaptive-controller.js";
6
+ interface QuickSetupRoutesOptions {
7
+ db: Database.Database;
8
+ stateRegistry?: StateRegistry;
9
+ tracker?: RequestTracker;
10
+ adaptiveController?: AdaptiveConcurrencyController;
11
+ }
12
+ export declare const adminQuickSetupRoutes: FastifyPluginCallback<QuickSetupRoutesOptions>;
13
+ export {};