llm-simple-router 0.11.1 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/config/recommended-retry-rules.json +2 -1
  2. package/dist/admin/monitor.js +9 -6
  3. package/dist/admin/proxy-enhancement.js +25 -5
  4. package/dist/admin/retry-rules.js +244 -1
  5. package/dist/config/model-context.js +14 -5
  6. package/dist/config/recommended-retry-rules.json +2 -1
  7. package/dist/core/monitor/stream-extractor.js +30 -2
  8. package/dist/index.js +1 -1
  9. package/dist/proxy/handler/create-proxy-handler.js +3 -64
  10. package/dist/proxy/orchestration/resilience.js +1 -1
  11. package/dist/proxy/proxy-logging.js +30 -1
  12. package/dist/proxy/transform/request-bridge-responses.js +13 -7
  13. package/dist/proxy/transform/request-transform-responses.js +4 -1
  14. package/dist/proxy/transform/request-transform.js +12 -8
  15. package/dist/proxy/transform/shared-normalize.d.ts +7 -0
  16. package/dist/proxy/transform/shared-normalize.js +14 -0
  17. package/dist/proxy/transform/thinking-mapper.d.ts +1 -0
  18. package/dist/proxy/transform/thinking-mapper.js +1 -1
  19. package/dist/proxy/transform/thinking-resolver.d.ts +19 -0
  20. package/dist/proxy/transform/thinking-resolver.js +51 -0
  21. package/dist/utils/llm-client.d.ts +17 -0
  22. package/dist/utils/llm-client.js +78 -0
  23. package/frontend-dist/assets/CardContent-B88j9H0z.js +1 -0
  24. package/frontend-dist/assets/CardTitle-B8a97nPD.js +1 -0
  25. package/frontend-dist/assets/CascadingModelSelect-C0yNZoXy.js +1 -0
  26. package/frontend-dist/assets/Checkbox-BqTMZz2v.js +1 -0
  27. package/frontend-dist/assets/CollapsibleContent-96COn5t_.js +1 -0
  28. package/frontend-dist/assets/CollapsibleTrigger-BhoqUmdk.js +1 -0
  29. package/frontend-dist/assets/Dashboard-DS1huRbF.js +3 -0
  30. package/frontend-dist/assets/{Input-a-p63V9b.js → Input-Bu0i-HPR.js} +1 -1
  31. package/frontend-dist/assets/Label-67a8eQwX.js +1 -0
  32. package/frontend-dist/assets/Login-DuJrav5s.js +1 -0
  33. package/frontend-dist/assets/Logs-Di2rtHWg.js +1 -0
  34. package/frontend-dist/assets/MappingEntryEditor-C_wjxov1.js +1 -0
  35. package/frontend-dist/assets/ModelMappings-CJKFlKoo.js +1 -0
  36. package/frontend-dist/assets/Monitor-C_KYa39G.js +1 -0
  37. package/frontend-dist/assets/Providers-OSQlM_k9.js +1 -0
  38. package/frontend-dist/assets/ProxyEnhancement-Cj7_GBQe.js +1 -0
  39. package/frontend-dist/assets/QuickSetup-_dHnO9A_.js +1 -0
  40. package/frontend-dist/assets/RetryRules-D1wNRCPz.js +1 -0
  41. package/frontend-dist/assets/RouterKeys-CZ2Vw6vB.js +1 -0
  42. package/frontend-dist/assets/RovingFocusItem-tLhdCrJ2.js +1 -0
  43. package/frontend-dist/assets/Schedules-BBBE5rzH.js +1 -0
  44. package/frontend-dist/assets/Settings-CNEWKmas.js +6 -0
  45. package/frontend-dist/assets/Setup-BJy7Hluo.js +1 -0
  46. package/frontend-dist/assets/Switch-B2K6kTfF.js +1 -0
  47. package/frontend-dist/assets/TooltipTrigger-qc84ev_b.js +1 -0
  48. package/frontend-dist/assets/TransformRulesForm-DdSMMv3Z.js +1 -0
  49. package/frontend-dist/assets/UnifiedRequestDialog-Bj4AG0sQ.css +1 -0
  50. package/frontend-dist/assets/UnifiedRequestDialog-BtsWE0wO.js +3 -0
  51. package/frontend-dist/assets/VisuallyHiddenInput-C6J7cjvG.js +1 -0
  52. package/frontend-dist/assets/{button-IIwokVyh.js → button-CSFUPP0s.js} +5 -5
  53. package/frontend-dist/assets/{copy-6ObXPuLk.js → copy-C9YdGmGg.js} +1 -1
  54. package/frontend-dist/assets/dashboard-DxQj2qDW.js +1 -0
  55. package/frontend-dist/assets/dashboard-oYrGiYFH.js +1 -0
  56. package/frontend-dist/assets/dialog-Dvd1Jkzx.js +1 -0
  57. package/frontend-dist/assets/index-BXsU7o6J.js +3 -0
  58. package/frontend-dist/assets/index-BowCJXHo.css +1 -0
  59. package/frontend-dist/assets/logs-C8j2wv9U.js +1 -0
  60. package/frontend-dist/assets/logs-DXEeXyQL.js +1 -0
  61. package/frontend-dist/assets/model-patches-C_Vq5bRS.js +1 -0
  62. package/frontend-dist/assets/proxyEnhancement-Caq4cKe6.js +3 -0
  63. package/frontend-dist/assets/proxyEnhancement-DsQ6_BKy.js +3 -0
  64. package/frontend-dist/assets/quickSetup-BL0txMvb.js +1 -0
  65. package/frontend-dist/assets/quickSetup-CvR1GTCW.js +1 -0
  66. package/frontend-dist/assets/{retryRules-CzLnagW_.js → retryRules-Btt-s8hs.js} +1 -1
  67. package/frontend-dist/assets/{retryRules-C--dd-y8.js → retryRules-Cnh9jDD4.js} +1 -1
  68. package/frontend-dist/assets/sparkles-C6fYCgIz.js +1 -0
  69. package/frontend-dist/assets/{trash-2-CFGaIrdz.js → trash-2-2cawWyKR.js} +1 -1
  70. package/frontend-dist/assets/{useClipboard-BeAQAXgb.js → useClipboard-Bk__77zn.js} +1 -1
  71. package/frontend-dist/assets/useLogRetention-BhmlsZRl.js +1 -0
  72. package/frontend-dist/index.html +3 -3
  73. package/package.json +1 -1
  74. package/frontend-dist/assets/CardContent-pH0VRq8h.js +0 -1
  75. package/frontend-dist/assets/CardTitle-D-llLD1f.js +0 -1
  76. package/frontend-dist/assets/Checkbox-D4LoZvTV.js +0 -1
  77. package/frontend-dist/assets/CollapsibleContent-B254RF6g.js +0 -1
  78. package/frontend-dist/assets/CollapsibleTrigger-DV0ZZnn3.js +0 -1
  79. package/frontend-dist/assets/Dashboard-BD0XGSwd.js +0 -3
  80. package/frontend-dist/assets/Label-CX2aLwv3.js +0 -1
  81. package/frontend-dist/assets/Login-CsW97ehB.js +0 -1
  82. package/frontend-dist/assets/Logs-CYdLJZSQ.js +0 -1
  83. package/frontend-dist/assets/MappingEntryEditor-CakGfYUb.js +0 -1
  84. package/frontend-dist/assets/ModelCard-CfaiLRNv.js +0 -1
  85. package/frontend-dist/assets/ModelMappings-XIqm9JCQ.js +0 -1
  86. package/frontend-dist/assets/Monitor-XrEdVpUU.js +0 -1
  87. package/frontend-dist/assets/Providers-DfGFCV59.js +0 -1
  88. package/frontend-dist/assets/ProxyEnhancement-M5m9s2Lc.js +0 -1
  89. package/frontend-dist/assets/QuickSetup-CAaTeVTO.js +0 -1
  90. package/frontend-dist/assets/RetryRules-pegfUlWk.js +0 -1
  91. package/frontend-dist/assets/RouterKeys-Cl_gTHw-.js +0 -1
  92. package/frontend-dist/assets/RovingFocusItem-BPcEf_Gq.js +0 -1
  93. package/frontend-dist/assets/Schedules-DEHeg94o.js +0 -1
  94. package/frontend-dist/assets/Settings-Gmo6ovhM.js +0 -6
  95. package/frontend-dist/assets/Setup-BJNhW5mF.js +0 -1
  96. package/frontend-dist/assets/Switch-8BLJ2Vef.js +0 -1
  97. package/frontend-dist/assets/TooltipTrigger-BhyYaMAs.js +0 -1
  98. package/frontend-dist/assets/TransformRulesForm-BHKWFh3L.js +0 -1
  99. package/frontend-dist/assets/UnifiedRequestDialog-BINmHzft.js +0 -3
  100. package/frontend-dist/assets/UnifiedRequestDialog-C4MTxb25.css +0 -1
  101. package/frontend-dist/assets/VisuallyHiddenInput-hHg5vqgm.js +0 -1
  102. package/frontend-dist/assets/dashboard-Cejt1wVQ.js +0 -1
  103. package/frontend-dist/assets/dashboard-DLTOR0fN.js +0 -1
  104. package/frontend-dist/assets/dialog-B2LrmGSG.js +0 -1
  105. package/frontend-dist/assets/index-ClQS69Or.css +0 -1
  106. package/frontend-dist/assets/index-DBjy8QDH.js +0 -3
  107. package/frontend-dist/assets/logs-CA8USnXG.js +0 -1
  108. package/frontend-dist/assets/logs-QPt2Ybwy.js +0 -1
  109. package/frontend-dist/assets/proxyEnhancement-B6vdsMeK.js +0 -3
  110. package/frontend-dist/assets/proxyEnhancement-UuPFs4M3.js +0 -3
  111. package/frontend-dist/assets/quickSetup-CSpWmAy-.js +0 -1
  112. package/frontend-dist/assets/quickSetup-D8ruRelW.js +0 -1
  113. package/frontend-dist/assets/useLogRetention-BLbOetBI.js +0 -1
  114. /package/frontend-dist/assets/{common-Cn0QcrnY.js → common-Cg4OGISS.js} +0 -0
  115. /package/frontend-dist/assets/{common-Bvxev9Ev.js → common-DpEjrxgC.js} +0 -0
  116. /package/frontend-dist/assets/{login-BkOvA7gg.js → login-COgZiZU0.js} +0 -0
  117. /package/frontend-dist/assets/{login-DWRFsEu3.js → login-Cqit6dLn.js} +0 -0
  118. /package/frontend-dist/assets/{mappings-BpkOqnsu.js → mappings-CIi5L6vx.js} +0 -0
  119. /package/frontend-dist/assets/{mappings-D7Qy46v_.js → mappings-DK14Q480.js} +0 -0
  120. /package/frontend-dist/assets/{monitor-CcPZdXUM.js → monitor-BrKGZyOA.js} +0 -0
  121. /package/frontend-dist/assets/{monitor-D-0KOVTC.js → monitor-sNuyagci.js} +0 -0
  122. /package/frontend-dist/assets/{providers-BI5dO-j0.js → providers-BjaFz2uN.js} +0 -0
  123. /package/frontend-dist/assets/{providers-BzxbZ85B.js → providers-Djvbh2Pk.js} +0 -0
  124. /package/frontend-dist/assets/{requestDetail-8Sp9tWNb.js → requestDetail-C6o1ku8x.js} +0 -0
  125. /package/frontend-dist/assets/{requestDetail-CcHzzKYr.js → requestDetail-DDzGbK-Q.js} +0 -0
  126. /package/frontend-dist/assets/{routerKeys-CB2l_V7w.js → routerKeys-BKIv9voD.js} +0 -0
  127. /package/frontend-dist/assets/{routerKeys-p_ioAckE.js → routerKeys-DHqew7e3.js} +0 -0
  128. /package/frontend-dist/assets/{schedules-Cz_-Wfa_.js → schedules-BdCs4P0W.js} +0 -0
  129. /package/frontend-dist/assets/{schedules-DTgk603B.js → schedules-Chof0Byr.js} +0 -0
  130. /package/frontend-dist/assets/{settings-B5Mq1HN8.js → settings-BAfdizNX.js} +0 -0
  131. /package/frontend-dist/assets/{settings-j3dzVXzy.js → settings-DheKiB0E.js} +0 -0
  132. /package/frontend-dist/assets/{setup-Dryg-9wL.js → setup-AGblmz9n.js} +0 -0
  133. /package/frontend-dist/assets/{setup-DaeEG9ll.js → setup-CghpqjMU.js} +0 -0
  134. /package/frontend-dist/assets/{sidebar-BQWT-QZb.js → sidebar-BMFaYdll.js} +0 -0
  135. /package/frontend-dist/assets/{sidebar-DYwEKca3.js → sidebar-CpYOxTtl.js} +0 -0
@@ -8,5 +8,6 @@
8
8
  { "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
9
9
  { "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
10
10
  { "name": "ZAI 模型过载 (HTTP 200, code 1305)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1305\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
11
- { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] }
11
+ { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] },
12
+ { "name": "DeepSeek 并发限流 (429)", "status_code": 429, "body_pattern": "Too many requests.*concurrency", "retry_strategy": "exponential", "retry_delay_ms": 2000, "max_retries": 5, "max_delay_ms": 120000, "providers": ["DeepSeek", "OpenCode"] }
12
13
  ]
@@ -16,15 +16,12 @@ export const adminMonitorRoutes = (app, options, done) => {
16
16
  app.get("/admin/api/monitor/stream", (request, reply) => {
17
17
  // hijack() 让 Fastify 完全放弃响应管理,避免 onSend hook 向 SSE 流注入信封 JSON
18
18
  reply.hijack();
19
- const sseClient = adaptSSEClient(reply.raw);
20
- tracker.addClient(sseClient);
21
- // 在 writeHead 之前注册 close 处理器,避免竞态导致 tracker 泄漏
22
- reply.raw.on("close", () => {
23
- tracker.removeClient(sseClient);
24
- });
25
19
  // 客户端在 hijack 之前已断连,无需发送响应头
26
20
  if (reply.raw.destroyed)
27
21
  return;
22
+ // writeHead 必须在 addClient 之前调用,否则 sendInitialSnapshot 的 write()
23
+ // 会触发 Node.js 隐式 header 发送(Content-Type 默认非 text/event-stream),
24
+ // 导致浏览器 EventSource 解析失败、不断重连。
28
25
  try {
29
26
  reply.raw.writeHead(HTTP_OK, {
30
27
  "Content-Type": "text/event-stream",
@@ -34,7 +31,13 @@ export const adminMonitorRoutes = (app, options, done) => {
34
31
  }
35
32
  catch {
36
33
  request.log.debug("client disconnected before writeHead");
34
+ return;
37
35
  }
36
+ const sseClient = adaptSSEClient(reply.raw);
37
+ tracker.addClient(sseClient);
38
+ reply.raw.on("close", () => {
39
+ tracker.removeClient(sseClient);
40
+ });
38
41
  });
39
42
  app.get("/admin/api/monitor/request/:id", async (request, reply) => {
40
43
  const { id } = request.params;
@@ -6,6 +6,10 @@ const UpdateProxyEnhancementSchema = Type.Object({
6
6
  stream_loop_enabled: Type.Boolean(),
7
7
  tool_round_limit_enabled: Type.Boolean(),
8
8
  tool_error_logging_enabled: Type.Boolean(),
9
+ ai_retry_config: Type.Optional(Type.Union([
10
+ Type.Null(),
11
+ Type.Object({ provider_id: Type.String({ minLength: 1 }), model: Type.String({ minLength: 1 }) }),
12
+ ])),
9
13
  });
10
14
  export const adminProxyEnhancementRoutes = (app, options, done) => {
11
15
  const { db } = options;
@@ -25,18 +29,34 @@ export const adminProxyEnhancementRoutes = (app, options, done) => {
25
29
  }
26
30
  catch { /* eslint-disable-line taste/no-silent-catch -- invalid JSON, return defaults */ }
27
31
  }
28
- return reply.send(config);
32
+ const aiConfigRaw = getSetting(db, "ai_retry_config");
33
+ let aiRetryConfig = null;
34
+ if (aiConfigRaw) {
35
+ try {
36
+ aiRetryConfig = JSON.parse(aiConfigRaw);
37
+ }
38
+ catch (e) {
39
+ console.error('proxyEnhancement.parseAiConfig:', e);
40
+ aiRetryConfig = null; // 损坏的 JSON 回退为 null
41
+ }
42
+ }
43
+ return reply.send({ ...config, ai_retry_config: aiRetryConfig });
29
44
  });
30
45
  app.put("/admin/api/proxy-enhancement", { schema: { body: UpdateProxyEnhancementSchema } }, async (request, reply) => {
31
46
  const body = request.body;
47
+ const { ai_retry_config, ...enhancementFields } = body;
32
48
  const config = {
33
- tool_call_loop_enabled: body.tool_call_loop_enabled,
34
- stream_loop_enabled: body.stream_loop_enabled,
35
- tool_round_limit_enabled: body.tool_round_limit_enabled,
36
- tool_error_logging_enabled: body.tool_error_logging_enabled,
49
+ tool_call_loop_enabled: enhancementFields.tool_call_loop_enabled,
50
+ stream_loop_enabled: enhancementFields.stream_loop_enabled,
51
+ tool_round_limit_enabled: enhancementFields.tool_round_limit_enabled,
52
+ tool_error_logging_enabled: enhancementFields.tool_error_logging_enabled,
37
53
  };
38
54
  setSetting(db, "proxy_enhancement", JSON.stringify(config));
39
55
  clearEnhancementConfigCache();
56
+ // ai_retry_config is stored in a separate settings key
57
+ if (ai_retry_config !== undefined) {
58
+ setSetting(db, "ai_retry_config", ai_retry_config ? JSON.stringify(ai_retry_config) : "");
59
+ }
40
60
  return reply.send({ success: true });
41
61
  });
42
62
  done();
@@ -1,7 +1,18 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { join, dirname } from "node:path";
3
+ import { fileURLToPath } from "node:url";
1
4
  import { Type } from "@sinclair/typebox";
2
5
  import { getAllRetryRules, getRetryRuleById, createRetryRule, updateRetryRule, deleteRetryRule, } from "../db/index.js";
3
- import { HTTP_BAD_REQUEST, HTTP_CREATED, HTTP_NOT_FOUND } from "./constants.js";
6
+ import { callLLM } from "../utils/llm-client.js";
7
+ import { getActiveRetryRules } from "../db/retry-rules.js";
8
+ import { getRequestLogById } from "../db/logs.js";
9
+ import { getProviderById } from "../db/providers.js";
10
+ import { getSetting } from "../db/settings.js";
11
+ import { decrypt } from "../utils/crypto.js";
12
+ import { HTTP_OK, HTTP_BAD_REQUEST, HTTP_CREATED, HTTP_NOT_FOUND } from "./constants.js";
4
13
  import { API_CODE, apiError } from "./api-response.js";
14
+ // 加载 AI 重试规则的 system prompt 模板(独立文件,避免模板字面量转义问题)
15
+ const AI_RETRY_PROMPT_TEMPLATE = readFileSync(join(dirname(fileURLToPath(import.meta.url)), "ai-retry-prompt.md"), "utf-8");
5
16
  const DEFAULT_RETRY_DELAY_MS = 5000;
6
17
  const DEFAULT_MAX_RETRIES = 10;
7
18
  const DEFAULT_MAX_DELAY_MS = 60000;
@@ -34,6 +45,121 @@ function validateBodyPattern(pattern) {
34
45
  return "Invalid body_pattern regex";
35
46
  }
36
47
  }
48
+ // ---------- AI Retry Rule Generation Helpers ----------
49
+ const MAX_RESPONSE_CHARS = 4000;
50
+ const STATUS_CODE_MIN = 100;
51
+ const STATUS_CODE_MAX = 599;
52
+ const MAX_RETRIES_UPPER = 100;
53
+ /** 从日志中提取响应文本,优先 upstream_response,回退 stream_text_content */
54
+ function extractResponseText(log) {
55
+ const raw = log.upstream_response || log.stream_text_content || "";
56
+ if (raw.length <= MAX_RESPONSE_CHARS)
57
+ return raw;
58
+ const TRUNCATION_SUFFIX = "\n...(truncated)";
59
+ const truncated = raw.substring(0, MAX_RESPONSE_CHARS - TRUNCATION_SUFFIX.length);
60
+ // 在 JSON 边界处截断,避免破坏键值对导致 AI 生成无效正则
61
+ const lastBrace = truncated.lastIndexOf("}");
62
+ const lastBracket = truncated.lastIndexOf("]");
63
+ const cutPoint = Math.max(lastBrace, lastBracket);
64
+ const MIN_RATIO_FOR_BOUNDARY_CUT = 0.5;
65
+ return cutPoint > truncated.length * MIN_RATIO_FOR_BOUNDARY_CUT ? truncated.substring(0, cutPoint + 1) + TRUNCATION_SUFFIX : truncated + TRUNCATION_SUFFIX;
66
+ }
67
+ /** 检查文本是否包含错误特征关键词(case-insensitive) */
68
+ function hasErrorFeatures(text) {
69
+ if (!text)
70
+ return false;
71
+ const lower = text.toLowerCase();
72
+ return lower.includes("error");
73
+ }
74
+ /** 解析 AI 返回的 JSON,支持 ```json 代码块包裹 */
75
+ function parseAIContent(content) {
76
+ const codeBlockMatch = content.match(/```json\s*([\s\S]*?)```/);
77
+ const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : content.trim();
78
+ try {
79
+ return JSON.parse(jsonStr);
80
+ }
81
+ catch {
82
+ return null;
83
+ }
84
+ }
85
+ /** 从 AI 返回的 error 字段提取可读错误信息(兼容 string 和 object 两种格式) */
86
+ function extractErrorMessage(error) {
87
+ if (typeof error === "string")
88
+ return error;
89
+ const obj = error;
90
+ const msg = obj.message;
91
+ return typeof msg === "string" ? msg : JSON.stringify(error);
92
+ }
93
+ /** 校验 AI 生成的规则字段,返回错误描述或 null */
94
+ function validateAIRule(parsed) {
95
+ if (typeof parsed.summary !== "string" || parsed.summary.trim() === "") {
96
+ return "summary is required";
97
+ }
98
+ if (typeof parsed.name !== "string" || parsed.name.trim() === "") {
99
+ return "name is required";
100
+ }
101
+ if (typeof parsed.status_code !== "number" || !Number.isInteger(parsed.status_code) || parsed.status_code < STATUS_CODE_MIN || parsed.status_code > STATUS_CODE_MAX) {
102
+ return "status_code must be 100-599";
103
+ }
104
+ if (typeof parsed.body_pattern !== "string") {
105
+ return "body_pattern is required";
106
+ }
107
+ try {
108
+ new RegExp(parsed.body_pattern);
109
+ }
110
+ catch {
111
+ return "body_pattern is not a valid regex";
112
+ }
113
+ // ReDoS 防护:限制正则长度 + 检测已知危险模式
114
+ const MAX_PATTERN_LENGTH = 500;
115
+ if (parsed.body_pattern.length > MAX_PATTERN_LENGTH) {
116
+ return `Rule validation failed: body_pattern too long (max ${MAX_PATTERN_LENGTH} chars)`;
117
+ }
118
+ const DANGEROUS_REGEX_PATTERNS = [
119
+ /\([^)]*\+[^)]*\+/, // 嵌套量词如 (a+b+)+
120
+ /\([^)]*[*+][^)]*\)\s*[*+]/, // 重复分组 + 量词
121
+ /\(\.\*[^)]*\)\s*[*+]/, // (.*)+ 类型
122
+ ];
123
+ for (const dangerous of DANGEROUS_REGEX_PATTERNS) {
124
+ if (dangerous.test(parsed.body_pattern)) {
125
+ return "Rule validation failed: body_pattern contains potentially catastrophic regex";
126
+ }
127
+ }
128
+ if (parsed.retry_strategy !== "fixed" && parsed.retry_strategy !== "exponential") {
129
+ return "retry_strategy must be 'fixed' or 'exponential'";
130
+ }
131
+ if (typeof parsed.retry_delay_ms !== "number" || !Number.isInteger(parsed.retry_delay_ms) || parsed.retry_delay_ms <= 0) {
132
+ return "retry_delay_ms must be a positive integer";
133
+ }
134
+ if (typeof parsed.max_retries !== "number" || !Number.isInteger(parsed.max_retries) || parsed.max_retries < 0 || parsed.max_retries > MAX_RETRIES_UPPER) {
135
+ return "max_retries must be 0-100";
136
+ }
137
+ if (typeof parsed.max_delay_ms !== "number" || !Number.isInteger(parsed.max_delay_ms) || parsed.max_delay_ms <= 0) {
138
+ return "max_delay_ms must be a positive integer";
139
+ }
140
+ return null;
141
+ }
142
+ const MAX_PROMPT_RULES = 20;
143
+ /** 构造 system prompt,基于外部模板文件 + 现有规则列表 */
144
+ function buildSystemPrompt(existingRules) {
145
+ const displayRules = existingRules.slice(0, MAX_PROMPT_RULES);
146
+ const rulesList = displayRules.length > 0
147
+ ? displayRules.map((r) => `- ${r.name}: status=${r.status_code}, pattern=${r.body_pattern}`).join("\n")
148
+ : "(none)";
149
+ const truncateHint = existingRules.length > MAX_PROMPT_RULES ? `\n... and ${existingRules.length - MAX_PROMPT_RULES} more rules` : "";
150
+ return `${AI_RETRY_PROMPT_TEMPLATE}\n\n${rulesList}${truncateHint}\n\nNote: The Response Body may be truncated. Generate body_pattern based only on the complete key-value pairs you can see.`;
151
+ }
152
+ /** 构造 user prompt,使用 provider_name 而非 provider_id */
153
+ function buildUserPrompt(log, responseText) {
154
+ const providerDisplayName = log.provider_name || log.provider_id || "unknown";
155
+ return `Provider: ${providerDisplayName}
156
+ Model: ${log.model ?? "unknown"}
157
+ Status Code: ${log.status_code ?? "N/A"}
158
+ Error Message: ${log.error_message ?? "N/A"}
159
+
160
+ Response Body:
161
+ ${responseText}`;
162
+ }
37
163
  export const adminRetryRuleRoutes = (app, options, done) => {
38
164
  const { db, stateRegistry } = options;
39
165
  app.get("/admin/api/retry-rules", async (_request, reply) => {
@@ -97,5 +223,122 @@ export const adminRetryRuleRoutes = (app, options, done) => {
97
223
  stateRegistry?.refreshRetryRules();
98
224
  return reply.send({ success: true });
99
225
  });
226
+ const AiGenerateBodySchema = Type.Object({
227
+ log_id: Type.String({ minLength: 1 }),
228
+ });
229
+ // AI generate retry rule endpoint
230
+ app.post("/admin/api/retry-rules/ai-generate", { schema: { body: AiGenerateBodySchema } }, async (request, reply) => {
231
+ const { log_id } = request.body;
232
+ // All responses let onSend hook wrap in { code, message, data } envelope
233
+ // Frontend request<T>() auto-unwraps body.data
234
+ // 1. Check AI config
235
+ const aiConfigRaw = getSetting(db, "ai_retry_config");
236
+ if (!aiConfigRaw) {
237
+ return reply.send({ success: false, error: "AI retry config not set" });
238
+ }
239
+ let aiConfig;
240
+ try {
241
+ aiConfig = JSON.parse(aiConfigRaw);
242
+ }
243
+ catch {
244
+ return reply.send({ success: false, error: "AI config is invalid JSON" });
245
+ }
246
+ if (!aiConfig.provider_id || !aiConfig.model) {
247
+ return reply.send({ success: false, error: "AI config is incomplete" });
248
+ }
249
+ // 2. Look up the log
250
+ const log = getRequestLogById(db, log_id);
251
+ if (!log) {
252
+ return reply.send({ success: false, error: "Log not found" });
253
+ }
254
+ // 3. Extract response text
255
+ const responseText = extractResponseText(log);
256
+ // 4. Pre-check: reject 2xx responses without error features
257
+ const HTTP_MULTIPLE_CHOICES = 300;
258
+ const is2xx = log.status_code !== null && log.status_code >= HTTP_OK && log.status_code < HTTP_MULTIPLE_CHOICES;
259
+ if (is2xx && !log.error_message && !hasErrorFeatures(responseText)) {
260
+ return reply.send({ success: false, error: "Cannot generate retry rule for a successful response" });
261
+ }
262
+ // 5. Get the configured AI provider
263
+ const provider = getProviderById(db, aiConfig.provider_id);
264
+ if (!provider) {
265
+ return reply.send({ success: false, error: "AI provider not found" });
266
+ }
267
+ // 6. Decrypt API key
268
+ const encryptionKey = getSetting(db, "encryption_key");
269
+ if (!encryptionKey) {
270
+ return reply.send({ success: false, error: "Encryption key not set" });
271
+ }
272
+ let apiKey;
273
+ try {
274
+ apiKey = decrypt(provider.api_key, encryptionKey);
275
+ }
276
+ catch {
277
+ return reply.send({ success: false, error: "Failed to decrypt API key" });
278
+ }
279
+ // 7. Build prompts
280
+ const existingRules = getActiveRetryRules(db);
281
+ const systemPrompt = buildSystemPrompt(existingRules);
282
+ const userPrompt = buildUserPrompt(log, responseText);
283
+ // 8. Call LLM
284
+ let llmResult;
285
+ try {
286
+ llmResult = await callLLM({
287
+ baseUrl: provider.base_url,
288
+ upstreamPath: provider.upstream_path,
289
+ apiKey,
290
+ model: aiConfig.model,
291
+ messages: [
292
+ { role: "system", content: systemPrompt },
293
+ { role: "user", content: userPrompt },
294
+ ],
295
+ maxTokens: 2048,
296
+ timeoutMs: 30_000,
297
+ });
298
+ }
299
+ catch (e) {
300
+ const msg = e instanceof Error ? e.message : "Unknown error";
301
+ if (!(e instanceof Error)) {
302
+ request.log.error({ err: e }, "LLM call failed with non-Error");
303
+ }
304
+ return reply.send({ success: false, error: `LLM call failed: ${msg}` });
305
+ }
306
+ // 9. Parse AI response
307
+ const parsed = parseAIContent(llmResult.content);
308
+ if (!parsed) {
309
+ // Check if the raw content is an error/refusal message
310
+ const lowerContent = llmResult.content.toLowerCase().trim();
311
+ if (lowerContent.startsWith("error") || lowerContent.includes("unable to")) {
312
+ return reply.send({ success: false, error: "AI returned an error exit" });
313
+ }
314
+ return reply.send({ success: false, error: "Failed to parse AI response as JSON" });
315
+ }
316
+ // 10. AI exit check — parsed object has an error field
317
+ if (parsed.error != null) {
318
+ const errorMsg = typeof parsed.error === "string"
319
+ ? parsed.error
320
+ : extractErrorMessage(parsed.error);
321
+ return reply.send({ success: false, error: errorMsg });
322
+ }
323
+ // 11. Validate fields
324
+ const validationError = validateAIRule(parsed);
325
+ if (validationError) {
326
+ return reply.send({ success: false, error: `Rule validation failed: ${validationError}` });
327
+ }
328
+ // 12. Return success
329
+ return reply.send({
330
+ success: true,
331
+ rule: {
332
+ name: parsed.name,
333
+ status_code: parsed.status_code,
334
+ body_pattern: parsed.body_pattern,
335
+ retry_strategy: parsed.retry_strategy,
336
+ retry_delay_ms: parsed.retry_delay_ms,
337
+ max_retries: parsed.max_retries,
338
+ max_delay_ms: parsed.max_delay_ms,
339
+ },
340
+ summary: parsed.summary,
341
+ });
342
+ });
100
343
  done();
101
344
  };
@@ -144,8 +144,17 @@ let directoryContextWindows = {};
144
144
  */
145
145
  export function loadModelDirectory(configDir) {
146
146
  try {
147
- // 默认相对于当前文件所在目录(dist/config/ 或 src/config/),而非 process.cwd()
148
- const dir = configDir ?? path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "config");
147
+ // 优先使用传入的 configDir,否则自动检测:
148
+ // - 生产 (dist/config/model-context.js): 上溯一级到 dist/ → dist/config/ (postbuild 已复制)
149
+ // - 开发 (src/config/model-context.ts): 上溯二级到包根 → config/
150
+ let dir = configDir;
151
+ if (!dir) {
152
+ const fileDir = path.dirname(fileURLToPath(import.meta.url));
153
+ const prodDir = path.resolve(fileDir, "..", "config");
154
+ dir = fs.existsSync(path.join(prodDir, "model-directory.json"))
155
+ ? prodDir
156
+ : path.resolve(fileDir, "..", "..", "config");
157
+ }
149
158
  const filePath = path.join(dir, "model-directory.json");
150
159
  const raw = fs.readFileSync(filePath, "utf-8");
151
160
  const data = JSON.parse(raw);
@@ -155,11 +164,11 @@ export function loadModelDirectory(configDir) {
155
164
  if (data.context_windows && typeof data.context_windows === "object") {
156
165
  directoryContextWindows = data.context_windows;
157
166
  }
158
- // eslint-disable-next-line taste/no-silent-catch -- 加载失败不影响启动,使用硬编码白名单兆底。但记录到 stderr 供诊断
159
167
  }
160
168
  catch (err) {
161
- // 加载失败不影响启动,使用硬编码白名单兆底。但记录到 stderr 供诊断
162
- console.error('loadModelDirectory: failed to load, using hardcoded fallback', err);
169
+ const msg = err instanceof Error ? err.message : typeof err === 'string' ? err : JSON.stringify(err);
170
+ console.warn(`loadModelDirectory: failed to load (${msg}), using hardcoded fallback`);
171
+ console.debug(err);
163
172
  }
164
173
  }
165
174
  /** 查询模型 capabilities:显式配置 > model-directory.json > 硬编码白名单 > ["text"] */
@@ -8,5 +8,6 @@
8
8
  { "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
9
9
  { "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
10
10
  { "name": "ZAI 模型过载 (HTTP 200, code 1305)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1305\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000, "providers": ["智谱"] },
11
- { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] }
11
+ { "name": "KIMI 401 认证错误", "status_code": 401, "body_pattern": ".*authentication_error.*", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 3, "max_delay_ms": 60000, "providers": ["月之暗面"] },
12
+ { "name": "DeepSeek 并发限流 (429)", "status_code": 429, "body_pattern": "Too many requests.*concurrency", "retry_strategy": "exponential", "retry_delay_ms": 2000, "max_retries": 5, "max_delay_ms": 120000, "providers": ["DeepSeek", "OpenCode"] }
12
13
  ]
@@ -1,4 +1,8 @@
1
1
  const SSE_DATA_PREFIX = "data: ";
2
+ // OpenAI stream block index 分配:reasoning/text/tools 使用不同区间避免混合
3
+ const OPENAI_BLOCK_REASONING = 0;
4
+ const OPENAI_BLOCK_TEXT = 1;
5
+ const OPENAI_BLOCK_TOOLS = 2;
2
6
  export function extractStreamText(line, apiType) {
3
7
  const empty = { text: "", block: null };
4
8
  if (!line.startsWith(SSE_DATA_PREFIX))
@@ -16,8 +20,32 @@ export function extractStreamText(line, apiType) {
16
20
  if (apiType === "openai") {
17
21
  const choices = obj.choices;
18
22
  const delta = choices?.[0]?.delta;
19
- const text = delta?.content ?? delta?.reasoning_content ?? "";
20
- return { text, block: text ? { index: 0, type: "text", content: text } : null };
23
+ const text = delta?.content ?? "";
24
+ const reasoning = delta?.reasoning_content ?? "";
25
+ // OpenAI 不像 Anthropic 那样为不同 content type 分配独立 index。
26
+ // 策略:reasoning → OPENAI_BLOCK_REASONING, text → OPENAI_BLOCK_TEXT,
27
+ // tool_calls[N] → OPENAI_BLOCK_TOOLS + N。
28
+ // 这样不同类型的内容不会混在同一个 block 中。
29
+ if (reasoning) {
30
+ return { text: reasoning, block: { index: OPENAI_BLOCK_REASONING, type: "thinking", content: reasoning } };
31
+ }
32
+ if (text) {
33
+ return { text, block: { index: OPENAI_BLOCK_TEXT, type: "text", content: text } };
34
+ }
35
+ const toolCalls = delta?.tool_calls;
36
+ if (toolCalls) {
37
+ const tc = toolCalls[0];
38
+ if (tc) {
39
+ const tcIndex = tc.index ?? 0;
40
+ const fn = tc.function;
41
+ const args = fn?.arguments ?? "";
42
+ const name = fn?.name ?? "";
43
+ if (args || name) {
44
+ return { text: "", block: { index: OPENAI_BLOCK_TOOLS + tcIndex, type: "tool_use", content: args, name: name || undefined } };
45
+ }
46
+ }
47
+ }
48
+ return empty;
21
49
  }
22
50
  if (apiType === "openai-responses") {
23
51
  // Responses SSE uses named events, but line format is "data: {json}" (same as Anthropic)
package/dist/index.js CHANGED
@@ -322,7 +322,7 @@ export async function buildApp(options) {
322
322
  });
323
323
  }
324
324
  else {
325
- app.log.warn(`Frontend dist not found at ${frontendDist}, skipping static serving`);
325
+ app.log.debug(`Frontend dist not found at ${frontendDist}, skipping static serving`);
326
326
  }
327
327
  app.get("/health", async () => {
328
328
  return { status: "ok" };
@@ -19,12 +19,7 @@ import { SERVICE_KEYS } from "../../core/container.js";
19
19
  import { createPipelineContext } from "../pipeline/context.js";
20
20
  import { proxyPipeline } from "../pipeline/pipeline.js";
21
21
  import { executeFailoverLoop } from "./failover-loop.js";
22
- import { loadEnhancementConfig } from "../routing/enhancement-config.js";
23
- import { ToolLoopGuard } from "../../core/loop-prevention/index.js";
24
- import { HTTP_UNPROCESSABLE_ENTITY } from "../../core/constants.js";
25
22
  import { PipelineAbort } from "../pipeline/types.js";
26
- import { applyToolRoundLimit } from "../patch/tool-round-limiter.js";
27
- import { extractLastToolUse } from "./proxy-handler-utils.js";
28
23
  // ---------- Models handler (shared across openai/anthropic) ----------
29
24
  const ANTHROPIC_DEFAULT_PAGE_SIZE = 20;
30
25
  const ANTHROPIC_MAX_PAGE_SIZE = 1000;
@@ -98,60 +93,6 @@ function handleModelsRequest(db) {
98
93
  });
99
94
  };
100
95
  }
101
- // ---------- Enhancement preprocessing (extracted from old handleProxyRequest) ----------
102
- const TIER2_LOOP_THRESHOLD = 2;
103
- function applyEnhancementPreprocess(request, reply, ctx, db, container) {
104
- const enhancementConfig = loadEnhancementConfig(db);
105
- const apiType = ctx.apiType;
106
- const sessionId = ctx.metadata.get("session_id");
107
- // 工具轮数限制
108
- if (enhancementConfig.tool_round_limit_enabled) {
109
- const roundResult = applyToolRoundLimit(ctx.body, apiType);
110
- if (roundResult.injected) {
111
- ctx.body = roundResult.body;
112
- ctx.snapshot.add({ stage: "tool_round_limit", action: "inject_warning", rounds: roundResult.rounds });
113
- request.log.info({ sessionId, rounds: roundResult.rounds }, "Tool round limit reached, injecting warning prompt");
114
- }
115
- }
116
- // 工具循环检测
117
- if (!enhancementConfig.tool_call_loop_enabled || !sessionId)
118
- return;
119
- const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
120
- if (!sessionTracker)
121
- return;
122
- const routerKeyId = request.routerKey?.id ?? null;
123
- const sessionKey = routerKeyId ? `${routerKeyId}:${sessionId}` : sessionId;
124
- const lastToolUse = extractLastToolUse(ctx.body);
125
- if (!lastToolUse)
126
- return;
127
- const toolGuard = new ToolLoopGuard(sessionTracker, {
128
- enabled: true,
129
- minConsecutiveCount: 3,
130
- detectorConfig: { n: 6, windowSize: 500, repeatThreshold: 5 },
131
- });
132
- const checkResult = toolGuard.check(sessionKey, lastToolUse);
133
- if (!checkResult.detected)
134
- return;
135
- const loopCount = sessionTracker.getLoopCount(sessionKey);
136
- if (loopCount === 1) {
137
- ctx.body = toolGuard.injectLoopBreakPrompt(ctx.body, apiType, lastToolUse.toolName);
138
- ctx.snapshot.add({ stage: "tool_guard", action: "inject_break_prompt", tool: lastToolUse.toolName });
139
- request.log.warn({ sessionId, toolName: lastToolUse.toolName, loopCount }, "Tool call loop detected, injecting break prompt");
140
- }
141
- else if (loopCount === TIER2_LOOP_THRESHOLD) {
142
- throw new PipelineAbort(HTTP_UNPROCESSABLE_ENTITY, {
143
- error: {
144
- type: "tool_call_loop_detected",
145
- message: `检测到工具调用循环(连续重复调用 "${lastToolUse.toolName}")。请求已中断。`,
146
- suggestion: "请回顾对话历史,停止重复调用工具,直接告知用户当前的进展和遇到的问题。",
147
- },
148
- });
149
- }
150
- else {
151
- request.log.warn({ sessionId, toolName: lastToolUse.toolName, loopCount }, "Tool call loop detected, hard disconnecting");
152
- throw new PipelineAbort(HTTP_CLIENT_CLOSED, { _disconnect: true });
153
- }
154
- }
155
96
  // ---------- Factory ----------
156
97
  export function createProxyHandler(config) {
157
98
  const { apiType, paths } = config;
@@ -214,13 +155,10 @@ export function createProxyHandler(config) {
214
155
  const ctx = createPipelineContext(request, reply, apiType);
215
156
  // 注入 DB 到 metadata(hooks 需要访问 settings/写入数据)
216
157
  ctx.metadata.set("db", db);
158
+ ctx.metadata.set("container", container);
217
159
  // 执行 pre_route 阶段 hooks(client-detection 在此阶段设置 client_type / session_id)
218
- await proxyPipeline.emit("pre_route", ctx).catch(err => {
219
- request.log.error({ err }, "pre_route hook failed");
220
- });
221
- // 增强预处理(工具轮数限制 + 工具循环检测)
222
160
  try {
223
- applyEnhancementPreprocess(request, reply, ctx, db, container);
161
+ await proxyPipeline.emit("pre_route", ctx);
224
162
  }
225
163
  catch (e) {
226
164
  if (e instanceof PipelineAbort) {
@@ -230,6 +168,7 @@ export function createProxyHandler(config) {
230
168
  }
231
169
  return reply.code(e.statusCode).send(e.body);
232
170
  }
171
+ request.log.error({ err: e }, "pre_route hook failed");
233
172
  throw e;
234
173
  }
235
174
  const deps = {
@@ -172,7 +172,7 @@ export class ResilienceLayer {
172
172
  transportResult = await fn(currentTarget);
173
173
  }
174
174
  catch (err) {
175
- const errMsg = err instanceof Error ? err.message : err instanceof Error ? err.message : JSON.stringify(err);
175
+ const errMsg = err instanceof Error ? err.message : JSON.stringify(err);
176
176
  transportResult = { kind: "throw", error: err instanceof Error ? err : new Error(errMsg) };
177
177
  }
178
178
  lastResult = transportResult;
@@ -18,6 +18,35 @@ export function sanitizeHeadersForLog(headers) {
18
18
  }
19
19
  return sanitized;
20
20
  }
21
+ /** 从上游响应 body 中提取错误信息,用于 error_message 为空但上游返回了非 200 的场景 */
22
+ function extractErrorMessageFromResponse(responseBody) {
23
+ if (!responseBody)
24
+ return null;
25
+ const MAX_TEXT_LENGTH = 200;
26
+ try {
27
+ const parsed = JSON.parse(responseBody);
28
+ // OpenAI / DeepSeek 格式: { error: { message: "..." } }
29
+ const openaiMsg = parsed?.error?.message;
30
+ if (typeof openaiMsg === "string")
31
+ return openaiMsg;
32
+ // Cloudflare 格式: { title: "...", detail: "..." }
33
+ if (typeof parsed?.title === "string") {
34
+ const detail = parsed?.detail;
35
+ return typeof detail === "string" ? `${parsed.title}: ${detail}` : parsed.title;
36
+ }
37
+ // 兜底:直接 message 字段
38
+ if (typeof parsed?.message === "string")
39
+ return parsed.message;
40
+ }
41
+ catch {
42
+ // 非 JSON(如 HTML),截取前 200 字符
43
+ const text = responseBody.trim();
44
+ if (text.length > MAX_TEXT_LENGTH)
45
+ return text.slice(0, MAX_TEXT_LENGTH) + "...";
46
+ return text || null;
47
+ }
48
+ return null;
49
+ }
21
50
  // ---------- Logging helpers (extracted from proxy-core) ----------
22
51
  // ---------- New-architecture logging ----------
23
52
  export function logResilienceResult(db, params, attempts, result, startTime) {
@@ -77,7 +106,7 @@ export function logResilienceResult(db, params, attempts, result, startTime) {
77
106
  id: attemptLogId, api_type: params.apiType, model: params.model,
78
107
  provider_id: attempt.target.provider_id,
79
108
  status_code: attempt.statusCode, latency_ms: attempt.latencyMs,
80
- is_stream: params.isStream ? 1 : 0, error_message: null,
109
+ is_stream: params.isStream ? 1 : 0, error_message: extractErrorMessageFromResponse(attempt.responseBody),
81
110
  created_at: new Date().toISOString(),
82
111
  client_request: params.clientReq, upstream_request: params.upstreamReqBase,
83
112
  upstream_response: JSON.stringify({ statusCode: attempt.statusCode, headers: attempt.responseHeaders, body: attempt.responseBody }),