llm-simple-router 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/dist/admin/providers.js +6 -4
  2. package/dist/admin/quick-setup.js +3 -1
  3. package/dist/app/register-routes.js +2 -0
  4. package/dist/config/model-context.d.ts +2 -0
  5. package/dist/config/model-context.js +4 -0
  6. package/dist/core/concurrency/semaphore.d.ts +15 -2
  7. package/dist/core/concurrency/semaphore.js +36 -4
  8. package/dist/core/constants.d.ts +1 -0
  9. package/dist/core/constants.js +3 -0
  10. package/dist/core/monitor/request-tracker.d.ts +6 -0
  11. package/dist/core/monitor/request-tracker.js +15 -0
  12. package/dist/core/types.d.ts +1 -1
  13. package/dist/db/providers.d.ts +16 -3
  14. package/dist/db/providers.js +27 -12
  15. package/dist/index.js +2 -0
  16. package/dist/proxy/handler/iteration-setup.js +7 -3
  17. package/dist/proxy/orchestration/orchestrator.d.ts +1 -1
  18. package/dist/proxy/orchestration/orchestrator.js +37 -12
  19. package/dist/proxy/orchestration/resilience.d.ts +3 -1
  20. package/dist/proxy/orchestration/resilience.js +17 -2
  21. package/dist/proxy/orchestration/scope.d.ts +1 -1
  22. package/dist/proxy/orchestration/scope.js +2 -2
  23. package/dist/proxy/proxy-core.d.ts +0 -11
  24. package/dist/proxy/proxy-core.js +2 -63
  25. package/dist/proxy/transport/http.d.ts +11 -13
  26. package/dist/proxy/transport/http.js +40 -30
  27. package/dist/proxy/transport/shared.d.ts +23 -0
  28. package/dist/proxy/transport/shared.js +58 -0
  29. package/dist/proxy/transport/stream.d.ts +56 -3
  30. package/dist/proxy/transport/stream.js +128 -49
  31. package/dist/proxy/transport/transport-fn.d.ts +2 -1
  32. package/dist/proxy/transport/transport-fn.js +3 -3
  33. package/frontend-dist/assets/AuthLayout-CmG_8Ovs.js +1 -0
  34. package/frontend-dist/assets/Card-BlGAfXME.js +1 -0
  35. package/frontend-dist/assets/CardContent-DTKSvHkJ.js +1 -0
  36. package/frontend-dist/assets/CardTitle-Cv1xB3mR.js +1 -0
  37. package/frontend-dist/assets/CascadingModelSelect-ul9uB6Dy.js +1 -0
  38. package/frontend-dist/assets/Checkbox-DxV7pVKU.js +1 -0
  39. package/frontend-dist/assets/CollapsibleContent-BgO8VoPR.js +1 -0
  40. package/frontend-dist/assets/CollapsibleTrigger-dG3CqCAV.js +1 -0
  41. package/frontend-dist/assets/ConcurrencyControl-BKR3IfV4.js +1 -0
  42. package/frontend-dist/assets/Dashboard-DCOF-zaF.js +3 -0
  43. package/frontend-dist/assets/{Input-DMDfXTXB.js → Input-BBtQpfsU.js} +1 -1
  44. package/frontend-dist/assets/Label-DARM_yCh.js +1 -0
  45. package/frontend-dist/assets/Login-qTUiO2Vc.js +1 -0
  46. package/frontend-dist/assets/Logs-BAywknrp.js +1 -0
  47. package/frontend-dist/assets/ModelMappings-DU06Tex1.js +1 -0
  48. package/frontend-dist/assets/Monitor-DR_u-5V1.js +1 -0
  49. package/frontend-dist/assets/Providers-DM5iF-Z5.js +1 -0
  50. package/frontend-dist/assets/ProxyEnhancement-CI-lDGff.js +1 -0
  51. package/frontend-dist/assets/QuickSetup-AWc9oZz4.js +1 -0
  52. package/frontend-dist/assets/RetryRules-DhA2ONMo.js +1 -0
  53. package/frontend-dist/assets/RouterKeys-CFILIP_P.js +1 -0
  54. package/frontend-dist/assets/{RovingFocusItem-5H5eE6G2.js → RovingFocusItem-mNZuQwzG.js} +1 -1
  55. package/frontend-dist/assets/Schedules-C8A9Dyry.js +1 -0
  56. package/frontend-dist/assets/Separator-CtbYW3SR.js +1 -0
  57. package/frontend-dist/assets/Settings-etFCYRt3.js +6 -0
  58. package/frontend-dist/assets/Setup-veKl98QO.js +1 -0
  59. package/frontend-dist/assets/Skeleton-aBg0O52j.js +1 -0
  60. package/frontend-dist/assets/Switch-K9syOZ4L.js +1 -0
  61. package/frontend-dist/assets/TableHeader-Dpn1Lnaz.js +1 -0
  62. package/frontend-dist/assets/TabsTrigger-C6L7-25Q.js +1 -0
  63. package/frontend-dist/assets/UnifiedRequestDialog-BQNd5d8M.js +3 -0
  64. package/frontend-dist/assets/{VisuallyHiddenInput-DrNFhnVL.js → VisuallyHiddenInput-CM0ZcPu6.js} +1 -1
  65. package/frontend-dist/assets/arrow-down-D7MkIKwy.js +1 -0
  66. package/frontend-dist/assets/badge-BVIIW0-Q.js +1 -0
  67. package/frontend-dist/assets/{button-BBiWml8B.js → button-CZXw3CE5.js} +2 -2
  68. package/frontend-dist/assets/chevron-right-XHFgIZAJ.js +1 -0
  69. package/frontend-dist/assets/dialog-C1UP6R9l.js +1 -0
  70. package/frontend-dist/assets/{image-zYdpUIEA.js → image-B1uUZwVK.js} +1 -1
  71. package/frontend-dist/assets/{index-DyQ39g4W.css → index-DGJSS9jI.css} +1 -1
  72. package/frontend-dist/assets/index-DU-d4dwG.js +58 -0
  73. package/frontend-dist/assets/model-patches-DdJLVJUH.js +1 -0
  74. package/frontend-dist/assets/{pencil-C3-MFg-d.js → pencil-HavpPvNF.js} +1 -1
  75. package/frontend-dist/assets/plus-BjrVWmRw.js +1 -0
  76. package/frontend-dist/assets/quickSetup-jgJgPUcH.js +1 -0
  77. package/frontend-dist/assets/quickSetup-qTjp3Z6J.js +1 -0
  78. package/frontend-dist/assets/search-q46OssNL.js +1 -0
  79. package/frontend-dist/assets/{sparkles-B5RWZZuf.js → sparkles-CCPKwVxK.js} +1 -1
  80. package/frontend-dist/assets/transform-domain-D0mVmoZd.js +1 -0
  81. package/frontend-dist/assets/{trash-2-Dn3T5-Z1.js → trash-2-CHuDHKxp.js} +1 -1
  82. package/frontend-dist/assets/{useClipboard-Bx3CrPal.js → useClipboard-C3_tZc-3.js} +1 -1
  83. package/frontend-dist/assets/useLogRetention-BiFJhaOm.js +1 -0
  84. package/frontend-dist/assets/{useProviderGroups-Og5FpCPe.js → useProviderGroups-CEb_RKrl.js} +1 -1
  85. package/frontend-dist/index.html +3 -3
  86. package/package.json +1 -1
  87. package/frontend-dist/assets/AuthLayout-jELzICkx.js +0 -1
  88. package/frontend-dist/assets/Card-uC_v0CEa.js +0 -1
  89. package/frontend-dist/assets/CardContent-CP3OiCj4.js +0 -1
  90. package/frontend-dist/assets/CardTitle-DGxuW5DZ.js +0 -1
  91. package/frontend-dist/assets/CascadingModelSelect-Dzk7rxIN.js +0 -1
  92. package/frontend-dist/assets/Checkbox-C1aVqGdC.js +0 -1
  93. package/frontend-dist/assets/CollapsibleContent-pBG4UkLo.js +0 -1
  94. package/frontend-dist/assets/CollapsibleTrigger-D5TkgXmz.js +0 -1
  95. package/frontend-dist/assets/ConcurrencyControl-5GweS-rY.js +0 -1
  96. package/frontend-dist/assets/Dashboard-wjd3d3qk.js +0 -3
  97. package/frontend-dist/assets/Label-BQXea0mo.js +0 -1
  98. package/frontend-dist/assets/Login-DNpCjxrY.js +0 -1
  99. package/frontend-dist/assets/Logs-BuL2Z0sF.js +0 -1
  100. package/frontend-dist/assets/ModelMappings-DeRhu-2N.js +0 -1
  101. package/frontend-dist/assets/Monitor-V30dnACo.js +0 -1
  102. package/frontend-dist/assets/Providers-BkkQhSTb.js +0 -1
  103. package/frontend-dist/assets/ProxyEnhancement-DjgebwfU.js +0 -1
  104. package/frontend-dist/assets/QuickSetup-BTVjEiU7.js +0 -1
  105. package/frontend-dist/assets/RetryRules-DFBHBG-B.js +0 -1
  106. package/frontend-dist/assets/RouterKeys-DZADvMfh.js +0 -1
  107. package/frontend-dist/assets/Schedules-DUubZ2uN.js +0 -1
  108. package/frontend-dist/assets/Separator-BqIs_Dy3.js +0 -1
  109. package/frontend-dist/assets/Settings-CO59WLOZ.js +0 -6
  110. package/frontend-dist/assets/Setup-Br7JZKNp.js +0 -1
  111. package/frontend-dist/assets/Skeleton-gVyjaP-y.js +0 -1
  112. package/frontend-dist/assets/Switch-D1ER0j6H.js +0 -1
  113. package/frontend-dist/assets/TableHeader-MqyrNSsx.js +0 -1
  114. package/frontend-dist/assets/TabsTrigger-DdjWJbUq.js +0 -1
  115. package/frontend-dist/assets/UnifiedRequestDialog-DRthlI6j.js +0 -3
  116. package/frontend-dist/assets/arrow-down-BFgGYafs.js +0 -1
  117. package/frontend-dist/assets/badge-Db4OYMEf.js +0 -1
  118. package/frontend-dist/assets/chevron-right-DYwStkJr.js +0 -1
  119. package/frontend-dist/assets/dialog-DRYeWncC.js +0 -1
  120. package/frontend-dist/assets/index-DTujoAWx.js +0 -58
  121. package/frontend-dist/assets/model-patches-DIy-rFuq.js +0 -1
  122. package/frontend-dist/assets/plus-xmIDnujf.js +0 -1
  123. package/frontend-dist/assets/quickSetup-CqxQRMCR.js +0 -1
  124. package/frontend-dist/assets/quickSetup-DplqYrvf.js +0 -1
  125. package/frontend-dist/assets/search-BxNrTsG8.js +0 -1
  126. package/frontend-dist/assets/transform-domain-KBixlLXR.js +0 -1
  127. package/frontend-dist/assets/useLogRetention-CdccNhYN.js +0 -1
@@ -75,6 +75,8 @@ function extractModelOverrides(models) {
75
75
  const entry = { name, patches: (m.patches ?? []).map(normalizePatchName) };
76
76
  if (m.stream_timeout_ms != null)
77
77
  entry.stream_timeout_ms = m.stream_timeout_ms;
78
+ if (m.non_stream_timeout_ms != null)
79
+ entry.non_stream_timeout_ms = m.non_stream_timeout_ms;
78
80
  if (m.capabilities != null && Array.isArray(m.capabilities))
79
81
  entry.capabilities = m.capabilities;
80
82
  entries.push(entry);
@@ -124,8 +126,8 @@ const CreateProviderSchema = Type.Object({
124
126
  endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
125
127
  models: Type.Optional(Type.Array(Type.Union([
126
128
  Type.String(),
127
- Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
128
- Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
129
+ Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
130
+ Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
129
131
  ]))),
130
132
  is_active: Type.Optional(Type.Number()),
131
133
  max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
@@ -146,8 +148,8 @@ const UpdateProviderSchema = Type.Object({
146
148
  endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
147
149
  models: Type.Optional(Type.Array(Type.Union([
148
150
  Type.String(),
149
- Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
150
- Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
151
+ Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
152
+ Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
151
153
  ]))),
152
154
  is_active: Type.Optional(Type.Number()),
153
155
  max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
@@ -53,7 +53,8 @@ const QuickSetupProviderSchema = Type.Object({
53
53
  name: Type.String(),
54
54
  context_window: Type.Optional(Type.Number()),
55
55
  patches: Type.Optional(Type.Array(Type.String())),
56
- stream_timeout_ms: Type.Optional(Type.Number()),
56
+ stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })),
57
+ non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })),
57
58
  capabilities: Type.Optional(Type.Array(Type.String())),
58
59
  })),
59
60
  endpoints: Type.Optional(Type.Array(QuickSetupEndpointSchema, { minItems: 1 })),
@@ -122,6 +123,7 @@ export const adminQuickSetupRoutes = (app, options, done) => {
122
123
  ...(m.context_window != null ? { context_window: m.context_window } : {}),
123
124
  ...(m.patches && m.patches.length > 0 ? { patches: m.patches } : {}),
124
125
  ...(m.stream_timeout_ms != null ? { stream_timeout_ms: m.stream_timeout_ms } : {}),
126
+ ...(m.non_stream_timeout_ms != null ? { non_stream_timeout_ms: m.non_stream_timeout_ms } : {}),
125
127
  ...(m.capabilities && m.capabilities.length > 0 ? { capabilities: m.capabilities } : {}),
126
128
  }));
127
129
  const adaptiveEnabled = body.provider.concurrency_mode === 'auto' ? 1 : 0;
@@ -77,6 +77,8 @@ export function registerRoutes(app, opts) {
77
77
  dbSizeMonitor.stop();
78
78
  tracker.stopPushInterval();
79
79
  tracker.closeAllClients();
80
+ // 先终止所有 inflight 请求(复用 kill 机制同步释放信号量),再清空信号量配置
81
+ tracker.abortAllInflight();
80
82
  semaphoreManager.removeAll();
81
83
  proxyAgentFactory.invalidateAll();
82
84
  const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
@@ -3,6 +3,7 @@ export interface ModelInfo {
3
3
  context_window: number | null;
4
4
  patches: string[];
5
5
  stream_timeout_ms?: number;
6
+ non_stream_timeout_ms?: number;
6
7
  capabilities?: string[];
7
8
  }
8
9
  export interface ModelEntry {
@@ -10,6 +11,7 @@ export interface ModelEntry {
10
11
  context_window?: number;
11
12
  patches?: string[];
12
13
  stream_timeout_ms?: number;
14
+ non_stream_timeout_ms?: number;
13
15
  capabilities?: string[];
14
16
  }
15
17
  export declare const MODEL_CONTEXT_WINDOWS: Record<string, number>;
@@ -247,6 +247,8 @@ export function parseModels(raw) {
247
247
  };
248
248
  if (obj.stream_timeout_ms != null)
249
249
  entry.stream_timeout_ms = obj.stream_timeout_ms;
250
+ if (obj.non_stream_timeout_ms != null)
251
+ entry.non_stream_timeout_ms = obj.non_stream_timeout_ms;
250
252
  // capabilities: 显式 > model-directory > 硬编码白名单 > 默认 ["text"]
251
253
  entry.capabilities = obj.capabilities ?? lookupCapabilities(modelName);
252
254
  return entry;
@@ -267,6 +269,8 @@ export function buildModelInfoList(modelEntries, overrides) {
267
269
  };
268
270
  if (entry.stream_timeout_ms != null)
269
271
  info.stream_timeout_ms = entry.stream_timeout_ms;
272
+ if (entry.non_stream_timeout_ms != null)
273
+ info.non_stream_timeout_ms = entry.non_stream_timeout_ms;
270
274
  if (entry.capabilities != null)
271
275
  info.capabilities = entry.capabilities;
272
276
  return info;
@@ -6,19 +6,32 @@ export interface AcquireToken {
6
6
  readonly generation: number;
7
7
  /** acquire 时 maxConcurrency=0(不计数),release 时跳过递减 */
8
8
  readonly bypassed: boolean;
9
+ /** 幂等标志:release 置 true,重复 release 直接跳过(防 kill 与自然完成双重递减) */
10
+ released: boolean;
11
+ /** 关联请求 ID,用于 releaseByReqId 反查及自然完成时清理 reqTokenMap */
12
+ readonly reqId?: string;
9
13
  }
10
14
  export declare class SemaphoreManager {
11
15
  private readonly entries;
16
+ /** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
17
+ * acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
18
+ private readonly reqTokenMap;
12
19
  /** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
13
20
  private nextGeneration;
14
21
  private getOrCreate;
15
22
  updateConfig(providerId: string, config: ConcurrencyConfig): void;
23
+ /** 构建 token 并按 reqId 存入 reqTokenMap(统一 bypassed/direct/queued 三路径的记录逻辑) */
24
+ private buildAndRecordToken;
16
25
  acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void, logger?: Logger, override?: {
17
26
  max_concurrency?: number;
18
27
  queue_timeout_ms?: number;
19
28
  max_queue_size?: number;
20
- }): Promise<AcquireToken>;
21
- release(providerId: string, token: AcquireToken, logger?: Logger): void;
29
+ }, reqId?: string): Promise<AcquireToken>;
30
+ release(providerId: string, token: AcquireToken | undefined, logger?: Logger): void;
31
+ /** 按 reqId 同步释放信号量(kill 路径专用)。
32
+ * - 已 acquire:取 {token, providerId} 调 release(幂等)
33
+ * - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
34
+ releaseByReqId(reqId: string): void;
22
35
  getStatus(providerId: string): {
23
36
  active: number;
24
37
  queued: number;
@@ -2,6 +2,9 @@ import { SemaphoreQueueFullError, SemaphoreTimeoutError } from "../errors.js";
2
2
  export { SemaphoreQueueFullError, SemaphoreTimeoutError };
3
3
  export class SemaphoreManager {
4
4
  entries = new Map();
5
+ /** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
6
+ * acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
7
+ reqTokenMap = new Map();
5
8
  /** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
6
9
  nextGeneration = 0;
7
10
  getOrCreate(providerId) {
@@ -53,17 +56,24 @@ export class SemaphoreManager {
53
56
  e.resolve();
54
57
  }
55
58
  }
56
- async acquire(providerId, signal, onQueued, logger, override) {
59
+ /** 构建 token 并按 reqId 存入 reqTokenMap(统一 bypassed/direct/queued 三路径的记录逻辑) */
60
+ buildAndRecordToken(entry, bypassed, reqId, providerId) {
61
+ const token = { generation: entry.generation, bypassed, released: false, reqId };
62
+ if (reqId)
63
+ this.reqTokenMap.set(reqId, { token, providerId });
64
+ return token;
65
+ }
66
+ async acquire(providerId, signal, onQueued, logger, override, reqId) {
57
67
  const entry = this.getOrCreate(providerId);
58
68
  const maxConcurrency = override?.max_concurrency ?? entry.config.maxConcurrency;
59
69
  const queueTimeoutMs = Math.max(0, override?.queue_timeout_ms ?? entry.config.queueTimeoutMs);
60
70
  const maxQueueSize = Math.max(0, override?.max_queue_size ?? entry.config.maxQueueSize);
61
71
  if (maxConcurrency === 0)
62
- return { generation: entry.generation, bypassed: true };
72
+ return this.buildAndRecordToken(entry, true, reqId, providerId);
63
73
  if (entry.current < maxConcurrency) {
64
74
  entry.current++;
65
75
  logger?.debug?.({ providerId, current: entry.current, maxConcurrency, action: "acquire_direct" }, "Semaphore: acquired directly");
66
- return { generation: entry.generation, bypassed: false };
76
+ return this.buildAndRecordToken(entry, false, reqId, providerId);
67
77
  }
68
78
  if (entry.queue.length >= maxQueueSize) {
69
79
  logger?.debug?.({ providerId, queueLength: entry.queue.length, maxQueueSize, action: "acquire_rejected" }, "Semaphore: queue full, rejecting");
@@ -72,9 +82,11 @@ export class SemaphoreManager {
72
82
  logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_queued" }, "Semaphore: entering wait queue");
73
83
  onQueued?.();
74
84
  return new Promise((resolve, reject) => {
75
- const token = { generation: entry.generation, bypassed: false };
76
85
  const qe = {
77
86
  resolve: () => {
87
+ // 关键:在真正获取槽位后才构建并记录 token。
88
+ // 若在 executor 创建 token 后立即记录,排队中被 kill 会误减 current。
89
+ const token = this.buildAndRecordToken(entry, false, reqId, providerId);
78
90
  logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_resolved" }, "Semaphore: left wait queue, acquired");
79
91
  resolve(token);
80
92
  },
@@ -107,6 +119,17 @@ export class SemaphoreManager {
107
119
  });
108
120
  }
109
121
  release(providerId, token, logger) {
122
+ if (!token)
123
+ return;
124
+ // 幂等:kill 强制释放与自然完成都走此处,已 released 则跳过(防双重递减)
125
+ if (token.released) {
126
+ logger?.debug?.({ providerId, action: "release_idempotent" }, "Semaphore: token already released, skipping");
127
+ return;
128
+ }
129
+ token.released = true;
130
+ // 清理 reqTokenMap(自然完成自动回收,防 map 无限增长)
131
+ if (token.reqId)
132
+ this.reqTokenMap.delete(token.reqId);
110
133
  const entry = this.entries.get(providerId);
111
134
  if (!entry)
112
135
  return;
@@ -130,6 +153,15 @@ export class SemaphoreManager {
130
153
  logger?.debug?.({ providerId, current: entry.current, maxConcurrency: entry.config.maxConcurrency, action: "release_decrement" }, "Semaphore: released slot");
131
154
  }
132
155
  }
156
+ /** 按 reqId 同步释放信号量(kill 路径专用)。
157
+ * - 已 acquire:取 {token, providerId} 调 release(幂等)
158
+ * - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
159
+ releaseByReqId(reqId) {
160
+ const record = this.reqTokenMap.get(reqId);
161
+ if (!record)
162
+ return;
163
+ this.release(record.providerId, record.token);
164
+ }
133
165
  getStatus(providerId) {
134
166
  const entry = this.entries.get(providerId);
135
167
  if (!entry)
@@ -14,5 +14,6 @@ export declare function getProxyApiType(url: string): string | null;
14
14
  export declare const MS_PER_SECOND = 1000;
15
15
  export declare const SECONDS_PER_DAY = 86400;
16
16
  export declare const UPSTREAM_SUCCESS = 200;
17
+ export declare const DEFAULT_GET_TIMEOUT_MS = 30000;
17
18
  /** 过滤掉不应转发给下游的 hop-by-hop headers */
18
19
  export declare function filterHeaders(raw: import("./types.js").RawHeaders): Record<string, string>;
@@ -27,6 +27,9 @@ export const MS_PER_SECOND = 1000;
27
27
  export const SECONDS_PER_DAY = 86_400;
28
28
  // 上游成功状态码
29
29
  export const UPSTREAM_SUCCESS = 200;
30
+ // callGet(admin 探测 /v1/models)默认超时:30s。
31
+ // 仅响应头前超时,与流式 idleTimer 无关。
32
+ export const DEFAULT_GET_TIMEOUT_MS = 30_000;
30
33
  /** 过滤掉不应转发给下游的 hop-by-hop headers */
31
34
  const SKIP_DOWNSTREAM = new Set([
32
35
  "content-length",
@@ -35,12 +35,16 @@ export declare class RequestTracker {
35
35
  readonly runtimeCollector: RuntimeCollector;
36
36
  private readonly semaphoreManager?;
37
37
  private adaptiveStatusProvider?;
38
+ /** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
39
+ private releaseSlotProvider?;
38
40
  constructor(deps?: {
39
41
  semaphoreManager?: ISemaphoreStatus;
40
42
  runtimeCollector?: RuntimeCollector;
41
43
  logger?: Logger;
42
44
  });
43
45
  setAdaptiveStatusProvider(provider: IAdaptiveStatus): void;
46
+ /** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
47
+ setReleaseSlotProvider(fn: (reqId: string) => void): void;
44
48
  start(req: ActiveRequest): void;
45
49
  /** 轻量级节流推送:流式内容变更后 500ms 内批量广播 */
46
50
  private scheduleStreamContentPush;
@@ -63,6 +67,8 @@ export declare class RequestTracker {
63
67
  registerKillCallback(id: string, callback: () => void): void;
64
68
  /** 主动终止指定请求。返回 true 表示成功终止,false 表示请求不存在或已完成 */
65
69
  killRequest(id: string): boolean;
70
+ /** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
71
+ abortAllInflight(): void;
66
72
  getStats(): StatsSnapshot;
67
73
  getConcurrency(): ProviderConcurrencySnapshot[];
68
74
  getRuntime(): RuntimeMetrics;
@@ -27,6 +27,8 @@ export class RequestTracker {
27
27
  runtimeCollector;
28
28
  semaphoreManager;
29
29
  adaptiveStatusProvider;
30
+ /** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
31
+ releaseSlotProvider;
30
32
  constructor(deps) {
31
33
  this.semaphoreManager = deps?.semaphoreManager;
32
34
  this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
@@ -36,6 +38,10 @@ export class RequestTracker {
36
38
  setAdaptiveStatusProvider(provider) {
37
39
  this.adaptiveStatusProvider = provider;
38
40
  }
41
+ /** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
42
+ setReleaseSlotProvider(fn) {
43
+ this.releaseSlotProvider = fn;
44
+ }
39
45
  // --- Core methods ---
40
46
  start(req) {
41
47
  this.activeMap.set(req.id, { ...req });
@@ -201,12 +207,21 @@ export class RequestTracker {
201
207
  this.killCallbacks.delete(id);
202
208
  this.logger?.info?.({ reqId: id }, "Tracker: killRequest");
203
209
  callback();
210
+ // 同步释放信号量槽位(releaseByReqId 幂等:未 acquire 的排队请求 noop)
211
+ this.releaseSlotProvider?.(id);
204
212
  // transport 可能尚未 resolve(上游未响应时 StreamProxy 不存在),强制完成请求
205
213
  if (this.activeMap.has(id)) {
206
214
  this.complete(id, { status: "failed" });
207
215
  }
208
216
  return true;
209
217
  }
218
+ /** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
219
+ abortAllInflight() {
220
+ const ids = [...this.killCallbacks.keys()];
221
+ for (const id of ids) {
222
+ this.killRequest(id);
223
+ }
224
+ }
210
225
  // --- Stats / monitoring ---
211
226
  getStats() {
212
227
  return this.statsAggregator.getStats();
@@ -91,7 +91,7 @@ export type TransportResult = {
91
91
  providerId: string;
92
92
  };
93
93
  timeoutMs?: number;
94
- abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection";
94
+ abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection" | "pipe_error";
95
95
  } | {
96
96
  kind: "error";
97
97
  statusCode: number;
@@ -24,9 +24,22 @@ export interface Provider {
24
24
  created_at: string;
25
25
  updated_at: string;
26
26
  }
27
- /** 默认流式超时 10 分钟 */
28
- export declare const DEFAULT_STREAM_TIMEOUT_MS = 600000;
29
- /** provider models JSON 中查找指定模型的超时值 */
27
+ /**
28
+ * 默认流式超时 5 分钟。
29
+ * 行为变更:v1.1.x 起从 600s(10min) 降为 300s(5min),影响未显式配置 stream_timeout_ms 的 provider。
30
+ * 长跑流式生成(长推理/长输出)若超 5min 会被中断,需在 provider/model 配置中显式调大或设 0(禁用)。
31
+ */
32
+ export declare const DEFAULT_STREAM_TIMEOUT_MS = 300000;
33
+ /** 默认非流式超时 10 分钟 */
34
+ export declare const DEFAULT_NON_STREAM_TIMEOUT_MS = 600000;
35
+ /** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
36
+ * stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
37
+ * nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
38
+ export declare function getModelTimeouts(provider: Provider, backendModel: string): {
39
+ stream: number;
40
+ nonStream: number;
41
+ };
42
+ /** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
30
43
  export declare function getModelStreamTimeout(provider: Provider, backendModel: string): number;
31
44
  export declare const PROVIDER_CONCURRENCY_DEFAULTS: {
32
45
  readonly max_concurrency: 0;
@@ -1,20 +1,35 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { buildUpdateQuery, deleteById } from "./helpers.js";
3
3
  import { parseModels } from "../config/model-context.js";
4
- /** 默认流式超时 10 分钟 */
5
- export const DEFAULT_STREAM_TIMEOUT_MS = 600_000;
6
- /** provider models JSON 中查找指定模型的超时值 */
7
- export function getModelStreamTimeout(provider, backendModel) {
4
+ /**
5
+ * 默认流式超时 5 分钟。
6
+ * 行为变更:v1.1.x 起从 600s(10min) 降为 300s(5min),影响未显式配置 stream_timeout_ms 的 provider。
7
+ * 长跑流式生成(长推理/长输出)若超 5min 会被中断,需在 provider/model 配置中显式调大或设 0(禁用)
8
+ */
9
+ export const DEFAULT_STREAM_TIMEOUT_MS = 300_000;
10
+ /** 默认非流式超时 10 分钟 */
11
+ export const DEFAULT_NON_STREAM_TIMEOUT_MS = 600_000;
12
+ /** 0 表示禁用超时(返回 Infinity);undefined/null/未设置 使用默认值 */
13
+ function resolveTimeout(value, fallback) {
14
+ return value === 0 ? Number.POSITIVE_INFINITY : value ?? fallback;
15
+ }
16
+ /** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
17
+ * stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
18
+ * nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
19
+ export function getModelTimeouts(provider, backendModel) {
8
20
  const entries = parseModels(provider.models);
9
21
  const entry = entries.find(m => m.name === backendModel);
10
- if (!entry)
11
- return DEFAULT_STREAM_TIMEOUT_MS;
12
- const timeout = entry.stream_timeout_ms;
13
- // stream_timeout_ms: 0 表示禁用超时,返回 Infinity;
14
- // undefined/null/未设置 表示使用默认值
15
- if (timeout === 0)
16
- return Number.POSITIVE_INFINITY;
17
- return timeout ?? DEFAULT_STREAM_TIMEOUT_MS;
22
+ if (!entry) {
23
+ return { stream: DEFAULT_STREAM_TIMEOUT_MS, nonStream: DEFAULT_NON_STREAM_TIMEOUT_MS };
24
+ }
25
+ return {
26
+ stream: resolveTimeout(entry.stream_timeout_ms, DEFAULT_STREAM_TIMEOUT_MS),
27
+ nonStream: resolveTimeout(entry.non_stream_timeout_ms, DEFAULT_NON_STREAM_TIMEOUT_MS),
28
+ };
29
+ }
30
+ /** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
31
+ export function getModelStreamTimeout(provider, backendModel) {
32
+ return getModelTimeouts(provider, backendModel).stream;
18
33
  }
19
34
  export const PROVIDER_CONCURRENCY_DEFAULTS = {
20
35
  max_concurrency: 0,
package/dist/index.js CHANGED
@@ -69,6 +69,8 @@ export async function buildApp(options) {
69
69
  const pluginRegistry = container.resolve(SERVICE_KEYS.pluginRegistry);
70
70
  // Wire adaptive controller to tracker
71
71
  tracker.setAdaptiveStatusProvider(adaptiveController);
72
+ // 绑定信号量释放回调:kill 时按 reqId 同步释放槽位(防 kill 不释放信号量)
73
+ tracker.setReleaseSlotProvider((reqId) => semaphoreManager.releaseByReqId(reqId));
72
74
  // 从 DB 读取已有 provider 的并发配置,初始化信号量/adaptive/tracker 缓存
73
75
  initializeProviderState(db, semaphoreManager, adaptiveController, tracker);
74
76
  // Step 3: 注册 auth + proxy handlers + 构建 StateRegistry
@@ -1,7 +1,8 @@
1
1
  import { resolveEndpoint } from "../routing/resolve-endpoint.js";
2
2
  import { sanitizeHeadersForLog } from "../proxy-logging.js";
3
- import { buildUpstreamHeaders, buildUpstreamUrl } from "../proxy-core.js";
4
- import { getModelStreamTimeout } from "../../db/providers.js";
3
+ import { buildUpstreamHeaders } from "../proxy-core.js";
4
+ import { buildUpstreamUrl } from "../transport/shared.js";
5
+ import { getModelTimeouts } from "../../db/providers.js";
5
6
  import { buildTransportFn } from "../transport/transport-fn.js";
6
7
  import { parseModels } from "../../config/model-context.js";
7
8
  import { applyProviderPatches } from "../patch/index.js";
@@ -87,10 +88,13 @@ export function buildIterationSetup(params) {
87
88
  } : undefined;
88
89
  // --- Build transport function ---
89
90
  const streamLoopEnabled = enhancementConfig.stream_loop_enabled;
91
+ // 合并 stream/nonStream 超时查询,单次 parseModels(applyProviderPatches 内另有一次解析)
92
+ const modelTimeouts = getModelTimeouts(provider, resolved.backend_model);
90
93
  const transportFn = buildTransportFn({
91
94
  provider, apiKey, body: patchedBody, cliHdrs, reply, upstreamPath: effectiveUpstreamPath, apiType: effectiveApiType,
92
95
  isStream, startTime, logId, effectiveModel: clientModel,
93
- streamTimeoutMs: getModelStreamTimeout(provider, resolved.backend_model),
96
+ nonStreamTimeoutMs: modelTimeouts.nonStream,
97
+ streamTimeoutMs: modelTimeouts.stream,
94
98
  tracker, matcher, request,
95
99
  streamLoopEnabled, formatTransform, responseTransform, injectedHeaders,
96
100
  timeoutContext: { modelId: resolved.backend_model, providerId: provider.id },
@@ -44,7 +44,7 @@ export interface HandleContext {
44
44
  failoverThreshold?: number;
45
45
  isFailover?: boolean;
46
46
  ruleMatcher?: RetryRuleMatcher;
47
- transportFn: (target: Target) => Promise<TransportResult>;
47
+ transportFn: (target: Target, signal?: AbortSignal) => Promise<TransportResult>;
48
48
  }
49
49
  /**
50
50
  * 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
@@ -6,6 +6,9 @@ import { SemaphoreTimeoutError, SemaphoreQueueFullError } from "../../core/error
6
6
  import { extractThinkingLevel } from "../../db/logs.js";
7
7
  const DEFAULT_BASE_DELAY_MS = 1000;
8
8
  const DEFAULT_FAILOVER_THRESHOLD = 400;
9
+ // reply.raw close listener 上限:覆盖 MAX_FAILOVER_ITERATIONS(10) + Fastify/socket 自身 listener。
10
+ // failover 循环复用同一 reply 多次挂载 close listener,不提高上限会触发 MaxListenersExceededWarning。
11
+ const REPLY_CLOSE_MAX_LISTENERS = 16;
9
12
  /**
10
13
  * 从 clientRequest JSON 中提取 thinking level。
11
14
  * 委托给 db/logs.ts 的 extractThinkingLevel,保持日志写入和 orchestrator 使用同一逻辑。
@@ -32,11 +35,21 @@ export class ProxyOrchestrator {
32
35
  async handle(request, reply, apiType, config, ctx) {
33
36
  const providerId = config.provider.id;
34
37
  const controller = new AbortController();
35
- // 客户端断连时自动 abort(保留原有行为)
36
- request.raw.on("close", () => {
37
- if (!request.raw.readableEnded) {
38
+ // 客户端断连检测:监听 reply.raw(响应端),用 writableEnded 判断响应未完成才 abort
39
+ // 旧逻辑监听 request.raw + readableEnded,对 POST 请求 readableEnded 恒为 true(body 已读完),
40
+ // 导致 close 永不 abort。failover 循环复用同一 reply 多次调用 handle(),每次 new 一个独立
41
+ // AbortController 并挂载新 close listener,故需提高 listener 上限(覆盖 MAX_FAILOVER_ITERATIONS);
42
+ // close 只触发一次,listener 随 reply.raw GC 回收,无永久泄漏。
43
+ // 防御:reply.raw 在测试中可能是简化 mock(非 EventEmitter),typeof 守卫跳过。
44
+ const rawEmitter = reply.raw;
45
+ if (typeof rawEmitter.setMaxListeners === "function") {
46
+ const current = typeof rawEmitter.getMaxListeners === "function" ? rawEmitter.getMaxListeners() : 0;
47
+ if (current < REPLY_CLOSE_MAX_LISTENERS)
48
+ rawEmitter.setMaxListeners(REPLY_CLOSE_MAX_LISTENERS);
49
+ }
50
+ reply.raw.on("close", () => {
51
+ if (!reply.raw.writableEnded)
38
52
  controller.abort();
39
- }
40
53
  });
41
54
  const trackerReq = this.buildActiveRequest(request, config, apiType);
42
55
  let wasEverQueued = false;
@@ -48,7 +61,9 @@ export class ProxyOrchestrator {
48
61
  try {
49
62
  reply.raw.destroy();
50
63
  }
51
- catch { /* reply may already be destroyed */ } // eslint-disable-line taste/no-silent-catch
64
+ catch (e) {
65
+ request.log.debug({ err: e }, "reply.raw.destroy failed (already destroyed)");
66
+ }
52
67
  });
53
68
  return this.deps.semaphoreScope.withSlot(providerId, controller.signal, () => {
54
69
  trackerReq.queued = true;
@@ -59,8 +74,8 @@ export class ProxyOrchestrator {
59
74
  trackerReq.queued = false;
60
75
  this.deps.trackerScope.markQueued(trackerReq.id, false);
61
76
  }
62
- return this.executeResilience(config, ctx);
63
- }, config.concurrencyOverride);
77
+ return this.executeResilience(config, ctx, controller.signal);
78
+ }, config.concurrencyOverride, trackerReq.id);
64
79
  }, (result) => this.extractTrackStatus(result), (result) => result.attempts.map(a => ({
65
80
  statusCode: a.statusCode,
66
81
  error: a.error,
@@ -73,7 +88,10 @@ export class ProxyOrchestrator {
73
88
  // 如果有重试尝试(非 throw 类型),说明 resilience 层的重试规则匹配了,
74
89
  // 意味着这是一个"有意义的失败"——即使上游返回 200 body error 也应该计入退避
75
90
  const retryRuleMatched = status === "failed" && result.attempts.length > 1;
76
- this.deps.adaptiveController?.onRequestComplete(providerId, { success: status === "completed", statusCode, retryRuleMatched, requestId: config.trackerId, wasQueued: wasEverQueued });
91
+ // 客户端断连不计入 provider 失败统计,避免误降并发
92
+ if (!controller.signal.aborted) {
93
+ this.deps.adaptiveController?.onRequestComplete(providerId, { success: status === "completed", statusCode, retryRuleMatched, requestId: config.trackerId, wasQueued: wasEverQueued });
94
+ }
77
95
  this.sendResponse(reply, result.result, ctx);
78
96
  return result;
79
97
  }
@@ -81,11 +99,18 @@ export class ProxyOrchestrator {
81
99
  if (e instanceof ProviderSwitchNeeded) {
82
100
  const lastResult = e.lastResult;
83
101
  const statusCode = lastResult && "statusCode" in lastResult ? lastResult.statusCode : undefined;
84
- this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode, retryRuleMatched: true, requestId: config.trackerId, wasQueued: wasEverQueued });
102
+ // 客户端断连不计入 provider 失败统计
103
+ if (!controller.signal.aborted) {
104
+ this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode, retryRuleMatched: true, requestId: config.trackerId, wasQueued: wasEverQueued });
105
+ }
85
106
  }
86
107
  else if (e instanceof SemaphoreTimeoutError || e instanceof SemaphoreQueueFullError) {
87
108
  // 信号量超时或队列满:说明并发压力大,上报给自适应控制器
88
- this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode: 429, requestId: config.trackerId });
109
+ // 客户端断连触发的 acquire abort 抛出的是 AbortError 而非 SemaphoreError;
110
+ // queueTimeout 与断连竞态时归类为非 provider 失败更合理
111
+ if (!controller.signal.aborted) {
112
+ this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode: 429, requestId: config.trackerId });
113
+ }
89
114
  }
90
115
  throw e;
91
116
  }
@@ -111,7 +136,7 @@ export class ProxyOrchestrator {
111
136
  mappingReason: config.mappingReason,
112
137
  };
113
138
  }
114
- async executeResilience(config, ctx) {
139
+ async executeResilience(config, ctx, signal) {
115
140
  if (!ctx?.transportFn)
116
141
  throw new Error("HandleContext.transportFn is required");
117
142
  const resilienceConfig = {
@@ -121,7 +146,7 @@ export class ProxyOrchestrator {
121
146
  ruleMatcher: ctx.ruleMatcher,
122
147
  providerId: config.provider.id,
123
148
  };
124
- return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig);
149
+ return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig, signal);
125
150
  }
126
151
  sendResponse(reply, result, ctx) {
127
152
  if (result.kind === "stream_success" || result.kind === "stream_abort" || result.kind === "throw") {
@@ -56,5 +56,7 @@ export interface ResilienceState {
56
56
  }
57
57
  export declare class ResilienceLayer {
58
58
  decide(result: TransportResult, state: ResilienceState, config: ResilienceConfig): ResilienceDecision;
59
- execute(targets: () => Target[], fn: (target: Target) => Promise<TransportResult>, config: ResilienceConfig): Promise<ResilienceResult>;
59
+ /** 客户端断连短路结果(不重试、不触发 failover/adaptive 退避) */
60
+ private clientAbortedResult;
61
+ execute(targets: () => Target[], fn: (target: Target, signal?: AbortSignal) => Promise<TransportResult>, config: ResilienceConfig, signal?: AbortSignal): Promise<ResilienceResult>;
60
62
  }
@@ -136,7 +136,16 @@ export class ResilienceLayer {
136
136
  }
137
137
  return { action: "done" };
138
138
  }
139
- async execute(targets, fn, config) {
139
+ /** 客户端断连短路结果(不重试、不触发 failover/adaptive 退避) */
140
+ clientAbortedResult(allAttempts, excludedTargets) {
141
+ return {
142
+ result: { kind: "throw", error: new Error("client aborted") },
143
+ attempts: allAttempts,
144
+ excludedTargets,
145
+ finalDecision: { action: "abort", reason: "client_aborted" },
146
+ };
147
+ }
148
+ async execute(targets, fn, config, signal) {
140
149
  const allAttempts = [];
141
150
  const excludedTargets = [];
142
151
  const perTargetCounts = new Map();
@@ -148,6 +157,9 @@ export class ResilienceLayer {
148
157
  perTargetCounts.set(targetKey(t), (perTargetCounts.get(targetKey(t)) ?? 0) + 1);
149
158
  };
150
159
  while (true) {
160
+ // 客户端断连短路:不重试、不触发 failover
161
+ if (signal?.aborted)
162
+ return this.clientAbortedResult(allAttempts, excludedTargets);
151
163
  if (globalAttemptIndex >= (config.iterationCap ?? DEFAULT_ITERATION_CAP)) {
152
164
  return {
153
165
  result: lastResult ?? { kind: "error", statusCode: 502, body: "Iteration cap exceeded", headers: {}, sentHeaders: {}, sentBody: "" },
@@ -171,7 +183,7 @@ export class ResilienceLayer {
171
183
  const start = Date.now();
172
184
  let transportResult;
173
185
  try {
174
- transportResult = await fn(currentTarget);
186
+ transportResult = await fn(currentTarget, signal);
175
187
  }
176
188
  catch (err) {
177
189
  const errMsg = err instanceof Error ? err.message : JSON.stringify(err);
@@ -211,6 +223,9 @@ export class ResilienceLayer {
211
223
  case "retry":
212
224
  globalAttemptIndex++;
213
225
  await sleep(decision.delayMs);
226
+ // sleep 期间客户端可能断连,再次检查避免无效重试
227
+ if (signal?.aborted)
228
+ return this.clientAbortedResult(allAttempts, excludedTargets);
214
229
  continue;
215
230
  case "failover":
216
231
  excludedTargets.push(decision.excludeTarget);
@@ -5,7 +5,7 @@ import type { ActiveRequest, AttemptSnapshot } from "../../core/monitor/types.js
5
5
  export declare class SemaphoreScope {
6
6
  private manager;
7
7
  constructor(manager: SemaphoreManager);
8
- withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride): Promise<T>;
8
+ withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride, reqId?: string): Promise<T>;
9
9
  }
10
10
  export declare class TrackerScope {
11
11
  private tracker;
@@ -3,8 +3,8 @@ export class SemaphoreScope {
3
3
  constructor(manager) {
4
4
  this.manager = manager;
5
5
  }
6
- async withSlot(providerId, signal, onQueued, fn, concurrencyOverride) {
7
- const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride);
6
+ async withSlot(providerId, signal, onQueued, fn, concurrencyOverride, reqId) {
7
+ const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride, reqId);
8
8
  try {
9
9
  return await fn();
10
10
  }