llm-simple-router 1.1.1 → 1.1.2-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin/providers.js +6 -4
- package/dist/admin/quick-setup.js +2 -0
- package/dist/app/register-routes.js +2 -0
- package/dist/config/model-context.d.ts +2 -0
- package/dist/config/model-context.js +4 -0
- package/dist/core/concurrency/semaphore.d.ts +15 -2
- package/dist/core/concurrency/semaphore.js +36 -4
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +3 -0
- package/dist/core/monitor/request-tracker.d.ts +6 -0
- package/dist/core/monitor/request-tracker.js +15 -0
- package/dist/core/types.d.ts +1 -1
- package/dist/db/providers.d.ts +12 -3
- package/dist/db/providers.js +23 -12
- package/dist/index.js +2 -0
- package/dist/proxy/handler/iteration-setup.js +3 -2
- package/dist/proxy/orchestration/orchestrator.d.ts +1 -1
- package/dist/proxy/orchestration/orchestrator.js +28 -11
- package/dist/proxy/orchestration/resilience.d.ts +3 -1
- package/dist/proxy/orchestration/resilience.js +17 -2
- package/dist/proxy/orchestration/scope.d.ts +1 -1
- package/dist/proxy/orchestration/scope.js +2 -2
- package/dist/proxy/transport/http.d.ts +14 -2
- package/dist/proxy/transport/http.js +24 -2
- package/dist/proxy/transport/stream.d.ts +56 -3
- package/dist/proxy/transport/stream.js +97 -44
- package/dist/proxy/transport/transport-fn.d.ts +2 -1
- package/dist/proxy/transport/transport-fn.js +3 -3
- package/frontend-dist/assets/AuthLayout-CNJ18LiO.js +1 -0
- package/frontend-dist/assets/Card-DHdWC9Sr.js +1 -0
- package/frontend-dist/assets/CardContent-D3XiSLZZ.js +1 -0
- package/frontend-dist/assets/CardTitle-ouEnIzFR.js +1 -0
- package/frontend-dist/assets/CascadingModelSelect-uCR_78CB.js +1 -0
- package/frontend-dist/assets/Checkbox-D4B5Gy-L.js +1 -0
- package/frontend-dist/assets/CollapsibleContent-D5uuYzAy.js +1 -0
- package/frontend-dist/assets/CollapsibleTrigger-7lOShWej.js +1 -0
- package/frontend-dist/assets/ConcurrencyControl-DSpn8cI_.js +1 -0
- package/frontend-dist/assets/Dashboard-DaMz3m0K.js +3 -0
- package/frontend-dist/assets/{Input-CR4ubPbn.js → Input-DRF4ONbF.js} +1 -1
- package/frontend-dist/assets/Label-muHpCvRx.js +1 -0
- package/frontend-dist/assets/Login-CuQA8bbc.js +1 -0
- package/frontend-dist/assets/Logs-CKO3germ.js +1 -0
- package/frontend-dist/assets/ModelMappings-De98UugX.js +1 -0
- package/frontend-dist/assets/Monitor-BzeTXX7u.js +1 -0
- package/frontend-dist/assets/Providers-C7TMCyLZ.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-CaXVxlxk.js +1 -0
- package/frontend-dist/assets/QuickSetup-9OsD4eib.js +1 -0
- package/frontend-dist/assets/RetryRules-CZtQYpr0.js +1 -0
- package/frontend-dist/assets/RouterKeys-WAbHBrzu.js +1 -0
- package/frontend-dist/assets/{RovingFocusItem-yW4o2j2b.js → RovingFocusItem-BIoz-v9k.js} +1 -1
- package/frontend-dist/assets/Schedules-DgxMv_wm.js +1 -0
- package/frontend-dist/assets/Separator-_gcj_6Dd.js +1 -0
- package/frontend-dist/assets/Settings-w33AM_0F.js +6 -0
- package/frontend-dist/assets/Setup-D6c5svHh.js +1 -0
- package/frontend-dist/assets/Skeleton-wP01Qhzi.js +1 -0
- package/frontend-dist/assets/Switch-BnSoCbqK.js +1 -0
- package/frontend-dist/assets/TableHeader-oSu5tayf.js +1 -0
- package/frontend-dist/assets/TabsTrigger-CzLaOeN9.js +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-lCG2BxB4.js +3 -0
- package/frontend-dist/assets/{VisuallyHiddenInput-DblTA4r2.js → VisuallyHiddenInput-Dz0c537Q.js} +1 -1
- package/frontend-dist/assets/arrow-down-DBlAmBEr.js +1 -0
- package/frontend-dist/assets/badge-qilWWy3M.js +1 -0
- package/frontend-dist/assets/{button-BLY-Hf5c.js → button-DhbTyZqO.js} +2 -2
- package/frontend-dist/assets/chevron-right-B1eJ5usR.js +1 -0
- package/frontend-dist/assets/dialog-J16vWuXK.js +1 -0
- package/frontend-dist/assets/{image-DIFk8qfT.js → image-BAKOQH7r.js} +1 -1
- package/frontend-dist/assets/index-BoaXEmuZ.js +58 -0
- package/frontend-dist/assets/{index-81QzLYNf.css → index-Db9D0WPf.css} +1 -1
- package/frontend-dist/assets/model-patches-B47btKty.js +1 -0
- package/frontend-dist/assets/{pencil-CAfQAVbM.js → pencil-CQiZ13On.js} +1 -1
- package/frontend-dist/assets/plus-BVJPX-4z.js +1 -0
- package/frontend-dist/assets/quickSetup-jgJgPUcH.js +1 -0
- package/frontend-dist/assets/quickSetup-qTjp3Z6J.js +1 -0
- package/frontend-dist/assets/search-DzYxIOWW.js +1 -0
- package/frontend-dist/assets/{sparkles-l34h8HXn.js → sparkles-o5rM_OIC.js} +1 -1
- package/frontend-dist/assets/transform-domain-DczpyJVT.js +1 -0
- package/frontend-dist/assets/{trash-2-DJZXGWVE.js → trash-2-DfV00PQC.js} +1 -1
- package/frontend-dist/assets/{useClipboard-n1VyehBH.js → useClipboard-BEGcEu2g.js} +1 -1
- package/frontend-dist/assets/useLogRetention-BT7mqSLb.js +1 -0
- package/frontend-dist/assets/{useProviderGroups-CSGN50We.js → useProviderGroups-CpH380OR.js} +1 -1
- package/frontend-dist/index.html +3 -3
- package/package.json +1 -1
- package/frontend-dist/assets/AuthLayout-BMEQBvsI.js +0 -1
- package/frontend-dist/assets/Card-Bp18rHny.js +0 -1
- package/frontend-dist/assets/CardContent-Dcd7e5Sl.js +0 -1
- package/frontend-dist/assets/CardTitle-4BPm9qod.js +0 -1
- package/frontend-dist/assets/CascadingModelSelect-C8eE-_Aa.js +0 -1
- package/frontend-dist/assets/Checkbox-bdMCxX9q.js +0 -1
- package/frontend-dist/assets/CollapsibleContent-CCmsmTzs.js +0 -1
- package/frontend-dist/assets/CollapsibleTrigger-ChYMLseB.js +0 -1
- package/frontend-dist/assets/ConcurrencyControl-POyXogzw.js +0 -1
- package/frontend-dist/assets/Dashboard-DsmSKish.js +0 -3
- package/frontend-dist/assets/Label-BDuz6YPS.js +0 -1
- package/frontend-dist/assets/Login-CioIRrBW.js +0 -1
- package/frontend-dist/assets/Logs-9vLzS1Bh.js +0 -1
- package/frontend-dist/assets/ModelMappings-BtQTCcI-.js +0 -1
- package/frontend-dist/assets/Monitor-CeGNhnXR.js +0 -1
- package/frontend-dist/assets/Providers-TpTO0Obk.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-Dj6SP79r.js +0 -1
- package/frontend-dist/assets/QuickSetup-CgFMibLV.js +0 -1
- package/frontend-dist/assets/RetryRules-Bnsxxp9o.js +0 -1
- package/frontend-dist/assets/RouterKeys-CBWNaImr.js +0 -1
- package/frontend-dist/assets/Schedules-xw4dyUxH.js +0 -1
- package/frontend-dist/assets/Separator-Cio3-Y1D.js +0 -1
- package/frontend-dist/assets/Settings-BYKxym6N.js +0 -6
- package/frontend-dist/assets/Setup-Bx9pqrpG.js +0 -1
- package/frontend-dist/assets/Skeleton-DHTCmupZ.js +0 -1
- package/frontend-dist/assets/Switch-Uke4Y0hy.js +0 -1
- package/frontend-dist/assets/TableHeader-Bxslem2C.js +0 -1
- package/frontend-dist/assets/TabsTrigger-DiaXIj9E.js +0 -1
- package/frontend-dist/assets/UnifiedRequestDialog-Bfe7dA3M.js +0 -3
- package/frontend-dist/assets/arrow-down-CIbMwJlY.js +0 -1
- package/frontend-dist/assets/badge-Bc2e7tTn.js +0 -1
- package/frontend-dist/assets/chevron-right-BZFZI5r7.js +0 -1
- package/frontend-dist/assets/dialog-BSTdB8YN.js +0 -1
- package/frontend-dist/assets/index-B4Kqm-X3.js +0 -58
- package/frontend-dist/assets/model-patches-B_G57swK.js +0 -1
- package/frontend-dist/assets/plus-BuTSg_Jf.js +0 -1
- package/frontend-dist/assets/quickSetup-CqxQRMCR.js +0 -1
- package/frontend-dist/assets/quickSetup-DplqYrvf.js +0 -1
- package/frontend-dist/assets/search-BFXW-Mr8.js +0 -1
- package/frontend-dist/assets/transform-domain-Hmcf4Zkk.js +0 -1
- package/frontend-dist/assets/useLogRetention-VT_euOKe.js +0 -1
package/dist/admin/providers.js
CHANGED
|
@@ -75,6 +75,8 @@ function extractModelOverrides(models) {
|
|
|
75
75
|
const entry = { name, patches: (m.patches ?? []).map(normalizePatchName) };
|
|
76
76
|
if (m.stream_timeout_ms != null)
|
|
77
77
|
entry.stream_timeout_ms = m.stream_timeout_ms;
|
|
78
|
+
if (m.non_stream_timeout_ms != null)
|
|
79
|
+
entry.non_stream_timeout_ms = m.non_stream_timeout_ms;
|
|
78
80
|
if (m.capabilities != null && Array.isArray(m.capabilities))
|
|
79
81
|
entry.capabilities = m.capabilities;
|
|
80
82
|
entries.push(entry);
|
|
@@ -124,8 +126,8 @@ const CreateProviderSchema = Type.Object({
|
|
|
124
126
|
endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
|
|
125
127
|
models: Type.Optional(Type.Array(Type.Union([
|
|
126
128
|
Type.String(),
|
|
127
|
-
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
128
|
-
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
129
|
+
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
130
|
+
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
129
131
|
]))),
|
|
130
132
|
is_active: Type.Optional(Type.Number()),
|
|
131
133
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
@@ -146,8 +148,8 @@ const UpdateProviderSchema = Type.Object({
|
|
|
146
148
|
endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
|
|
147
149
|
models: Type.Optional(Type.Array(Type.Union([
|
|
148
150
|
Type.String(),
|
|
149
|
-
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
150
|
-
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
151
|
+
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
152
|
+
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), non_stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
151
153
|
]))),
|
|
152
154
|
is_active: Type.Optional(Type.Number()),
|
|
153
155
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
@@ -54,6 +54,7 @@ const QuickSetupProviderSchema = Type.Object({
|
|
|
54
54
|
context_window: Type.Optional(Type.Number()),
|
|
55
55
|
patches: Type.Optional(Type.Array(Type.String())),
|
|
56
56
|
stream_timeout_ms: Type.Optional(Type.Number()),
|
|
57
|
+
non_stream_timeout_ms: Type.Optional(Type.Number()),
|
|
57
58
|
capabilities: Type.Optional(Type.Array(Type.String())),
|
|
58
59
|
})),
|
|
59
60
|
endpoints: Type.Optional(Type.Array(QuickSetupEndpointSchema, { minItems: 1 })),
|
|
@@ -122,6 +123,7 @@ export const adminQuickSetupRoutes = (app, options, done) => {
|
|
|
122
123
|
...(m.context_window != null ? { context_window: m.context_window } : {}),
|
|
123
124
|
...(m.patches && m.patches.length > 0 ? { patches: m.patches } : {}),
|
|
124
125
|
...(m.stream_timeout_ms != null ? { stream_timeout_ms: m.stream_timeout_ms } : {}),
|
|
126
|
+
...(m.non_stream_timeout_ms != null ? { non_stream_timeout_ms: m.non_stream_timeout_ms } : {}),
|
|
125
127
|
...(m.capabilities && m.capabilities.length > 0 ? { capabilities: m.capabilities } : {}),
|
|
126
128
|
}));
|
|
127
129
|
const adaptiveEnabled = body.provider.concurrency_mode === 'auto' ? 1 : 0;
|
|
@@ -77,6 +77,8 @@ export function registerRoutes(app, opts) {
|
|
|
77
77
|
dbSizeMonitor.stop();
|
|
78
78
|
tracker.stopPushInterval();
|
|
79
79
|
tracker.closeAllClients();
|
|
80
|
+
// 先终止所有 inflight 请求(复用 kill 机制同步释放信号量),再清空信号量配置
|
|
81
|
+
tracker.abortAllInflight();
|
|
80
82
|
semaphoreManager.removeAll();
|
|
81
83
|
proxyAgentFactory.invalidateAll();
|
|
82
84
|
const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
|
|
@@ -3,6 +3,7 @@ export interface ModelInfo {
|
|
|
3
3
|
context_window: number | null;
|
|
4
4
|
patches: string[];
|
|
5
5
|
stream_timeout_ms?: number;
|
|
6
|
+
non_stream_timeout_ms?: number;
|
|
6
7
|
capabilities?: string[];
|
|
7
8
|
}
|
|
8
9
|
export interface ModelEntry {
|
|
@@ -10,6 +11,7 @@ export interface ModelEntry {
|
|
|
10
11
|
context_window?: number;
|
|
11
12
|
patches?: string[];
|
|
12
13
|
stream_timeout_ms?: number;
|
|
14
|
+
non_stream_timeout_ms?: number;
|
|
13
15
|
capabilities?: string[];
|
|
14
16
|
}
|
|
15
17
|
export declare const MODEL_CONTEXT_WINDOWS: Record<string, number>;
|
|
@@ -247,6 +247,8 @@ export function parseModels(raw) {
|
|
|
247
247
|
};
|
|
248
248
|
if (obj.stream_timeout_ms != null)
|
|
249
249
|
entry.stream_timeout_ms = obj.stream_timeout_ms;
|
|
250
|
+
if (obj.non_stream_timeout_ms != null)
|
|
251
|
+
entry.non_stream_timeout_ms = obj.non_stream_timeout_ms;
|
|
250
252
|
// capabilities: 显式 > model-directory > 硬编码白名单 > 默认 ["text"]
|
|
251
253
|
entry.capabilities = obj.capabilities ?? lookupCapabilities(modelName);
|
|
252
254
|
return entry;
|
|
@@ -267,6 +269,8 @@ export function buildModelInfoList(modelEntries, overrides) {
|
|
|
267
269
|
};
|
|
268
270
|
if (entry.stream_timeout_ms != null)
|
|
269
271
|
info.stream_timeout_ms = entry.stream_timeout_ms;
|
|
272
|
+
if (entry.non_stream_timeout_ms != null)
|
|
273
|
+
info.non_stream_timeout_ms = entry.non_stream_timeout_ms;
|
|
270
274
|
if (entry.capabilities != null)
|
|
271
275
|
info.capabilities = entry.capabilities;
|
|
272
276
|
return info;
|
|
@@ -6,19 +6,32 @@ export interface AcquireToken {
|
|
|
6
6
|
readonly generation: number;
|
|
7
7
|
/** acquire 时 maxConcurrency=0(不计数),release 时跳过递减 */
|
|
8
8
|
readonly bypassed: boolean;
|
|
9
|
+
/** 幂等标志:release 置 true,重复 release 直接跳过(防 kill 与自然完成双重递减) */
|
|
10
|
+
released: boolean;
|
|
11
|
+
/** 关联请求 ID,用于 releaseByReqId 反查及自然完成时清理 reqTokenMap */
|
|
12
|
+
readonly reqId?: string;
|
|
9
13
|
}
|
|
10
14
|
export declare class SemaphoreManager {
|
|
11
15
|
private readonly entries;
|
|
16
|
+
/** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
|
|
17
|
+
* acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
|
|
18
|
+
private readonly reqTokenMap;
|
|
12
19
|
/** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
|
|
13
20
|
private nextGeneration;
|
|
14
21
|
private getOrCreate;
|
|
15
22
|
updateConfig(providerId: string, config: ConcurrencyConfig): void;
|
|
23
|
+
/** 构建 token 并按 reqId 存入 reqTokenMap(统一 bypassed/direct/queued 三路径的记录逻辑) */
|
|
24
|
+
private buildAndRecordToken;
|
|
16
25
|
acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void, logger?: Logger, override?: {
|
|
17
26
|
max_concurrency?: number;
|
|
18
27
|
queue_timeout_ms?: number;
|
|
19
28
|
max_queue_size?: number;
|
|
20
|
-
}): Promise<AcquireToken>;
|
|
21
|
-
release(providerId: string, token: AcquireToken, logger?: Logger): void;
|
|
29
|
+
}, reqId?: string): Promise<AcquireToken>;
|
|
30
|
+
release(providerId: string, token: AcquireToken | undefined, logger?: Logger): void;
|
|
31
|
+
/** 按 reqId 同步释放信号量(kill 路径专用)。
|
|
32
|
+
* - 已 acquire:取 {token, providerId} 调 release(幂等)
|
|
33
|
+
* - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
|
|
34
|
+
releaseByReqId(reqId: string): void;
|
|
22
35
|
getStatus(providerId: string): {
|
|
23
36
|
active: number;
|
|
24
37
|
queued: number;
|
|
@@ -2,6 +2,9 @@ import { SemaphoreQueueFullError, SemaphoreTimeoutError } from "../errors.js";
|
|
|
2
2
|
export { SemaphoreQueueFullError, SemaphoreTimeoutError };
|
|
3
3
|
export class SemaphoreManager {
|
|
4
4
|
entries = new Map();
|
|
5
|
+
/** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
|
|
6
|
+
* acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
|
|
7
|
+
reqTokenMap = new Map();
|
|
5
8
|
/** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
|
|
6
9
|
nextGeneration = 0;
|
|
7
10
|
getOrCreate(providerId) {
|
|
@@ -53,17 +56,24 @@ export class SemaphoreManager {
|
|
|
53
56
|
e.resolve();
|
|
54
57
|
}
|
|
55
58
|
}
|
|
56
|
-
|
|
59
|
+
/** 构建 token 并按 reqId 存入 reqTokenMap(统一 bypassed/direct/queued 三路径的记录逻辑) */
|
|
60
|
+
buildAndRecordToken(entry, bypassed, reqId, providerId) {
|
|
61
|
+
const token = { generation: entry.generation, bypassed, released: false, reqId };
|
|
62
|
+
if (reqId)
|
|
63
|
+
this.reqTokenMap.set(reqId, { token, providerId });
|
|
64
|
+
return token;
|
|
65
|
+
}
|
|
66
|
+
async acquire(providerId, signal, onQueued, logger, override, reqId) {
|
|
57
67
|
const entry = this.getOrCreate(providerId);
|
|
58
68
|
const maxConcurrency = override?.max_concurrency ?? entry.config.maxConcurrency;
|
|
59
69
|
const queueTimeoutMs = Math.max(0, override?.queue_timeout_ms ?? entry.config.queueTimeoutMs);
|
|
60
70
|
const maxQueueSize = Math.max(0, override?.max_queue_size ?? entry.config.maxQueueSize);
|
|
61
71
|
if (maxConcurrency === 0)
|
|
62
|
-
return
|
|
72
|
+
return this.buildAndRecordToken(entry, true, reqId, providerId);
|
|
63
73
|
if (entry.current < maxConcurrency) {
|
|
64
74
|
entry.current++;
|
|
65
75
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, action: "acquire_direct" }, "Semaphore: acquired directly");
|
|
66
|
-
return
|
|
76
|
+
return this.buildAndRecordToken(entry, false, reqId, providerId);
|
|
67
77
|
}
|
|
68
78
|
if (entry.queue.length >= maxQueueSize) {
|
|
69
79
|
logger?.debug?.({ providerId, queueLength: entry.queue.length, maxQueueSize, action: "acquire_rejected" }, "Semaphore: queue full, rejecting");
|
|
@@ -72,9 +82,11 @@ export class SemaphoreManager {
|
|
|
72
82
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_queued" }, "Semaphore: entering wait queue");
|
|
73
83
|
onQueued?.();
|
|
74
84
|
return new Promise((resolve, reject) => {
|
|
75
|
-
const token = { generation: entry.generation, bypassed: false };
|
|
76
85
|
const qe = {
|
|
77
86
|
resolve: () => {
|
|
87
|
+
// 关键:在真正获取槽位后才构建并记录 token。
|
|
88
|
+
// 若在 executor 创建 token 后立即记录,排队中被 kill 会误减 current。
|
|
89
|
+
const token = this.buildAndRecordToken(entry, false, reqId, providerId);
|
|
78
90
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_resolved" }, "Semaphore: left wait queue, acquired");
|
|
79
91
|
resolve(token);
|
|
80
92
|
},
|
|
@@ -107,6 +119,17 @@ export class SemaphoreManager {
|
|
|
107
119
|
});
|
|
108
120
|
}
|
|
109
121
|
release(providerId, token, logger) {
|
|
122
|
+
if (!token)
|
|
123
|
+
return;
|
|
124
|
+
// 幂等:kill 强制释放与自然完成都走此处,已 released 则跳过(防双重递减)
|
|
125
|
+
if (token.released) {
|
|
126
|
+
logger?.debug?.({ providerId, action: "release_idempotent" }, "Semaphore: token already released, skipping");
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
token.released = true;
|
|
130
|
+
// 清理 reqTokenMap(自然完成自动回收,防 map 无限增长)
|
|
131
|
+
if (token.reqId)
|
|
132
|
+
this.reqTokenMap.delete(token.reqId);
|
|
110
133
|
const entry = this.entries.get(providerId);
|
|
111
134
|
if (!entry)
|
|
112
135
|
return;
|
|
@@ -130,6 +153,15 @@ export class SemaphoreManager {
|
|
|
130
153
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency: entry.config.maxConcurrency, action: "release_decrement" }, "Semaphore: released slot");
|
|
131
154
|
}
|
|
132
155
|
}
|
|
156
|
+
/** 按 reqId 同步释放信号量(kill 路径专用)。
|
|
157
|
+
* - 已 acquire:取 {token, providerId} 调 release(幂等)
|
|
158
|
+
* - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
|
|
159
|
+
releaseByReqId(reqId) {
|
|
160
|
+
const record = this.reqTokenMap.get(reqId);
|
|
161
|
+
if (!record)
|
|
162
|
+
return;
|
|
163
|
+
this.release(record.providerId, record.token);
|
|
164
|
+
}
|
|
133
165
|
getStatus(providerId) {
|
|
134
166
|
const entry = this.entries.get(providerId);
|
|
135
167
|
if (!entry)
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -14,5 +14,6 @@ export declare function getProxyApiType(url: string): string | null;
|
|
|
14
14
|
export declare const MS_PER_SECOND = 1000;
|
|
15
15
|
export declare const SECONDS_PER_DAY = 86400;
|
|
16
16
|
export declare const UPSTREAM_SUCCESS = 200;
|
|
17
|
+
export declare const DEFAULT_GET_TIMEOUT_MS = 30000;
|
|
17
18
|
/** 过滤掉不应转发给下游的 hop-by-hop headers */
|
|
18
19
|
export declare function filterHeaders(raw: import("./types.js").RawHeaders): Record<string, string>;
|
package/dist/core/constants.js
CHANGED
|
@@ -27,6 +27,9 @@ export const MS_PER_SECOND = 1000;
|
|
|
27
27
|
export const SECONDS_PER_DAY = 86_400;
|
|
28
28
|
// 上游成功状态码
|
|
29
29
|
export const UPSTREAM_SUCCESS = 200;
|
|
30
|
+
// callGet(admin 探测 /v1/models)默认超时:30s。
|
|
31
|
+
// 仅响应头前超时,与流式 idleTimer 无关。
|
|
32
|
+
export const DEFAULT_GET_TIMEOUT_MS = 30_000;
|
|
30
33
|
/** 过滤掉不应转发给下游的 hop-by-hop headers */
|
|
31
34
|
const SKIP_DOWNSTREAM = new Set([
|
|
32
35
|
"content-length",
|
|
@@ -35,12 +35,16 @@ export declare class RequestTracker {
|
|
|
35
35
|
readonly runtimeCollector: RuntimeCollector;
|
|
36
36
|
private readonly semaphoreManager?;
|
|
37
37
|
private adaptiveStatusProvider?;
|
|
38
|
+
/** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
|
|
39
|
+
private releaseSlotProvider?;
|
|
38
40
|
constructor(deps?: {
|
|
39
41
|
semaphoreManager?: ISemaphoreStatus;
|
|
40
42
|
runtimeCollector?: RuntimeCollector;
|
|
41
43
|
logger?: Logger;
|
|
42
44
|
});
|
|
43
45
|
setAdaptiveStatusProvider(provider: IAdaptiveStatus): void;
|
|
46
|
+
/** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
|
|
47
|
+
setReleaseSlotProvider(fn: (reqId: string) => void): void;
|
|
44
48
|
start(req: ActiveRequest): void;
|
|
45
49
|
/** 轻量级节流推送:流式内容变更后 500ms 内批量广播 */
|
|
46
50
|
private scheduleStreamContentPush;
|
|
@@ -63,6 +67,8 @@ export declare class RequestTracker {
|
|
|
63
67
|
registerKillCallback(id: string, callback: () => void): void;
|
|
64
68
|
/** 主动终止指定请求。返回 true 表示成功终止,false 表示请求不存在或已完成 */
|
|
65
69
|
killRequest(id: string): boolean;
|
|
70
|
+
/** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
|
|
71
|
+
abortAllInflight(): void;
|
|
66
72
|
getStats(): StatsSnapshot;
|
|
67
73
|
getConcurrency(): ProviderConcurrencySnapshot[];
|
|
68
74
|
getRuntime(): RuntimeMetrics;
|
|
@@ -27,6 +27,8 @@ export class RequestTracker {
|
|
|
27
27
|
runtimeCollector;
|
|
28
28
|
semaphoreManager;
|
|
29
29
|
adaptiveStatusProvider;
|
|
30
|
+
/** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
|
|
31
|
+
releaseSlotProvider;
|
|
30
32
|
constructor(deps) {
|
|
31
33
|
this.semaphoreManager = deps?.semaphoreManager;
|
|
32
34
|
this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
|
|
@@ -36,6 +38,10 @@ export class RequestTracker {
|
|
|
36
38
|
setAdaptiveStatusProvider(provider) {
|
|
37
39
|
this.adaptiveStatusProvider = provider;
|
|
38
40
|
}
|
|
41
|
+
/** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
|
|
42
|
+
setReleaseSlotProvider(fn) {
|
|
43
|
+
this.releaseSlotProvider = fn;
|
|
44
|
+
}
|
|
39
45
|
// --- Core methods ---
|
|
40
46
|
start(req) {
|
|
41
47
|
this.activeMap.set(req.id, { ...req });
|
|
@@ -201,12 +207,21 @@ export class RequestTracker {
|
|
|
201
207
|
this.killCallbacks.delete(id);
|
|
202
208
|
this.logger?.info?.({ reqId: id }, "Tracker: killRequest");
|
|
203
209
|
callback();
|
|
210
|
+
// 同步释放信号量槽位(releaseByReqId 幂等:未 acquire 的排队请求 noop)
|
|
211
|
+
this.releaseSlotProvider?.(id);
|
|
204
212
|
// transport 可能尚未 resolve(上游未响应时 StreamProxy 不存在),强制完成请求
|
|
205
213
|
if (this.activeMap.has(id)) {
|
|
206
214
|
this.complete(id, { status: "failed" });
|
|
207
215
|
}
|
|
208
216
|
return true;
|
|
209
217
|
}
|
|
218
|
+
/** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
|
|
219
|
+
abortAllInflight() {
|
|
220
|
+
const ids = [...this.killCallbacks.keys()];
|
|
221
|
+
for (const id of ids) {
|
|
222
|
+
this.killRequest(id);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
210
225
|
// --- Stats / monitoring ---
|
|
211
226
|
getStats() {
|
|
212
227
|
return this.statsAggregator.getStats();
|
package/dist/core/types.d.ts
CHANGED
|
@@ -91,7 +91,7 @@ export type TransportResult = {
|
|
|
91
91
|
providerId: string;
|
|
92
92
|
};
|
|
93
93
|
timeoutMs?: number;
|
|
94
|
-
abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection";
|
|
94
|
+
abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection" | "pipe_error";
|
|
95
95
|
} | {
|
|
96
96
|
kind: "error";
|
|
97
97
|
statusCode: number;
|
package/dist/db/providers.d.ts
CHANGED
|
@@ -24,9 +24,18 @@ export interface Provider {
|
|
|
24
24
|
created_at: string;
|
|
25
25
|
updated_at: string;
|
|
26
26
|
}
|
|
27
|
-
/** 默认流式超时
|
|
28
|
-
export declare const DEFAULT_STREAM_TIMEOUT_MS =
|
|
29
|
-
/**
|
|
27
|
+
/** 默认流式超时 5 分钟 */
|
|
28
|
+
export declare const DEFAULT_STREAM_TIMEOUT_MS = 300000;
|
|
29
|
+
/** 默认非流式超时 10 分钟 */
|
|
30
|
+
export declare const DEFAULT_NON_STREAM_TIMEOUT_MS = 600000;
|
|
31
|
+
/** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
|
|
32
|
+
* stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
|
|
33
|
+
* nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
|
|
34
|
+
export declare function getModelTimeouts(provider: Provider, backendModel: string): {
|
|
35
|
+
stream: number;
|
|
36
|
+
nonStream: number;
|
|
37
|
+
};
|
|
38
|
+
/** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
|
|
30
39
|
export declare function getModelStreamTimeout(provider: Provider, backendModel: string): number;
|
|
31
40
|
export declare const PROVIDER_CONCURRENCY_DEFAULTS: {
|
|
32
41
|
readonly max_concurrency: 0;
|
package/dist/db/providers.js
CHANGED
|
@@ -1,20 +1,31 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { buildUpdateQuery, deleteById } from "./helpers.js";
|
|
3
3
|
import { parseModels } from "../config/model-context.js";
|
|
4
|
-
/** 默认流式超时
|
|
5
|
-
export const DEFAULT_STREAM_TIMEOUT_MS =
|
|
6
|
-
/**
|
|
7
|
-
export
|
|
4
|
+
/** 默认流式超时 5 分钟 */
|
|
5
|
+
export const DEFAULT_STREAM_TIMEOUT_MS = 300_000;
|
|
6
|
+
/** 默认非流式超时 10 分钟 */
|
|
7
|
+
export const DEFAULT_NON_STREAM_TIMEOUT_MS = 600_000;
|
|
8
|
+
/** 0 表示禁用超时(返回 Infinity);undefined/null/未设置 使用默认值 */
|
|
9
|
+
function resolveTimeout(value, fallback) {
|
|
10
|
+
return value === 0 ? Number.POSITIVE_INFINITY : value ?? fallback;
|
|
11
|
+
}
|
|
12
|
+
/** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
|
|
13
|
+
* stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
|
|
14
|
+
* nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
|
|
15
|
+
export function getModelTimeouts(provider, backendModel) {
|
|
8
16
|
const entries = parseModels(provider.models);
|
|
9
17
|
const entry = entries.find(m => m.name === backendModel);
|
|
10
|
-
if (!entry)
|
|
11
|
-
return DEFAULT_STREAM_TIMEOUT_MS;
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
+
if (!entry) {
|
|
19
|
+
return { stream: DEFAULT_STREAM_TIMEOUT_MS, nonStream: DEFAULT_NON_STREAM_TIMEOUT_MS };
|
|
20
|
+
}
|
|
21
|
+
return {
|
|
22
|
+
stream: resolveTimeout(entry.stream_timeout_ms, DEFAULT_STREAM_TIMEOUT_MS),
|
|
23
|
+
nonStream: resolveTimeout(entry.non_stream_timeout_ms, DEFAULT_NON_STREAM_TIMEOUT_MS),
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
/** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
|
|
27
|
+
export function getModelStreamTimeout(provider, backendModel) {
|
|
28
|
+
return getModelTimeouts(provider, backendModel).stream;
|
|
18
29
|
}
|
|
19
30
|
export const PROVIDER_CONCURRENCY_DEFAULTS = {
|
|
20
31
|
max_concurrency: 0,
|
package/dist/index.js
CHANGED
|
@@ -69,6 +69,8 @@ export async function buildApp(options) {
|
|
|
69
69
|
const pluginRegistry = container.resolve(SERVICE_KEYS.pluginRegistry);
|
|
70
70
|
// Wire adaptive controller to tracker
|
|
71
71
|
tracker.setAdaptiveStatusProvider(adaptiveController);
|
|
72
|
+
// 绑定信号量释放回调:kill 时按 reqId 同步释放槽位(防 kill 不释放信号量)
|
|
73
|
+
tracker.setReleaseSlotProvider((reqId) => semaphoreManager.releaseByReqId(reqId));
|
|
72
74
|
// 从 DB 读取已有 provider 的并发配置,初始化信号量/adaptive/tracker 缓存
|
|
73
75
|
initializeProviderState(db, semaphoreManager, adaptiveController, tracker);
|
|
74
76
|
// Step 3: 注册 auth + proxy handlers + 构建 StateRegistry
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { resolveEndpoint } from "../routing/resolve-endpoint.js";
|
|
2
2
|
import { sanitizeHeadersForLog } from "../proxy-logging.js";
|
|
3
3
|
import { buildUpstreamHeaders, buildUpstreamUrl } from "../proxy-core.js";
|
|
4
|
-
import {
|
|
4
|
+
import { getModelTimeouts } from "../../db/providers.js";
|
|
5
5
|
import { buildTransportFn } from "../transport/transport-fn.js";
|
|
6
6
|
import { parseModels } from "../../config/model-context.js";
|
|
7
7
|
import { applyProviderPatches } from "../patch/index.js";
|
|
@@ -90,7 +90,8 @@ export function buildIterationSetup(params) {
|
|
|
90
90
|
const transportFn = buildTransportFn({
|
|
91
91
|
provider, apiKey, body: patchedBody, cliHdrs, reply, upstreamPath: effectiveUpstreamPath, apiType: effectiveApiType,
|
|
92
92
|
isStream, startTime, logId, effectiveModel: clientModel,
|
|
93
|
-
|
|
93
|
+
nonStreamTimeoutMs: getModelTimeouts(provider, resolved.backend_model).nonStream,
|
|
94
|
+
streamTimeoutMs: getModelTimeouts(provider, resolved.backend_model).stream,
|
|
94
95
|
tracker, matcher, request,
|
|
95
96
|
streamLoopEnabled, formatTransform, responseTransform, injectedHeaders,
|
|
96
97
|
timeoutContext: { modelId: resolved.backend_model, providerId: provider.id },
|
|
@@ -44,7 +44,7 @@ export interface HandleContext {
|
|
|
44
44
|
failoverThreshold?: number;
|
|
45
45
|
isFailover?: boolean;
|
|
46
46
|
ruleMatcher?: RetryRuleMatcher;
|
|
47
|
-
transportFn: (target: Target) => Promise<TransportResult>;
|
|
47
|
+
transportFn: (target: Target, signal?: AbortSignal) => Promise<TransportResult>;
|
|
48
48
|
}
|
|
49
49
|
/**
|
|
50
50
|
* 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
|
|
@@ -32,11 +32,18 @@ export class ProxyOrchestrator {
|
|
|
32
32
|
async handle(request, reply, apiType, config, ctx) {
|
|
33
33
|
const providerId = config.provider.id;
|
|
34
34
|
const controller = new AbortController();
|
|
35
|
-
//
|
|
36
|
-
request.raw
|
|
37
|
-
|
|
35
|
+
// 客户端断连检测:监听 reply.raw(响应端),用 writableEnded 判断响应未完成才 abort。
|
|
36
|
+
// 旧逻辑监听 request.raw + readableEnded 对 POST 请求恒为 true(body 已读完),close 永不 abort。
|
|
37
|
+
//
|
|
38
|
+
// 注意:failover 循环会复用同一 reply 多次调用 handle(),每次都 new 一个独立的
|
|
39
|
+
// AbortController。此处必须每次都挂载新的 close listener,让该迭代的 controller
|
|
40
|
+
// 绑定到 close 事件。若用 WeakSet 去重(旧实现),迭代 2+ 的 controller 永远不
|
|
41
|
+
// 会因客户端断连 abort,导致上游连接泄漏 + Promise 永挂。controller.abort() 幂等,
|
|
42
|
+
// 多 listener 各 abort 各自的 controller互不干扰;listener 数量受 MAX_FAILOVER_ITERATIONS
|
|
43
|
+
// 上界约束(通常 ≤5),reply.raw 关闭后随对象 GC 一起回收,无永久泄漏。
|
|
44
|
+
reply.raw.on("close", () => {
|
|
45
|
+
if (!reply.raw.writableEnded)
|
|
38
46
|
controller.abort();
|
|
39
|
-
}
|
|
40
47
|
});
|
|
41
48
|
const trackerReq = this.buildActiveRequest(request, config, apiType);
|
|
42
49
|
let wasEverQueued = false;
|
|
@@ -59,8 +66,8 @@ export class ProxyOrchestrator {
|
|
|
59
66
|
trackerReq.queued = false;
|
|
60
67
|
this.deps.trackerScope.markQueued(trackerReq.id, false);
|
|
61
68
|
}
|
|
62
|
-
return this.executeResilience(config, ctx);
|
|
63
|
-
}, config.concurrencyOverride);
|
|
69
|
+
return this.executeResilience(config, ctx, controller.signal);
|
|
70
|
+
}, config.concurrencyOverride, trackerReq.id);
|
|
64
71
|
}, (result) => this.extractTrackStatus(result), (result) => result.attempts.map(a => ({
|
|
65
72
|
statusCode: a.statusCode,
|
|
66
73
|
error: a.error,
|
|
@@ -73,7 +80,10 @@ export class ProxyOrchestrator {
|
|
|
73
80
|
// 如果有重试尝试(非 throw 类型),说明 resilience 层的重试规则匹配了,
|
|
74
81
|
// 意味着这是一个"有意义的失败"——即使上游返回 200 body error 也应该计入退避
|
|
75
82
|
const retryRuleMatched = status === "failed" && result.attempts.length > 1;
|
|
76
|
-
|
|
83
|
+
// 客户端断连不计入 provider 失败统计,避免误降并发
|
|
84
|
+
if (!controller.signal.aborted) {
|
|
85
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: status === "completed", statusCode, retryRuleMatched, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
86
|
+
}
|
|
77
87
|
this.sendResponse(reply, result.result, ctx);
|
|
78
88
|
return result;
|
|
79
89
|
}
|
|
@@ -81,11 +91,18 @@ export class ProxyOrchestrator {
|
|
|
81
91
|
if (e instanceof ProviderSwitchNeeded) {
|
|
82
92
|
const lastResult = e.lastResult;
|
|
83
93
|
const statusCode = lastResult && "statusCode" in lastResult ? lastResult.statusCode : undefined;
|
|
84
|
-
|
|
94
|
+
// 客户端断连不计入 provider 失败统计
|
|
95
|
+
if (!controller.signal.aborted) {
|
|
96
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode, retryRuleMatched: true, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
97
|
+
}
|
|
85
98
|
}
|
|
86
99
|
else if (e instanceof SemaphoreTimeoutError || e instanceof SemaphoreQueueFullError) {
|
|
87
100
|
// 信号量超时或队列满:说明并发压力大,上报给自适应控制器
|
|
88
|
-
|
|
101
|
+
// 客户端断连触发的 acquire abort 走 AbortError 而非 SemaphoreError;
|
|
102
|
+
// queueTimeout 与断连竞态时归类为非 provider 失败更合理
|
|
103
|
+
if (!controller.signal.aborted) {
|
|
104
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode: 429, requestId: config.trackerId });
|
|
105
|
+
}
|
|
89
106
|
}
|
|
90
107
|
throw e;
|
|
91
108
|
}
|
|
@@ -111,7 +128,7 @@ export class ProxyOrchestrator {
|
|
|
111
128
|
mappingReason: config.mappingReason,
|
|
112
129
|
};
|
|
113
130
|
}
|
|
114
|
-
async executeResilience(config, ctx) {
|
|
131
|
+
async executeResilience(config, ctx, signal) {
|
|
115
132
|
if (!ctx?.transportFn)
|
|
116
133
|
throw new Error("HandleContext.transportFn is required");
|
|
117
134
|
const resilienceConfig = {
|
|
@@ -121,7 +138,7 @@ export class ProxyOrchestrator {
|
|
|
121
138
|
ruleMatcher: ctx.ruleMatcher,
|
|
122
139
|
providerId: config.provider.id,
|
|
123
140
|
};
|
|
124
|
-
return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig);
|
|
141
|
+
return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig, signal);
|
|
125
142
|
}
|
|
126
143
|
sendResponse(reply, result, ctx) {
|
|
127
144
|
if (result.kind === "stream_success" || result.kind === "stream_abort" || result.kind === "throw") {
|
|
@@ -56,5 +56,7 @@ export interface ResilienceState {
|
|
|
56
56
|
}
|
|
57
57
|
export declare class ResilienceLayer {
|
|
58
58
|
decide(result: TransportResult, state: ResilienceState, config: ResilienceConfig): ResilienceDecision;
|
|
59
|
-
|
|
59
|
+
/** 客户端断连短路结果(不重试、不触发 failover/adaptive 退避) */
|
|
60
|
+
private clientAbortedResult;
|
|
61
|
+
execute(targets: () => Target[], fn: (target: Target, signal?: AbortSignal) => Promise<TransportResult>, config: ResilienceConfig, signal?: AbortSignal): Promise<ResilienceResult>;
|
|
60
62
|
}
|
|
@@ -136,7 +136,16 @@ export class ResilienceLayer {
|
|
|
136
136
|
}
|
|
137
137
|
return { action: "done" };
|
|
138
138
|
}
|
|
139
|
-
|
|
139
|
+
/** 客户端断连短路结果(不重试、不触发 failover/adaptive 退避) */
|
|
140
|
+
clientAbortedResult(allAttempts, excludedTargets) {
|
|
141
|
+
return {
|
|
142
|
+
result: { kind: "throw", error: new Error("client aborted") },
|
|
143
|
+
attempts: allAttempts,
|
|
144
|
+
excludedTargets,
|
|
145
|
+
finalDecision: { action: "abort", reason: "client_aborted" },
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
async execute(targets, fn, config, signal) {
|
|
140
149
|
const allAttempts = [];
|
|
141
150
|
const excludedTargets = [];
|
|
142
151
|
const perTargetCounts = new Map();
|
|
@@ -148,6 +157,9 @@ export class ResilienceLayer {
|
|
|
148
157
|
perTargetCounts.set(targetKey(t), (perTargetCounts.get(targetKey(t)) ?? 0) + 1);
|
|
149
158
|
};
|
|
150
159
|
while (true) {
|
|
160
|
+
// 客户端断连短路:不重试、不触发 failover
|
|
161
|
+
if (signal?.aborted)
|
|
162
|
+
return this.clientAbortedResult(allAttempts, excludedTargets);
|
|
151
163
|
if (globalAttemptIndex >= (config.iterationCap ?? DEFAULT_ITERATION_CAP)) {
|
|
152
164
|
return {
|
|
153
165
|
result: lastResult ?? { kind: "error", statusCode: 502, body: "Iteration cap exceeded", headers: {}, sentHeaders: {}, sentBody: "" },
|
|
@@ -171,7 +183,7 @@ export class ResilienceLayer {
|
|
|
171
183
|
const start = Date.now();
|
|
172
184
|
let transportResult;
|
|
173
185
|
try {
|
|
174
|
-
transportResult = await fn(currentTarget);
|
|
186
|
+
transportResult = await fn(currentTarget, signal);
|
|
175
187
|
}
|
|
176
188
|
catch (err) {
|
|
177
189
|
const errMsg = err instanceof Error ? err.message : JSON.stringify(err);
|
|
@@ -211,6 +223,9 @@ export class ResilienceLayer {
|
|
|
211
223
|
case "retry":
|
|
212
224
|
globalAttemptIndex++;
|
|
213
225
|
await sleep(decision.delayMs);
|
|
226
|
+
// sleep 期间客户端可能断连,再次检查避免无效重试
|
|
227
|
+
if (signal?.aborted)
|
|
228
|
+
return this.clientAbortedResult(allAttempts, excludedTargets);
|
|
214
229
|
continue;
|
|
215
230
|
case "failover":
|
|
216
231
|
excludedTargets.push(decision.excludeTarget);
|
|
@@ -5,7 +5,7 @@ import type { ActiveRequest, AttemptSnapshot } from "../../core/monitor/types.js
|
|
|
5
5
|
export declare class SemaphoreScope {
|
|
6
6
|
private manager;
|
|
7
7
|
constructor(manager: SemaphoreManager);
|
|
8
|
-
withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride): Promise<T>;
|
|
8
|
+
withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride, reqId?: string): Promise<T>;
|
|
9
9
|
}
|
|
10
10
|
export declare class TrackerScope {
|
|
11
11
|
private tracker;
|
|
@@ -3,8 +3,8 @@ export class SemaphoreScope {
|
|
|
3
3
|
constructor(manager) {
|
|
4
4
|
this.manager = manager;
|
|
5
5
|
}
|
|
6
|
-
async withSlot(providerId, signal, onQueued, fn, concurrencyOverride) {
|
|
7
|
-
const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride);
|
|
6
|
+
async withSlot(providerId, signal, onQueued, fn, concurrencyOverride, reqId) {
|
|
7
|
+
const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride, reqId);
|
|
8
8
|
try {
|
|
9
9
|
return await fn();
|
|
10
10
|
}
|
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
import type { Agent } from "http";
|
|
2
2
|
import type { RawHeaders, TransportResult } from "../types.js";
|
|
3
3
|
export { callStream } from "./stream.js";
|
|
4
|
+
/** 非流式/流式调用通用可选项:客户端断连信号。超时由各派生选项单独声明(如 NonStreamCallOpts.timeoutMs)。 */
|
|
5
|
+
export interface TransportCallOpts {
|
|
6
|
+
signal?: AbortSignal;
|
|
7
|
+
}
|
|
8
|
+
/** callNonStream 选项:timeoutMs=0/Infinity 表示禁用超时。 */
|
|
9
|
+
export interface NonStreamCallOpts extends TransportCallOpts {
|
|
10
|
+
timeoutMs?: number;
|
|
11
|
+
}
|
|
12
|
+
/** callGet 选项:仅超时(admin 探测,无客户端 signal 关联)。 */
|
|
13
|
+
export interface GetCallOpts {
|
|
14
|
+
timeoutMs?: number;
|
|
15
|
+
}
|
|
4
16
|
export interface UpstreamRequestOptions {
|
|
5
17
|
hostname: string;
|
|
6
18
|
port: number;
|
|
@@ -15,7 +27,7 @@ export declare function buildRequestOptions(url: URL, headers: Record<string, st
|
|
|
15
27
|
export type BuildHeadersFn = (cliHdrs: RawHeaders, key: string, bytes?: number) => Record<string, string>;
|
|
16
28
|
export declare function callNonStream(backend: {
|
|
17
29
|
base_url: string;
|
|
18
|
-
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: BuildHeadersFn, agent?: Agent): Promise<TransportResult>;
|
|
30
|
+
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: BuildHeadersFn, agent?: Agent, opts?: NonStreamCallOpts): Promise<TransportResult>;
|
|
19
31
|
export interface GetTransportResult {
|
|
20
32
|
statusCode: number;
|
|
21
33
|
body: string;
|
|
@@ -23,4 +35,4 @@ export interface GetTransportResult {
|
|
|
23
35
|
}
|
|
24
36
|
export declare function callGet(backend: {
|
|
25
37
|
base_url: string;
|
|
26
|
-
}, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: (cliHdrs: RawHeaders, key: string) => Record<string, string>, agent?: Agent): Promise<GetTransportResult>;
|
|
38
|
+
}, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: (cliHdrs: RawHeaders, key: string) => Record<string, string>, agent?: Agent, opts?: GetCallOpts): Promise<GetTransportResult>;
|