llm-simple-router 1.1.2-beta.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin/providers.js +4 -6
- package/dist/admin/quick-setup.js +0 -2
- package/dist/app/register-routes.js +0 -2
- package/dist/config/model-context.d.ts +0 -2
- package/dist/config/model-context.js +0 -4
- package/dist/core/concurrency/semaphore.d.ts +2 -15
- package/dist/core/concurrency/semaphore.js +4 -36
- package/dist/core/constants.d.ts +0 -1
- package/dist/core/constants.js +0 -3
- package/dist/core/monitor/request-tracker.d.ts +0 -6
- package/dist/core/monitor/request-tracker.js +0 -15
- package/dist/core/types.d.ts +1 -1
- package/dist/db/providers.d.ts +3 -12
- package/dist/db/providers.js +12 -23
- package/dist/index.js +0 -2
- package/dist/proxy/handler/iteration-setup.js +2 -3
- package/dist/proxy/orchestration/orchestrator.d.ts +1 -1
- package/dist/proxy/orchestration/orchestrator.js +11 -28
- package/dist/proxy/orchestration/resilience.d.ts +1 -3
- package/dist/proxy/orchestration/resilience.js +2 -17
- package/dist/proxy/orchestration/scope.d.ts +1 -1
- package/dist/proxy/orchestration/scope.js +2 -2
- package/dist/proxy/transport/http.d.ts +2 -14
- package/dist/proxy/transport/http.js +2 -24
- package/dist/proxy/transport/stream.d.ts +3 -56
- package/dist/proxy/transport/stream.js +44 -97
- package/dist/proxy/transport/transport-fn.d.ts +1 -2
- package/dist/proxy/transport/transport-fn.js +3 -3
- package/frontend-dist/assets/AuthLayout-jELzICkx.js +1 -0
- package/frontend-dist/assets/Card-uC_v0CEa.js +1 -0
- package/frontend-dist/assets/CardContent-CP3OiCj4.js +1 -0
- package/frontend-dist/assets/CardTitle-DGxuW5DZ.js +1 -0
- package/frontend-dist/assets/CascadingModelSelect-Dzk7rxIN.js +1 -0
- package/frontend-dist/assets/Checkbox-C1aVqGdC.js +1 -0
- package/frontend-dist/assets/CollapsibleContent-pBG4UkLo.js +1 -0
- package/frontend-dist/assets/CollapsibleTrigger-D5TkgXmz.js +1 -0
- package/frontend-dist/assets/ConcurrencyControl-5GweS-rY.js +1 -0
- package/frontend-dist/assets/Dashboard-wjd3d3qk.js +3 -0
- package/frontend-dist/assets/{Input-DE4enS7w.js → Input-DMDfXTXB.js} +1 -1
- package/frontend-dist/assets/Label-BQXea0mo.js +1 -0
- package/frontend-dist/assets/Login-DNpCjxrY.js +1 -0
- package/frontend-dist/assets/Logs-BuL2Z0sF.js +1 -0
- package/frontend-dist/assets/ModelMappings-DeRhu-2N.js +1 -0
- package/frontend-dist/assets/Monitor-V30dnACo.js +1 -0
- package/frontend-dist/assets/Providers-BkkQhSTb.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-DjgebwfU.js +1 -0
- package/frontend-dist/assets/QuickSetup-BTVjEiU7.js +1 -0
- package/frontend-dist/assets/RetryRules-DFBHBG-B.js +1 -0
- package/frontend-dist/assets/RouterKeys-DZADvMfh.js +1 -0
- package/frontend-dist/assets/{RovingFocusItem-A91xgo2R.js → RovingFocusItem-5H5eE6G2.js} +1 -1
- package/frontend-dist/assets/Schedules-DUubZ2uN.js +1 -0
- package/frontend-dist/assets/Separator-BqIs_Dy3.js +1 -0
- package/frontend-dist/assets/Settings-CO59WLOZ.js +6 -0
- package/frontend-dist/assets/Setup-Br7JZKNp.js +1 -0
- package/frontend-dist/assets/Skeleton-gVyjaP-y.js +1 -0
- package/frontend-dist/assets/Switch-D1ER0j6H.js +1 -0
- package/frontend-dist/assets/TableHeader-MqyrNSsx.js +1 -0
- package/frontend-dist/assets/TabsTrigger-DdjWJbUq.js +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-DRthlI6j.js +3 -0
- package/frontend-dist/assets/{VisuallyHiddenInput-Cd7hG3YN.js → VisuallyHiddenInput-DrNFhnVL.js} +1 -1
- package/frontend-dist/assets/arrow-down-BFgGYafs.js +1 -0
- package/frontend-dist/assets/badge-Db4OYMEf.js +1 -0
- package/frontend-dist/assets/{button-BMRic68D.js → button-BBiWml8B.js} +2 -2
- package/frontend-dist/assets/chevron-right-DYwStkJr.js +1 -0
- package/frontend-dist/assets/dialog-DRYeWncC.js +1 -0
- package/frontend-dist/assets/{image-b28E-pBB.js → image-zYdpUIEA.js} +1 -1
- package/frontend-dist/assets/index-DTujoAWx.js +58 -0
- package/frontend-dist/assets/index-DyQ39g4W.css +1 -0
- package/frontend-dist/assets/model-patches-DIy-rFuq.js +1 -0
- package/frontend-dist/assets/{pencil-Bn5p7Xpp.js → pencil-C3-MFg-d.js} +1 -1
- package/frontend-dist/assets/plus-xmIDnujf.js +1 -0
- package/frontend-dist/assets/quickSetup-CqxQRMCR.js +1 -0
- package/frontend-dist/assets/quickSetup-DplqYrvf.js +1 -0
- package/frontend-dist/assets/search-BxNrTsG8.js +1 -0
- package/frontend-dist/assets/{sparkles-BkvPvXQI.js → sparkles-B5RWZZuf.js} +1 -1
- package/frontend-dist/assets/transform-domain-KBixlLXR.js +1 -0
- package/frontend-dist/assets/{trash-2-CGAE8Kz-.js → trash-2-Dn3T5-Z1.js} +1 -1
- package/frontend-dist/assets/{useClipboard-DOLc7gxm.js → useClipboard-Bx3CrPal.js} +1 -1
- package/frontend-dist/assets/useLogRetention-CdccNhYN.js +1 -0
- package/frontend-dist/assets/{useProviderGroups-wtBbdLPh.js → useProviderGroups-Og5FpCPe.js} +1 -1
- package/frontend-dist/index.html +3 -3
- package/package.json +1 -1
- package/frontend-dist/assets/AuthLayout-C1nGmLnY.js +0 -1
- package/frontend-dist/assets/Card-C04zsqNb.js +0 -1
- package/frontend-dist/assets/CardContent-pt1Tz5Eu.js +0 -1
- package/frontend-dist/assets/CardTitle-D7P2Er05.js +0 -1
- package/frontend-dist/assets/CascadingModelSelect-D1OO6jiA.js +0 -1
- package/frontend-dist/assets/Checkbox-DdHHiyon.js +0 -1
- package/frontend-dist/assets/CollapsibleContent-BkyV9lcW.js +0 -1
- package/frontend-dist/assets/CollapsibleTrigger-D6DzndqD.js +0 -1
- package/frontend-dist/assets/ConcurrencyControl-CTFSQ3LT.js +0 -1
- package/frontend-dist/assets/Dashboard-CkSNlLM8.js +0 -3
- package/frontend-dist/assets/Label-BFbbk9mm.js +0 -1
- package/frontend-dist/assets/Login-C4bqFO4q.js +0 -1
- package/frontend-dist/assets/Logs-BLAnG0F_.js +0 -1
- package/frontend-dist/assets/ModelMappings-DtRHzF3y.js +0 -1
- package/frontend-dist/assets/Monitor-BX57d4C3.js +0 -1
- package/frontend-dist/assets/Providers-BnaYMg6t.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-C2ti1nKT.js +0 -1
- package/frontend-dist/assets/QuickSetup-_2WYWs-J.js +0 -1
- package/frontend-dist/assets/RetryRules-BVU_G7_j.js +0 -1
- package/frontend-dist/assets/RouterKeys-BaP31wPX.js +0 -1
- package/frontend-dist/assets/Schedules-GGyNe1Ry.js +0 -1
- package/frontend-dist/assets/Separator-DfjkUh-8.js +0 -1
- package/frontend-dist/assets/Settings-BbKwyzRN.js +0 -6
- package/frontend-dist/assets/Setup-gKVh8Ebb.js +0 -1
- package/frontend-dist/assets/Skeleton-BoUJHwG3.js +0 -1
- package/frontend-dist/assets/Switch-BBEFeiLG.js +0 -1
- package/frontend-dist/assets/TableHeader-BcrqF2sy.js +0 -1
- package/frontend-dist/assets/TabsTrigger-B-LFjORu.js +0 -1
- package/frontend-dist/assets/UnifiedRequestDialog-CPgzOr26.js +0 -3
- package/frontend-dist/assets/arrow-down-SgVJU83o.js +0 -1
- package/frontend-dist/assets/badge-BHCmR54Q.js +0 -1
- package/frontend-dist/assets/chevron-right-B4kS73Wt.js +0 -1
- package/frontend-dist/assets/dialog-_wNvbwyb.js +0 -1
- package/frontend-dist/assets/index-C921Z_pU.js +0 -58
- package/frontend-dist/assets/index-Db9D0WPf.css +0 -1
- package/frontend-dist/assets/model-patches-CbZ0jpI7.js +0 -1
- package/frontend-dist/assets/plus-DNAK4a63.js +0 -1
- package/frontend-dist/assets/quickSetup-jgJgPUcH.js +0 -1
- package/frontend-dist/assets/quickSetup-qTjp3Z6J.js +0 -1
- package/frontend-dist/assets/search-BgJZV_3f.js +0 -1
- package/frontend-dist/assets/transform-domain-DP2JiRmo.js +0 -1
- package/frontend-dist/assets/useLogRetention-CGqqwY32.js +0 -1
package/dist/admin/providers.js
CHANGED
|
@@ -75,8 +75,6 @@ function extractModelOverrides(models) {
|
|
|
75
75
|
const entry = { name, patches: (m.patches ?? []).map(normalizePatchName) };
|
|
76
76
|
if (m.stream_timeout_ms != null)
|
|
77
77
|
entry.stream_timeout_ms = m.stream_timeout_ms;
|
|
78
|
-
if (m.non_stream_timeout_ms != null)
|
|
79
|
-
entry.non_stream_timeout_ms = m.non_stream_timeout_ms;
|
|
80
78
|
if (m.capabilities != null && Array.isArray(m.capabilities))
|
|
81
79
|
entry.capabilities = m.capabilities;
|
|
82
80
|
entries.push(entry);
|
|
@@ -126,8 +124,8 @@ const CreateProviderSchema = Type.Object({
|
|
|
126
124
|
endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
|
|
127
125
|
models: Type.Optional(Type.Array(Type.Union([
|
|
128
126
|
Type.String(),
|
|
129
|
-
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })),
|
|
130
|
-
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 }))
|
|
127
|
+
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
128
|
+
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
131
129
|
]))),
|
|
132
130
|
is_active: Type.Optional(Type.Number()),
|
|
133
131
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
@@ -148,8 +146,8 @@ const UpdateProviderSchema = Type.Object({
|
|
|
148
146
|
endpoints: Type.Optional(Type.Array(EndpointSchema, { minItems: 1 })),
|
|
149
147
|
models: Type.Optional(Type.Array(Type.Union([
|
|
150
148
|
Type.String(),
|
|
151
|
-
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })),
|
|
152
|
-
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 }))
|
|
149
|
+
Type.Object({ name: Type.String(), context_window: Type.Optional(Type.Number()), patches: Type.Optional(Type.Array(Type.String())), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })), capabilities: Type.Optional(Type.Array(Type.String())) }),
|
|
150
|
+
Type.Object({ id: Type.String(), stream_timeout_ms: Type.Optional(Type.Number({ minimum: 0, maximum: 86_400_000 })) })
|
|
153
151
|
]))),
|
|
154
152
|
is_active: Type.Optional(Type.Number()),
|
|
155
153
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
package/dist/admin/quick-setup.js
CHANGED
|
@@ -54,7 +54,6 @@ const QuickSetupProviderSchema = Type.Object({
|
|
|
54
54
|
context_window: Type.Optional(Type.Number()),
|
|
55
55
|
patches: Type.Optional(Type.Array(Type.String())),
|
|
56
56
|
stream_timeout_ms: Type.Optional(Type.Number()),
|
|
57
|
-
non_stream_timeout_ms: Type.Optional(Type.Number()),
|
|
58
57
|
capabilities: Type.Optional(Type.Array(Type.String())),
|
|
59
58
|
})),
|
|
60
59
|
endpoints: Type.Optional(Type.Array(QuickSetupEndpointSchema, { minItems: 1 })),
|
|
@@ -123,7 +122,6 @@ export const adminQuickSetupRoutes = (app, options, done) => {
|
|
|
123
122
|
...(m.context_window != null ? { context_window: m.context_window } : {}),
|
|
124
123
|
...(m.patches && m.patches.length > 0 ? { patches: m.patches } : {}),
|
|
125
124
|
...(m.stream_timeout_ms != null ? { stream_timeout_ms: m.stream_timeout_ms } : {}),
|
|
126
|
-
...(m.non_stream_timeout_ms != null ? { non_stream_timeout_ms: m.non_stream_timeout_ms } : {}),
|
|
127
125
|
...(m.capabilities && m.capabilities.length > 0 ? { capabilities: m.capabilities } : {}),
|
|
128
126
|
}));
|
|
129
127
|
const adaptiveEnabled = body.provider.concurrency_mode === 'auto' ? 1 : 0;
|
package/dist/app/register-routes.js
CHANGED
|
@@ -77,8 +77,6 @@ export function registerRoutes(app, opts) {
|
|
|
77
77
|
dbSizeMonitor.stop();
|
|
78
78
|
tracker.stopPushInterval();
|
|
79
79
|
tracker.closeAllClients();
|
|
80
|
-
// 先终止所有 inflight 请求(复用 kill 机制同步释放信号量),再清空信号量配置
|
|
81
|
-
tracker.abortAllInflight();
|
|
82
80
|
semaphoreManager.removeAll();
|
|
83
81
|
proxyAgentFactory.invalidateAll();
|
|
84
82
|
const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
|
package/dist/config/model-context.d.ts
CHANGED
|
@@ -3,7 +3,6 @@ export interface ModelInfo {
|
|
|
3
3
|
context_window: number | null;
|
|
4
4
|
patches: string[];
|
|
5
5
|
stream_timeout_ms?: number;
|
|
6
|
-
non_stream_timeout_ms?: number;
|
|
7
6
|
capabilities?: string[];
|
|
8
7
|
}
|
|
9
8
|
export interface ModelEntry {
|
|
@@ -11,7 +10,6 @@ export interface ModelEntry {
|
|
|
11
10
|
context_window?: number;
|
|
12
11
|
patches?: string[];
|
|
13
12
|
stream_timeout_ms?: number;
|
|
14
|
-
non_stream_timeout_ms?: number;
|
|
15
13
|
capabilities?: string[];
|
|
16
14
|
}
|
|
17
15
|
export declare const MODEL_CONTEXT_WINDOWS: Record<string, number>;
|
package/dist/config/model-context.js
CHANGED
|
@@ -247,8 +247,6 @@ export function parseModels(raw) {
|
|
|
247
247
|
};
|
|
248
248
|
if (obj.stream_timeout_ms != null)
|
|
249
249
|
entry.stream_timeout_ms = obj.stream_timeout_ms;
|
|
250
|
-
if (obj.non_stream_timeout_ms != null)
|
|
251
|
-
entry.non_stream_timeout_ms = obj.non_stream_timeout_ms;
|
|
252
250
|
// capabilities: 显式 > model-directory > 硬编码白名单 > 默认 ["text"]
|
|
253
251
|
entry.capabilities = obj.capabilities ?? lookupCapabilities(modelName);
|
|
254
252
|
return entry;
|
|
@@ -269,8 +267,6 @@ export function buildModelInfoList(modelEntries, overrides) {
|
|
|
269
267
|
};
|
|
270
268
|
if (entry.stream_timeout_ms != null)
|
|
271
269
|
info.stream_timeout_ms = entry.stream_timeout_ms;
|
|
272
|
-
if (entry.non_stream_timeout_ms != null)
|
|
273
|
-
info.non_stream_timeout_ms = entry.non_stream_timeout_ms;
|
|
274
270
|
if (entry.capabilities != null)
|
|
275
271
|
info.capabilities = entry.capabilities;
|
|
276
272
|
return info;
|
package/dist/core/concurrency/semaphore.d.ts
CHANGED
|
@@ -6,32 +6,19 @@ export interface AcquireToken {
|
|
|
6
6
|
readonly generation: number;
|
|
7
7
|
/** acquire 时 maxConcurrency=0(不计数),release 时跳过递减 */
|
|
8
8
|
readonly bypassed: boolean;
|
|
9
|
-
/** 幂等标志:release 置 true,重复 release 直接跳过(防 kill 与自然完成双重递减) */
|
|
10
|
-
released: boolean;
|
|
11
|
-
/** 关联请求 ID,用于 releaseByReqId 反查及自然完成时清理 reqTokenMap */
|
|
12
|
-
readonly reqId?: string;
|
|
13
9
|
}
|
|
14
10
|
export declare class SemaphoreManager {
|
|
15
11
|
private readonly entries;
|
|
16
|
-
/** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
|
|
17
|
-
* acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
|
|
18
|
-
private readonly reqTokenMap;
|
|
19
12
|
/** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
|
|
20
13
|
private nextGeneration;
|
|
21
14
|
private getOrCreate;
|
|
22
15
|
updateConfig(providerId: string, config: ConcurrencyConfig): void;
|
|
23
|
-
/** 构建 token 并按 reqId 存入 reqTokenMap(统一 bypassed/direct/queued 三路径的记录逻辑) */
|
|
24
|
-
private buildAndRecordToken;
|
|
25
16
|
acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void, logger?: Logger, override?: {
|
|
26
17
|
max_concurrency?: number;
|
|
27
18
|
queue_timeout_ms?: number;
|
|
28
19
|
max_queue_size?: number;
|
|
29
|
-
}
|
|
30
|
-
release(providerId: string, token: AcquireToken
|
|
31
|
-
/** 按 reqId 同步释放信号量(kill 路径专用)。
|
|
32
|
-
* - 已 acquire:取 {token, providerId} 调 release(幂等)
|
|
33
|
-
* - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
|
|
34
|
-
releaseByReqId(reqId: string): void;
|
|
20
|
+
}): Promise<AcquireToken>;
|
|
21
|
+
release(providerId: string, token: AcquireToken, logger?: Logger): void;
|
|
35
22
|
getStatus(providerId: string): {
|
|
36
23
|
active: number;
|
|
37
24
|
queued: number;
|
package/dist/core/concurrency/semaphore.js
CHANGED
|
@@ -2,9 +2,6 @@ import { SemaphoreQueueFullError, SemaphoreTimeoutError } from "../errors.js";
|
|
|
2
2
|
export { SemaphoreQueueFullError, SemaphoreTimeoutError };
|
|
3
3
|
export class SemaphoreManager {
|
|
4
4
|
entries = new Map();
|
|
5
|
-
/** reqId → {token, providerId} 映射,支持 kill 时按 reqId 同步释放信号量。
|
|
6
|
-
* acquire 成功(含 bypassed/排队 resolve)时存入,release 时按 token.reqId 清理。 */
|
|
7
|
-
reqTokenMap = new Map();
|
|
8
5
|
/** 全局 generation 计数器 — 每次 getOrCreate 分配唯一值,避免 disable+re-enable 后旧 token 匹配新条目 */
|
|
9
6
|
nextGeneration = 0;
|
|
10
7
|
getOrCreate(providerId) {
|
|
@@ -56,24 +53,17 @@ export class SemaphoreManager {
|
|
|
56
53
|
e.resolve();
|
|
57
54
|
}
|
|
58
55
|
}
|
|
59
|
-
|
|
60
|
-
buildAndRecordToken(entry, bypassed, reqId, providerId) {
|
|
61
|
-
const token = { generation: entry.generation, bypassed, released: false, reqId };
|
|
62
|
-
if (reqId)
|
|
63
|
-
this.reqTokenMap.set(reqId, { token, providerId });
|
|
64
|
-
return token;
|
|
65
|
-
}
|
|
66
|
-
async acquire(providerId, signal, onQueued, logger, override, reqId) {
|
|
56
|
+
async acquire(providerId, signal, onQueued, logger, override) {
|
|
67
57
|
const entry = this.getOrCreate(providerId);
|
|
68
58
|
const maxConcurrency = override?.max_concurrency ?? entry.config.maxConcurrency;
|
|
69
59
|
const queueTimeoutMs = Math.max(0, override?.queue_timeout_ms ?? entry.config.queueTimeoutMs);
|
|
70
60
|
const maxQueueSize = Math.max(0, override?.max_queue_size ?? entry.config.maxQueueSize);
|
|
71
61
|
if (maxConcurrency === 0)
|
|
72
|
-
return
|
|
62
|
+
return { generation: entry.generation, bypassed: true };
|
|
73
63
|
if (entry.current < maxConcurrency) {
|
|
74
64
|
entry.current++;
|
|
75
65
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, action: "acquire_direct" }, "Semaphore: acquired directly");
|
|
76
|
-
return
|
|
66
|
+
return { generation: entry.generation, bypassed: false };
|
|
77
67
|
}
|
|
78
68
|
if (entry.queue.length >= maxQueueSize) {
|
|
79
69
|
logger?.debug?.({ providerId, queueLength: entry.queue.length, maxQueueSize, action: "acquire_rejected" }, "Semaphore: queue full, rejecting");
|
|
@@ -82,11 +72,9 @@ export class SemaphoreManager {
|
|
|
82
72
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_queued" }, "Semaphore: entering wait queue");
|
|
83
73
|
onQueued?.();
|
|
84
74
|
return new Promise((resolve, reject) => {
|
|
75
|
+
const token = { generation: entry.generation, bypassed: false };
|
|
85
76
|
const qe = {
|
|
86
77
|
resolve: () => {
|
|
87
|
-
// 关键:在真正获取槽位后才构建并记录 token。
|
|
88
|
-
// 若在 executor 创建 token 后立即记录,排队中被 kill 会误减 current。
|
|
89
|
-
const token = this.buildAndRecordToken(entry, false, reqId, providerId);
|
|
90
78
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_resolved" }, "Semaphore: left wait queue, acquired");
|
|
91
79
|
resolve(token);
|
|
92
80
|
},
|
|
@@ -119,17 +107,6 @@ export class SemaphoreManager {
|
|
|
119
107
|
});
|
|
120
108
|
}
|
|
121
109
|
release(providerId, token, logger) {
|
|
122
|
-
if (!token)
|
|
123
|
-
return;
|
|
124
|
-
// 幂等:kill 强制释放与自然完成都走此处,已 released 则跳过(防双重递减)
|
|
125
|
-
if (token.released) {
|
|
126
|
-
logger?.debug?.({ providerId, action: "release_idempotent" }, "Semaphore: token already released, skipping");
|
|
127
|
-
return;
|
|
128
|
-
}
|
|
129
|
-
token.released = true;
|
|
130
|
-
// 清理 reqTokenMap(自然完成自动回收,防 map 无限增长)
|
|
131
|
-
if (token.reqId)
|
|
132
|
-
this.reqTokenMap.delete(token.reqId);
|
|
133
110
|
const entry = this.entries.get(providerId);
|
|
134
111
|
if (!entry)
|
|
135
112
|
return;
|
|
@@ -153,15 +130,6 @@ export class SemaphoreManager {
|
|
|
153
130
|
logger?.debug?.({ providerId, current: entry.current, maxConcurrency: entry.config.maxConcurrency, action: "release_decrement" }, "Semaphore: released slot");
|
|
154
131
|
}
|
|
155
132
|
}
|
|
156
|
-
/** 按 reqId 同步释放信号量(kill 路径专用)。
|
|
157
|
-
* - 已 acquire:取 {token, providerId} 调 release(幂等)
|
|
158
|
-
* - 排队中未 acquire(map 无记录):noop,不抛错、不递减 current */
|
|
159
|
-
releaseByReqId(reqId) {
|
|
160
|
-
const record = this.reqTokenMap.get(reqId);
|
|
161
|
-
if (!record)
|
|
162
|
-
return;
|
|
163
|
-
this.release(record.providerId, record.token);
|
|
164
|
-
}
|
|
165
133
|
getStatus(providerId) {
|
|
166
134
|
const entry = this.entries.get(providerId);
|
|
167
135
|
if (!entry)
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -14,6 +14,5 @@ export declare function getProxyApiType(url: string): string | null;
|
|
|
14
14
|
export declare const MS_PER_SECOND = 1000;
|
|
15
15
|
export declare const SECONDS_PER_DAY = 86400;
|
|
16
16
|
export declare const UPSTREAM_SUCCESS = 200;
|
|
17
|
-
export declare const DEFAULT_GET_TIMEOUT_MS = 30000;
|
|
18
17
|
/** 过滤掉不应转发给下游的 hop-by-hop headers */
|
|
19
18
|
export declare function filterHeaders(raw: import("./types.js").RawHeaders): Record<string, string>;
|
package/dist/core/constants.js
CHANGED
|
@@ -27,9 +27,6 @@ export const MS_PER_SECOND = 1000;
|
|
|
27
27
|
export const SECONDS_PER_DAY = 86_400;
|
|
28
28
|
// 上游成功状态码
|
|
29
29
|
export const UPSTREAM_SUCCESS = 200;
|
|
30
|
-
// callGet(admin 探测 /v1/models)默认超时:30s。
|
|
31
|
-
// 仅响应头前超时,与流式 idleTimer 无关。
|
|
32
|
-
export const DEFAULT_GET_TIMEOUT_MS = 30_000;
|
|
33
30
|
/** 过滤掉不应转发给下游的 hop-by-hop headers */
|
|
34
31
|
const SKIP_DOWNSTREAM = new Set([
|
|
35
32
|
"content-length",
|
package/dist/core/monitor/request-tracker.d.ts
CHANGED
|
@@ -35,16 +35,12 @@ export declare class RequestTracker {
|
|
|
35
35
|
readonly runtimeCollector: RuntimeCollector;
|
|
36
36
|
private readonly semaphoreManager?;
|
|
37
37
|
private adaptiveStatusProvider?;
|
|
38
|
-
/** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
|
|
39
|
-
private releaseSlotProvider?;
|
|
40
38
|
constructor(deps?: {
|
|
41
39
|
semaphoreManager?: ISemaphoreStatus;
|
|
42
40
|
runtimeCollector?: RuntimeCollector;
|
|
43
41
|
logger?: Logger;
|
|
44
42
|
});
|
|
45
43
|
setAdaptiveStatusProvider(provider: IAdaptiveStatus): void;
|
|
46
|
-
/** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
|
|
47
|
-
setReleaseSlotProvider(fn: (reqId: string) => void): void;
|
|
48
44
|
start(req: ActiveRequest): void;
|
|
49
45
|
/** 轻量级节流推送:流式内容变更后 500ms 内批量广播 */
|
|
50
46
|
private scheduleStreamContentPush;
|
|
@@ -67,8 +63,6 @@ export declare class RequestTracker {
|
|
|
67
63
|
registerKillCallback(id: string, callback: () => void): void;
|
|
68
64
|
/** 主动终止指定请求。返回 true 表示成功终止,false 表示请求不存在或已完成 */
|
|
69
65
|
killRequest(id: string): boolean;
|
|
70
|
-
/** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
|
|
71
|
-
abortAllInflight(): void;
|
|
72
66
|
getStats(): StatsSnapshot;
|
|
73
67
|
getConcurrency(): ProviderConcurrencySnapshot[];
|
|
74
68
|
getRuntime(): RuntimeMetrics;
|
package/dist/core/monitor/request-tracker.js
CHANGED
|
@@ -27,8 +27,6 @@ export class RequestTracker {
|
|
|
27
27
|
runtimeCollector;
|
|
28
28
|
semaphoreManager;
|
|
29
29
|
adaptiveStatusProvider;
|
|
30
|
-
/** kill 时同步释放信号量的回调(绑定到 semaphoreManager.releaseByReqId) */
|
|
31
|
-
releaseSlotProvider;
|
|
32
30
|
constructor(deps) {
|
|
33
31
|
this.semaphoreManager = deps?.semaphoreManager;
|
|
34
32
|
this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
|
|
@@ -38,10 +36,6 @@ export class RequestTracker {
|
|
|
38
36
|
setAdaptiveStatusProvider(provider) {
|
|
39
37
|
this.adaptiveStatusProvider = provider;
|
|
40
38
|
}
|
|
41
|
-
/** 注入信号量释放回调,kill 时同步释放槽位(防 kill 不释放信号量) */
|
|
42
|
-
setReleaseSlotProvider(fn) {
|
|
43
|
-
this.releaseSlotProvider = fn;
|
|
44
|
-
}
|
|
45
39
|
// --- Core methods ---
|
|
46
40
|
start(req) {
|
|
47
41
|
this.activeMap.set(req.id, { ...req });
|
|
@@ -207,21 +201,12 @@ export class RequestTracker {
|
|
|
207
201
|
this.killCallbacks.delete(id);
|
|
208
202
|
this.logger?.info?.({ reqId: id }, "Tracker: killRequest");
|
|
209
203
|
callback();
|
|
210
|
-
// 同步释放信号量槽位(releaseByReqId 幂等:未 acquire 的排队请求 noop)
|
|
211
|
-
this.releaseSlotProvider?.(id);
|
|
212
204
|
// transport 可能尚未 resolve(上游未响应时 StreamProxy 不存在),强制完成请求
|
|
213
205
|
if (this.activeMap.has(id)) {
|
|
214
206
|
this.complete(id, { status: "failed" });
|
|
215
207
|
}
|
|
216
208
|
return true;
|
|
217
209
|
}
|
|
218
|
-
/** 优雅关闭时终止所有 inflight 请求,复用 kill 机制(含信号量释放 + tracker 完成) */
|
|
219
|
-
abortAllInflight() {
|
|
220
|
-
const ids = [...this.killCallbacks.keys()];
|
|
221
|
-
for (const id of ids) {
|
|
222
|
-
this.killRequest(id);
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
210
|
// --- Stats / monitoring ---
|
|
226
211
|
getStats() {
|
|
227
212
|
return this.statsAggregator.getStats();
|
package/dist/core/types.d.ts
CHANGED
|
@@ -91,7 +91,7 @@ export type TransportResult = {
|
|
|
91
91
|
providerId: string;
|
|
92
92
|
};
|
|
93
93
|
timeoutMs?: number;
|
|
94
|
-
abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection"
|
|
94
|
+
abortReason?: "idle_timeout" | "client_disconnect" | "loop_detection";
|
|
95
95
|
} | {
|
|
96
96
|
kind: "error";
|
|
97
97
|
statusCode: number;
|
package/dist/db/providers.d.ts
CHANGED
|
@@ -24,18 +24,9 @@ export interface Provider {
|
|
|
24
24
|
created_at: string;
|
|
25
25
|
updated_at: string;
|
|
26
26
|
}
|
|
27
|
-
/** 默认流式超时
|
|
28
|
-
export declare const DEFAULT_STREAM_TIMEOUT_MS =
|
|
29
|
-
/**
|
|
30
|
-
export declare const DEFAULT_NON_STREAM_TIMEOUT_MS = 600000;
|
|
31
|
-
/** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
|
|
32
|
-
* stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
|
|
33
|
-
* nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
|
|
34
|
-
export declare function getModelTimeouts(provider: Provider, backendModel: string): {
|
|
35
|
-
stream: number;
|
|
36
|
-
nonStream: number;
|
|
37
|
-
};
|
|
38
|
-
/** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
|
|
27
|
+
/** 默认流式超时 10 分钟 */
|
|
28
|
+
export declare const DEFAULT_STREAM_TIMEOUT_MS = 600000;
|
|
29
|
+
/** 从 provider 的 models JSON 中查找指定模型的超时值 */
|
|
39
30
|
export declare function getModelStreamTimeout(provider: Provider, backendModel: string): number;
|
|
40
31
|
export declare const PROVIDER_CONCURRENCY_DEFAULTS: {
|
|
41
32
|
readonly max_concurrency: 0;
|
package/dist/db/providers.js
CHANGED
|
@@ -1,31 +1,20 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { buildUpdateQuery, deleteById } from "./helpers.js";
|
|
3
3
|
import { parseModels } from "../config/model-context.js";
|
|
4
|
-
/** 默认流式超时
|
|
5
|
-
export const DEFAULT_STREAM_TIMEOUT_MS =
|
|
6
|
-
/**
|
|
7
|
-
export
|
|
8
|
-
/** 0 表示禁用超时(返回 Infinity);undefined/null/未设置 使用默认值 */
|
|
9
|
-
function resolveTimeout(value, fallback) {
|
|
10
|
-
return value === 0 ? Number.POSITIVE_INFINITY : value ?? fallback;
|
|
11
|
-
}
|
|
12
|
-
/** 从 provider 的 models JSON 中查找指定模型的流式/非流式超时值。
|
|
13
|
-
* stream: entry.stream_timeout_ms ?? DEFAULT_STREAM_TIMEOUT_MS,0→Infinity
|
|
14
|
-
* nonStream: entry.non_stream_timeout_ms ?? DEFAULT_NON_STREAM_TIMEOUT_MS,0→Infinity */
|
|
15
|
-
export function getModelTimeouts(provider, backendModel) {
|
|
4
|
+
/** 默认流式超时 10 分钟 */
|
|
5
|
+
export const DEFAULT_STREAM_TIMEOUT_MS = 600_000;
|
|
6
|
+
/** 从 provider 的 models JSON 中查找指定模型的超时值 */
|
|
7
|
+
export function getModelStreamTimeout(provider, backendModel) {
|
|
16
8
|
const entries = parseModels(provider.models);
|
|
17
9
|
const entry = entries.find(m => m.name === backendModel);
|
|
18
|
-
if (!entry)
|
|
19
|
-
return
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
/** @deprecated 改用 getModelTimeouts。保留为薄包装以兼容现有调用方(iteration-setup 等)。 */
|
|
27
|
-
export function getModelStreamTimeout(provider, backendModel) {
|
|
28
|
-
return getModelTimeouts(provider, backendModel).stream;
|
|
10
|
+
if (!entry)
|
|
11
|
+
return DEFAULT_STREAM_TIMEOUT_MS;
|
|
12
|
+
const timeout = entry.stream_timeout_ms;
|
|
13
|
+
// stream_timeout_ms: 0 表示禁用超时,返回 Infinity;
|
|
14
|
+
// undefined/null/未设置 表示使用默认值
|
|
15
|
+
if (timeout === 0)
|
|
16
|
+
return Number.POSITIVE_INFINITY;
|
|
17
|
+
return timeout ?? DEFAULT_STREAM_TIMEOUT_MS;
|
|
29
18
|
}
|
|
30
19
|
export const PROVIDER_CONCURRENCY_DEFAULTS = {
|
|
31
20
|
max_concurrency: 0,
|
package/dist/index.js
CHANGED
|
@@ -69,8 +69,6 @@ export async function buildApp(options) {
|
|
|
69
69
|
const pluginRegistry = container.resolve(SERVICE_KEYS.pluginRegistry);
|
|
70
70
|
// Wire adaptive controller to tracker
|
|
71
71
|
tracker.setAdaptiveStatusProvider(adaptiveController);
|
|
72
|
-
// 绑定信号量释放回调:kill 时按 reqId 同步释放槽位(防 kill 不释放信号量)
|
|
73
|
-
tracker.setReleaseSlotProvider((reqId) => semaphoreManager.releaseByReqId(reqId));
|
|
74
72
|
// 从 DB 读取已有 provider 的并发配置,初始化信号量/adaptive/tracker 缓存
|
|
75
73
|
initializeProviderState(db, semaphoreManager, adaptiveController, tracker);
|
|
76
74
|
// Step 3: 注册 auth + proxy handlers + 构建 StateRegistry
|
package/dist/proxy/handler/iteration-setup.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { resolveEndpoint } from "../routing/resolve-endpoint.js";
|
|
2
2
|
import { sanitizeHeadersForLog } from "../proxy-logging.js";
|
|
3
3
|
import { buildUpstreamHeaders, buildUpstreamUrl } from "../proxy-core.js";
|
|
4
|
-
import {
|
|
4
|
+
import { getModelStreamTimeout } from "../../db/providers.js";
|
|
5
5
|
import { buildTransportFn } from "../transport/transport-fn.js";
|
|
6
6
|
import { parseModels } from "../../config/model-context.js";
|
|
7
7
|
import { applyProviderPatches } from "../patch/index.js";
|
|
@@ -90,8 +90,7 @@ export function buildIterationSetup(params) {
|
|
|
90
90
|
const transportFn = buildTransportFn({
|
|
91
91
|
provider, apiKey, body: patchedBody, cliHdrs, reply, upstreamPath: effectiveUpstreamPath, apiType: effectiveApiType,
|
|
92
92
|
isStream, startTime, logId, effectiveModel: clientModel,
|
|
93
|
-
|
|
94
|
-
streamTimeoutMs: getModelTimeouts(provider, resolved.backend_model).stream,
|
|
93
|
+
streamTimeoutMs: getModelStreamTimeout(provider, resolved.backend_model),
|
|
95
94
|
tracker, matcher, request,
|
|
96
95
|
streamLoopEnabled, formatTransform, responseTransform, injectedHeaders,
|
|
97
96
|
timeoutContext: { modelId: resolved.backend_model, providerId: provider.id },
|
package/dist/proxy/orchestration/orchestrator.d.ts
CHANGED
|
@@ -44,7 +44,7 @@ export interface HandleContext {
|
|
|
44
44
|
failoverThreshold?: number;
|
|
45
45
|
isFailover?: boolean;
|
|
46
46
|
ruleMatcher?: RetryRuleMatcher;
|
|
47
|
-
transportFn: (target: Target
|
|
47
|
+
transportFn: (target: Target) => Promise<TransportResult>;
|
|
48
48
|
}
|
|
49
49
|
/**
|
|
50
50
|
* 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
|
package/dist/proxy/orchestration/orchestrator.js
CHANGED
|
@@ -32,18 +32,11 @@ export class ProxyOrchestrator {
|
|
|
32
32
|
async handle(request, reply, apiType, config, ctx) {
|
|
33
33
|
const providerId = config.provider.id;
|
|
34
34
|
const controller = new AbortController();
|
|
35
|
-
//
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
// 注意:failover 循环会复用同一 reply 多次调用 handle(),每次都 new 一个独立的
|
|
39
|
-
// AbortController。此处必须每次都挂载新的 close listener,让该迭代的 controller
|
|
40
|
-
// 绑定到 close 事件。若用 WeakSet 去重(旧实现),迭代 2+ 的 controller 永远不
|
|
41
|
-
// 会因客户端断连 abort,导致上游连接泄漏 + Promise 永挂。controller.abort() 幂等,
|
|
42
|
-
// 多 listener 各 abort 各自的 controller互不干扰;listener 数量受 MAX_FAILOVER_ITERATIONS
|
|
43
|
-
// 上界约束(通常 ≤5),reply.raw 关闭后随对象 GC 一起回收,无永久泄漏。
|
|
44
|
-
reply.raw.on("close", () => {
|
|
45
|
-
if (!reply.raw.writableEnded)
|
|
35
|
+
// 客户端断连时自动 abort(保留原有行为)
|
|
36
|
+
request.raw.on("close", () => {
|
|
37
|
+
if (!request.raw.readableEnded) {
|
|
46
38
|
controller.abort();
|
|
39
|
+
}
|
|
47
40
|
});
|
|
48
41
|
const trackerReq = this.buildActiveRequest(request, config, apiType);
|
|
49
42
|
let wasEverQueued = false;
|
|
@@ -66,8 +59,8 @@ export class ProxyOrchestrator {
|
|
|
66
59
|
trackerReq.queued = false;
|
|
67
60
|
this.deps.trackerScope.markQueued(trackerReq.id, false);
|
|
68
61
|
}
|
|
69
|
-
return this.executeResilience(config, ctx
|
|
70
|
-
}, config.concurrencyOverride
|
|
62
|
+
return this.executeResilience(config, ctx);
|
|
63
|
+
}, config.concurrencyOverride);
|
|
71
64
|
}, (result) => this.extractTrackStatus(result), (result) => result.attempts.map(a => ({
|
|
72
65
|
statusCode: a.statusCode,
|
|
73
66
|
error: a.error,
|
|
@@ -80,10 +73,7 @@ export class ProxyOrchestrator {
|
|
|
80
73
|
// 如果有重试尝试(非 throw 类型),说明 resilience 层的重试规则匹配了,
|
|
81
74
|
// 意味着这是一个"有意义的失败"——即使上游返回 200 body error 也应该计入退避
|
|
82
75
|
const retryRuleMatched = status === "failed" && result.attempts.length > 1;
|
|
83
|
-
|
|
84
|
-
if (!controller.signal.aborted) {
|
|
85
|
-
this.deps.adaptiveController?.onRequestComplete(providerId, { success: status === "completed", statusCode, retryRuleMatched, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
86
|
-
}
|
|
76
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: status === "completed", statusCode, retryRuleMatched, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
87
77
|
this.sendResponse(reply, result.result, ctx);
|
|
88
78
|
return result;
|
|
89
79
|
}
|
|
@@ -91,18 +81,11 @@ export class ProxyOrchestrator {
|
|
|
91
81
|
if (e instanceof ProviderSwitchNeeded) {
|
|
92
82
|
const lastResult = e.lastResult;
|
|
93
83
|
const statusCode = lastResult && "statusCode" in lastResult ? lastResult.statusCode : undefined;
|
|
94
|
-
|
|
95
|
-
if (!controller.signal.aborted) {
|
|
96
|
-
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode, retryRuleMatched: true, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
97
|
-
}
|
|
84
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode, retryRuleMatched: true, requestId: config.trackerId, wasQueued: wasEverQueued });
|
|
98
85
|
}
|
|
99
86
|
else if (e instanceof SemaphoreTimeoutError || e instanceof SemaphoreQueueFullError) {
|
|
100
87
|
// 信号量超时或队列满:说明并发压力大,上报给自适应控制器
|
|
101
|
-
|
|
102
|
-
// queueTimeout 与断连竞态时归类为非 provider 失败更合理
|
|
103
|
-
if (!controller.signal.aborted) {
|
|
104
|
-
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode: 429, requestId: config.trackerId });
|
|
105
|
-
}
|
|
88
|
+
this.deps.adaptiveController?.onRequestComplete(providerId, { success: false, statusCode: 429, requestId: config.trackerId });
|
|
106
89
|
}
|
|
107
90
|
throw e;
|
|
108
91
|
}
|
|
@@ -128,7 +111,7 @@ export class ProxyOrchestrator {
|
|
|
128
111
|
mappingReason: config.mappingReason,
|
|
129
112
|
};
|
|
130
113
|
}
|
|
131
|
-
async executeResilience(config, ctx
|
|
114
|
+
async executeResilience(config, ctx) {
|
|
132
115
|
if (!ctx?.transportFn)
|
|
133
116
|
throw new Error("HandleContext.transportFn is required");
|
|
134
117
|
const resilienceConfig = {
|
|
@@ -138,7 +121,7 @@ export class ProxyOrchestrator {
|
|
|
138
121
|
ruleMatcher: ctx.ruleMatcher,
|
|
139
122
|
providerId: config.provider.id,
|
|
140
123
|
};
|
|
141
|
-
return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig
|
|
124
|
+
return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig);
|
|
142
125
|
}
|
|
143
126
|
sendResponse(reply, result, ctx) {
|
|
144
127
|
if (result.kind === "stream_success" || result.kind === "stream_abort" || result.kind === "throw") {
|
|
@@ -56,7 +56,5 @@ export interface ResilienceState {
|
|
|
56
56
|
}
|
|
57
57
|
export declare class ResilienceLayer {
|
|
58
58
|
decide(result: TransportResult, state: ResilienceState, config: ResilienceConfig): ResilienceDecision;
|
|
59
|
-
|
|
60
|
-
private clientAbortedResult;
|
|
61
|
-
execute(targets: () => Target[], fn: (target: Target, signal?: AbortSignal) => Promise<TransportResult>, config: ResilienceConfig, signal?: AbortSignal): Promise<ResilienceResult>;
|
|
59
|
+
execute(targets: () => Target[], fn: (target: Target) => Promise<TransportResult>, config: ResilienceConfig): Promise<ResilienceResult>;
|
|
62
60
|
}
|
|
@@ -136,16 +136,7 @@ export class ResilienceLayer {
|
|
|
136
136
|
}
|
|
137
137
|
return { action: "done" };
|
|
138
138
|
}
|
|
139
|
-
|
|
140
|
-
clientAbortedResult(allAttempts, excludedTargets) {
|
|
141
|
-
return {
|
|
142
|
-
result: { kind: "throw", error: new Error("client aborted") },
|
|
143
|
-
attempts: allAttempts,
|
|
144
|
-
excludedTargets,
|
|
145
|
-
finalDecision: { action: "abort", reason: "client_aborted" },
|
|
146
|
-
};
|
|
147
|
-
}
|
|
148
|
-
async execute(targets, fn, config, signal) {
|
|
139
|
+
async execute(targets, fn, config) {
|
|
149
140
|
const allAttempts = [];
|
|
150
141
|
const excludedTargets = [];
|
|
151
142
|
const perTargetCounts = new Map();
|
|
@@ -157,9 +148,6 @@ export class ResilienceLayer {
|
|
|
157
148
|
perTargetCounts.set(targetKey(t), (perTargetCounts.get(targetKey(t)) ?? 0) + 1);
|
|
158
149
|
};
|
|
159
150
|
while (true) {
|
|
160
|
-
// 客户端断连短路:不重试、不触发 failover
|
|
161
|
-
if (signal?.aborted)
|
|
162
|
-
return this.clientAbortedResult(allAttempts, excludedTargets);
|
|
163
151
|
if (globalAttemptIndex >= (config.iterationCap ?? DEFAULT_ITERATION_CAP)) {
|
|
164
152
|
return {
|
|
165
153
|
result: lastResult ?? { kind: "error", statusCode: 502, body: "Iteration cap exceeded", headers: {}, sentHeaders: {}, sentBody: "" },
|
|
@@ -183,7 +171,7 @@ export class ResilienceLayer {
|
|
|
183
171
|
const start = Date.now();
|
|
184
172
|
let transportResult;
|
|
185
173
|
try {
|
|
186
|
-
transportResult = await fn(currentTarget
|
|
174
|
+
transportResult = await fn(currentTarget);
|
|
187
175
|
}
|
|
188
176
|
catch (err) {
|
|
189
177
|
const errMsg = err instanceof Error ? err.message : JSON.stringify(err);
|
|
@@ -223,9 +211,6 @@ export class ResilienceLayer {
|
|
|
223
211
|
case "retry":
|
|
224
212
|
globalAttemptIndex++;
|
|
225
213
|
await sleep(decision.delayMs);
|
|
226
|
-
// sleep 期间客户端可能断连,再次检查避免无效重试
|
|
227
|
-
if (signal?.aborted)
|
|
228
|
-
return this.clientAbortedResult(allAttempts, excludedTargets);
|
|
229
214
|
continue;
|
|
230
215
|
case "failover":
|
|
231
216
|
excludedTargets.push(decision.excludeTarget);
|
|
@@ -5,7 +5,7 @@ import type { ActiveRequest, AttemptSnapshot } from "../../core/monitor/types.js
|
|
|
5
5
|
export declare class SemaphoreScope {
|
|
6
6
|
private manager;
|
|
7
7
|
constructor(manager: SemaphoreManager);
|
|
8
|
-
withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride
|
|
8
|
+
withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>, concurrencyOverride?: ConcurrencyOverride): Promise<T>;
|
|
9
9
|
}
|
|
10
10
|
export declare class TrackerScope {
|
|
11
11
|
private tracker;
|
|
@@ -3,8 +3,8 @@ export class SemaphoreScope {
|
|
|
3
3
|
constructor(manager) {
|
|
4
4
|
this.manager = manager;
|
|
5
5
|
}
|
|
6
|
-
async withSlot(providerId, signal, onQueued, fn, concurrencyOverride
|
|
7
|
-
const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride
|
|
6
|
+
async withSlot(providerId, signal, onQueued, fn, concurrencyOverride) {
|
|
7
|
+
const token = await this.manager.acquire(providerId, signal, onQueued, undefined, concurrencyOverride);
|
|
8
8
|
try {
|
|
9
9
|
return await fn();
|
|
10
10
|
}
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
import type { Agent } from "http";
|
|
2
2
|
import type { RawHeaders, TransportResult } from "../types.js";
|
|
3
3
|
export { callStream } from "./stream.js";
|
|
4
|
-
/** 非流式/流式调用通用可选项:客户端断连信号 + 上游无活动超时。 */
|
|
5
|
-
export interface TransportCallOpts {
|
|
6
|
-
signal?: AbortSignal;
|
|
7
|
-
}
|
|
8
|
-
/** callNonStream 选项:timeoutMs=0/Infinity 表示禁用超时。 */
|
|
9
|
-
export interface NonStreamCallOpts extends TransportCallOpts {
|
|
10
|
-
timeoutMs?: number;
|
|
11
|
-
}
|
|
12
|
-
/** callGet 选项:仅超时(admin 探测,无客户端 signal 关联)。 */
|
|
13
|
-
export interface GetCallOpts {
|
|
14
|
-
timeoutMs?: number;
|
|
15
|
-
}
|
|
16
4
|
export interface UpstreamRequestOptions {
|
|
17
5
|
hostname: string;
|
|
18
6
|
port: number;
|
|
@@ -27,7 +15,7 @@ export declare function buildRequestOptions(url: URL, headers: Record<string, st
|
|
|
27
15
|
export type BuildHeadersFn = (cliHdrs: RawHeaders, key: string, bytes?: number) => Record<string, string>;
|
|
28
16
|
export declare function callNonStream(backend: {
|
|
29
17
|
base_url: string;
|
|
30
|
-
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: BuildHeadersFn, agent?: Agent
|
|
18
|
+
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: BuildHeadersFn, agent?: Agent): Promise<TransportResult>;
|
|
31
19
|
export interface GetTransportResult {
|
|
32
20
|
statusCode: number;
|
|
33
21
|
body: string;
|
|
@@ -35,4 +23,4 @@ export interface GetTransportResult {
|
|
|
35
23
|
}
|
|
36
24
|
export declare function callGet(backend: {
|
|
37
25
|
base_url: string;
|
|
38
|
-
}, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: (cliHdrs: RawHeaders, key: string) => Record<string, string>, agent?: Agent
|
|
26
|
+
}, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: (cliHdrs: RawHeaders, key: string) => Record<string, string>, agent?: Agent): Promise<GetTransportResult>;
|