llm-simple-router 0.6.4 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/recommended-retry-rules.json +1 -2
- package/dist/admin/providers.d.ts +2 -0
- package/dist/admin/providers.js +30 -1
- package/dist/admin/proxy-enhancement.js +6 -0
- package/dist/admin/routes.d.ts +2 -0
- package/dist/admin/routes.js +1 -1
- package/dist/config.d.ts +0 -2
- package/dist/config.js +0 -5
- package/dist/db/migrations/033_add_adaptive_concurrency.sql +3 -0
- package/dist/db/providers.d.ts +3 -1
- package/dist/db/providers.js +3 -3
- package/dist/index.js +15 -11
- package/dist/metrics/sse-metrics-transform.d.ts +1 -1
- package/dist/metrics/sse-metrics-transform.js +1 -2
- package/dist/monitor/request-tracker.d.ts +3 -0
- package/dist/monitor/request-tracker.js +7 -0
- package/dist/monitor/types.d.ts +2 -0
- package/dist/proxy/adaptive-controller.d.ts +42 -0
- package/dist/proxy/adaptive-controller.js +130 -0
- package/dist/proxy/anthropic.d.ts +2 -0
- package/dist/proxy/anthropic.js +2 -2
- package/dist/proxy/enhancement/enhancement-handler.d.ts +2 -1
- package/dist/proxy/enhancement/enhancement-handler.js +2 -2
- package/dist/proxy/enhancement-config.d.ts +2 -0
- package/dist/proxy/enhancement-config.js +4 -0
- package/dist/proxy/openai.d.ts +2 -0
- package/dist/proxy/openai.js +2 -2
- package/dist/proxy/orchestrator.d.ts +3 -1
- package/dist/proxy/orchestrator.js +32 -18
- package/dist/proxy/patch/deepseek/index.d.ts +1 -1
- package/dist/proxy/patch/deepseek/index.js +3 -3
- package/dist/proxy/patch/deepseek/patch-orphan-tool-results.d.ts +4 -3
- package/dist/proxy/patch/deepseek/patch-orphan-tool-results.js +30 -23
- package/dist/proxy/patch/deepseek/patch-thinking-blocks.d.ts +4 -8
- package/dist/proxy/patch/deepseek/patch-thinking-blocks.js +12 -57
- package/dist/proxy/patch/index.d.ts +3 -12
- package/dist/proxy/patch/index.js +4 -13
- package/dist/proxy/proxy-handler.js +10 -9
- package/dist/proxy/stream-proxy.js +0 -2
- package/dist/proxy/transport-fn.d.ts +1 -2
- package/dist/proxy/transport-fn.js +5 -11
- package/frontend-dist/assets/{CardContent-CpiBn1Oc.js → CardContent-jQcfCC7J.js} +1 -1
- package/frontend-dist/assets/{CardTitle-dwtgd_nl.js → CardTitle-BrCTvULL.js} +1 -1
- package/frontend-dist/assets/{CascadingModelSelect-CxEXwaeM.js → CascadingModelSelect-BFh67j5d.js} +1 -1
- package/frontend-dist/assets/{Checkbox-D2U4I-pO.js → Checkbox-Bbt7JpdE.js} +1 -1
- package/frontend-dist/assets/{CollapsibleTrigger-B2AdbZBh.js → CollapsibleTrigger-DMnEA0qC.js} +1 -1
- package/frontend-dist/assets/{Collection-BJZSFJsF.js → Collection-CVk3TPHc.js} +1 -1
- package/frontend-dist/assets/{Dashboard-D3cDhJNh.js → Dashboard-Coftbg4B.js} +1 -1
- package/frontend-dist/assets/{DialogTitle-BTuQdRm1.js → DialogTitle-BbOAZzPQ.js} +1 -1
- package/frontend-dist/assets/{Input-BYULYPCe.js → Input-DdHY9q0w.js} +1 -1
- package/frontend-dist/assets/{Label-sImW5XUw.js → Label-DRQv_Dr_.js} +1 -1
- package/frontend-dist/assets/{Login-B0kGGZFi.js → Login-SV3ctFnJ.js} +1 -1
- package/frontend-dist/assets/{Logs-BpAeeJRi.js → Logs-BG45kX6E.js} +1 -1
- package/frontend-dist/assets/{ModelMappings-CsHLYqQB.js → ModelMappings-DEaBnRU3.js} +1 -1
- package/frontend-dist/assets/Monitor-ZHOt11n-.js +1 -0
- package/frontend-dist/assets/{PopoverTrigger-CIN3yOIw.js → PopoverTrigger-z-Z3EjBk.js} +1 -1
- package/frontend-dist/assets/{PopperContent-BoOYHCag.js → PopperContent-DPC-6a3n.js} +1 -1
- package/frontend-dist/assets/Providers-DpY6pAcg.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-D6KBDXMp.js +5 -0
- package/frontend-dist/assets/{RetryRules-Ce5HfNcc.js → RetryRules-DWI7_WLZ.js} +1 -1
- package/frontend-dist/assets/{RouterKeys-CeSGvjll.js → RouterKeys-CZ1657eX.js} +1 -1
- package/frontend-dist/assets/{RovingFocusItem-gAYs0l8Z.js → RovingFocusItem-BREE2YEV.js} +1 -1
- package/frontend-dist/assets/{Schedules-DphkPAWD.js → Schedules-BVPsBRPi.js} +1 -1
- package/frontend-dist/assets/{SelectValue-Cbbd2Xbm.js → SelectValue-H8hwQwbk.js} +1 -1
- package/frontend-dist/assets/{Settings-DIP7VawX.js → Settings-DHYaYRgU.js} +1 -1
- package/frontend-dist/assets/{Setup-_d_M-Qi6.js → Setup-yOYNKkOG.js} +1 -1
- package/frontend-dist/assets/{Switch-BmmYsqAx.js → Switch-CojD3rTH.js} +1 -1
- package/frontend-dist/assets/{TableHeader-C1mpCsyo.js → TableHeader-awoHTsWN.js} +1 -1
- package/frontend-dist/assets/{TabsTrigger-kN1usMvC.js → TabsTrigger-DTKSFj85.js} +1 -1
- package/frontend-dist/assets/{Teleport-CRn-gy0B.js → Teleport-DehYAXud.js} +1 -1
- package/frontend-dist/assets/{TooltipTrigger-rlKo7E3A.js → TooltipTrigger-C2dl_dml.js} +1 -1
- package/frontend-dist/assets/UnifiedRequestDialog-BjEigSaR.css +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-C8A-uSTR.js +3 -0
- package/frontend-dist/assets/{VisuallyHidden-CQvCw9gB.js → VisuallyHidden-C8oaGi2S.js} +1 -1
- package/frontend-dist/assets/{VisuallyHiddenInput-B-DnnaWN.js → VisuallyHiddenInput-BMc813t2.js} +1 -1
- package/frontend-dist/assets/{alert-dialog-M3PhwD75.js → alert-dialog-C8TZQmU6.js} +1 -1
- package/frontend-dist/assets/arrow-down-D-cQXxau.js +1 -0
- package/frontend-dist/assets/{badge-7WIbGMsE.js → badge-BVh2WpA5.js} +1 -1
- package/frontend-dist/assets/{button-q9xTxPJh.js → button-N59D1BGa.js} +2 -2
- package/frontend-dist/assets/check-dDgrw3T3.js +1 -0
- package/frontend-dist/assets/{copy-C1a8OrYP.js → copy-DTOecxa9.js} +1 -1
- package/frontend-dist/assets/{dialog-B6RnMgGx.js → dialog-kA7AUNoc.js} +1 -1
- package/frontend-dist/assets/{file-text-CsuQUjXR.js → file-text-DzZCFO7y.js} +1 -1
- package/frontend-dist/assets/{index-Dh7qL0Qt.js → index-B5upNblU.js} +1 -1
- package/frontend-dist/assets/index-xjdbFKXJ.css +1 -0
- package/frontend-dist/assets/{lib-D1G8Xa05.js → lib-ClDokUbt.js} +1 -1
- package/frontend-dist/assets/loader-circle-DVHRL-38.js +1 -0
- package/frontend-dist/assets/{useClipboard-CBONMfzU.js → useClipboard-DU1ne-Jw.js} +1 -1
- package/frontend-dist/assets/{useFocusGuards-DwFX8o1a.js → useFocusGuards-Btmdbg_F.js} +1 -1
- package/frontend-dist/assets/useFormControl-C5Kjziuj.js +1 -0
- package/frontend-dist/assets/{useLogRetention-C2IbjXjr.js → useLogRetention--EGNWXig.js} +1 -1
- package/frontend-dist/assets/useNonce-Cp31yRzV.js +1 -0
- package/frontend-dist/assets/x-DMktsI_w.js +1 -0
- package/frontend-dist/index.html +20 -20
- package/package.json +1 -1
- package/frontend-dist/assets/Monitor-BmMFWFJg.js +0 -1
- package/frontend-dist/assets/Providers-DrepCc4A.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-dK_mOQ3m.js +0 -5
- package/frontend-dist/assets/UnifiedRequestDialog-C-Ui-fav.css +0 -1
- package/frontend-dist/assets/UnifiedRequestDialog-C7pEDa9D.js +0 -4
- package/frontend-dist/assets/arrow-down-BHq-drH-.js +0 -1
- package/frontend-dist/assets/check-CccuM1Sj.js +0 -1
- package/frontend-dist/assets/index-Ce7hFHTt.css +0 -1
- package/frontend-dist/assets/loader-circle-CVhxR0Tt.js +0 -1
- package/frontend-dist/assets/useFormControl-DC32gW1A.js +0 -1
- package/frontend-dist/assets/useNonce-t7XaR4bX.js +0 -1
- package/frontend-dist/assets/x-Bq5KcbWI.js +0 -1
|
@@ -6,6 +6,5 @@
|
|
|
6
6
|
{ "name": "ZAI 操作失败 (code 500)", "status_code": 400, "body_pattern": "\"type\"\\s*:\\s*\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
|
|
7
7
|
{ "name": "ZAI 速率限制 (HTTP 200, code 1302)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1302\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
|
|
8
8
|
{ "name": "ZAI SSE 错误 (HTTP 200, code 500)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"500\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
|
|
9
|
-
{ "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 },
|
|
10
|
-
{ "name": "ZAI 过载限流 (HTTP 200, code 1305)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1305\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 }
|
|
9
|
+
{ "name": "ZAI SSE 错误 (HTTP 200, code 1234)", "status_code": 200, "body_pattern": "\"error\".*\"code\"\\s*:\\s*\"1234\"", "retry_strategy": "exponential", "retry_delay_ms": 5000, "max_retries": 10, "max_delay_ms": 60000 }
|
|
11
10
|
]
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import { FastifyPluginCallback } from "fastify";
|
|
2
2
|
import Database from "better-sqlite3";
|
|
3
3
|
import { ProviderSemaphoreManager } from "../proxy/semaphore.js";
|
|
4
|
+
import type { AdaptiveConcurrencyController } from "../proxy/adaptive-controller.js";
|
|
4
5
|
import type { RequestTracker } from "../monitor/request-tracker.js";
|
|
5
6
|
interface ProviderRoutesOptions {
|
|
6
7
|
db: Database.Database;
|
|
7
8
|
semaphoreManager?: ProviderSemaphoreManager;
|
|
8
9
|
tracker?: RequestTracker;
|
|
10
|
+
adaptiveController?: AdaptiveConcurrencyController;
|
|
9
11
|
}
|
|
10
12
|
export declare const adminProviderRoutes: FastifyPluginCallback<ProviderRoutesOptions>;
|
|
11
13
|
export {};
|
package/dist/admin/providers.js
CHANGED
|
@@ -77,6 +77,7 @@ const CreateProviderSchema = Type.Object({
|
|
|
77
77
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
78
78
|
queue_timeout_ms: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
79
79
|
max_queue_size: Type.Optional(Type.Integer({ minimum: 1 })),
|
|
80
|
+
adaptive_enabled: Type.Optional(Type.Integer({ minimum: 0, maximum: 1 })),
|
|
80
81
|
});
|
|
81
82
|
const UpdateProviderSchema = Type.Object({
|
|
82
83
|
name: Type.Optional(Type.String({ minLength: 1 })),
|
|
@@ -91,9 +92,10 @@ const UpdateProviderSchema = Type.Object({
|
|
|
91
92
|
max_concurrency: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
92
93
|
queue_timeout_ms: Type.Optional(Type.Integer({ minimum: 0 })),
|
|
93
94
|
max_queue_size: Type.Optional(Type.Integer({ minimum: 1 })),
|
|
95
|
+
adaptive_enabled: Type.Optional(Type.Integer({ minimum: 0, maximum: 1 })),
|
|
94
96
|
});
|
|
95
97
|
export const adminProviderRoutes = (app, options, done) => {
|
|
96
|
-
const { db, semaphoreManager, tracker } = options;
|
|
98
|
+
const { db, semaphoreManager, tracker, adaptiveController } = options;
|
|
97
99
|
app.get("/admin/api/providers", async (_request, reply) => {
|
|
98
100
|
const encryptionKey = getSetting(db, "encryption_key");
|
|
99
101
|
const providers = getAllProviders(db);
|
|
@@ -111,6 +113,7 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
111
113
|
max_concurrency: s.max_concurrency,
|
|
112
114
|
queue_timeout_ms: s.queue_timeout_ms,
|
|
113
115
|
max_queue_size: s.max_queue_size,
|
|
116
|
+
adaptive_enabled: s.adaptive_enabled,
|
|
114
117
|
concurrency_status: semaphoreManager?.getStatus(s.id) ?? { active: 0, queued: 0 },
|
|
115
118
|
created_at: s.created_at,
|
|
116
119
|
updated_at: s.updated_at,
|
|
@@ -128,6 +131,7 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
128
131
|
}
|
|
129
132
|
const encryptedKey = encrypt(body.api_key, getSetting(db, "encryption_key"));
|
|
130
133
|
const { names: normalizedModels, overrides: contextOverrides } = extractModelOverrides((body.models ?? []));
|
|
134
|
+
const isAdaptiveEnabled = body.adaptive_enabled ?? 0;
|
|
131
135
|
const id = createProvider(db, {
|
|
132
136
|
name: body.name,
|
|
133
137
|
api_type: body.api_type,
|
|
@@ -139,6 +143,7 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
139
143
|
max_concurrency: body.max_concurrency ?? PROVIDER_CONCURRENCY_DEFAULTS.max_concurrency,
|
|
140
144
|
queue_timeout_ms: body.queue_timeout_ms ?? PROVIDER_CONCURRENCY_DEFAULTS.queue_timeout_ms,
|
|
141
145
|
max_queue_size: body.max_queue_size ?? PROVIDER_CONCURRENCY_DEFAULTS.max_queue_size,
|
|
146
|
+
adaptive_enabled: isAdaptiveEnabled,
|
|
142
147
|
});
|
|
143
148
|
if (contextOverrides.length > 0) {
|
|
144
149
|
setModelInfoForProvider(db, id, contextOverrides.map(o => ({ model_name: o.name, context_window: o.context_window })));
|
|
@@ -148,6 +153,12 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
148
153
|
queueTimeoutMs: body.queue_timeout_ms ?? PROVIDER_CONCURRENCY_DEFAULTS.queue_timeout_ms,
|
|
149
154
|
maxQueueSize: body.max_queue_size ?? PROVIDER_CONCURRENCY_DEFAULTS.max_queue_size,
|
|
150
155
|
});
|
|
156
|
+
adaptiveController?.syncProvider(id, {
|
|
157
|
+
adaptive_enabled: isAdaptiveEnabled,
|
|
158
|
+
max_concurrency: body.max_concurrency ?? PROVIDER_CONCURRENCY_DEFAULTS.max_concurrency,
|
|
159
|
+
queue_timeout_ms: body.queue_timeout_ms ?? PROVIDER_CONCURRENCY_DEFAULTS.queue_timeout_ms,
|
|
160
|
+
max_queue_size: body.max_queue_size ?? PROVIDER_CONCURRENCY_DEFAULTS.max_queue_size,
|
|
161
|
+
});
|
|
151
162
|
tracker?.updateProviderConfig(id, {
|
|
152
163
|
name: body.name,
|
|
153
164
|
maxConcurrency: body.max_concurrency ?? PROVIDER_CONCURRENCY_DEFAULTS.max_concurrency,
|
|
@@ -191,6 +202,8 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
191
202
|
fields.queue_timeout_ms = body.queue_timeout_ms;
|
|
192
203
|
if (body.max_queue_size !== undefined)
|
|
193
204
|
fields.max_queue_size = body.max_queue_size;
|
|
205
|
+
if (body.adaptive_enabled !== undefined)
|
|
206
|
+
fields.adaptive_enabled = body.adaptive_enabled;
|
|
194
207
|
if (body.api_key) {
|
|
195
208
|
fields.api_key = encrypt(body.api_key, getSetting(db, "encryption_key"));
|
|
196
209
|
fields.api_key_preview = body.api_key.length > API_KEY_PREVIEW_MIN_LENGTH ? `${body.api_key.slice(0, API_KEY_PREVIEW_PREFIX_LEN)}...${body.api_key.slice(-API_KEY_PREVIEW_PREFIX_LEN)}` : "****";
|
|
@@ -208,6 +221,14 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
208
221
|
maxQueueSize: updated.max_queue_size,
|
|
209
222
|
});
|
|
210
223
|
}
|
|
224
|
+
if (body.adaptive_enabled !== undefined || body.max_concurrency !== undefined || body.queue_timeout_ms !== undefined || body.max_queue_size !== undefined) {
|
|
225
|
+
adaptiveController?.syncProvider(id, {
|
|
226
|
+
adaptive_enabled: updated.adaptive_enabled,
|
|
227
|
+
max_concurrency: updated.max_concurrency,
|
|
228
|
+
queue_timeout_ms: updated.queue_timeout_ms,
|
|
229
|
+
max_queue_size: updated.max_queue_size,
|
|
230
|
+
});
|
|
231
|
+
}
|
|
211
232
|
tracker?.updateProviderConfig(id, {
|
|
212
233
|
name: body.name ?? existing.name,
|
|
213
234
|
maxConcurrency: updated.max_concurrency,
|
|
@@ -272,8 +293,16 @@ export const adminProviderRoutes = (app, options, done) => {
|
|
|
272
293
|
}
|
|
273
294
|
deleteProvider(db, id);
|
|
274
295
|
semaphoreManager?.remove(id);
|
|
296
|
+
adaptiveController?.remove(id);
|
|
275
297
|
tracker?.removeProviderConfig(id);
|
|
276
298
|
return reply.send({ success: true });
|
|
277
299
|
});
|
|
300
|
+
app.get("/admin/api/providers/:id/adaptive-status", async (request, reply) => {
|
|
301
|
+
const { id } = request.params;
|
|
302
|
+
const status = adaptiveController?.getStatus(id);
|
|
303
|
+
if (!status)
|
|
304
|
+
return reply.code(HTTP_NOT_FOUND).send({ error: "Not found or adaptive not enabled" });
|
|
305
|
+
return status;
|
|
306
|
+
});
|
|
278
307
|
done();
|
|
279
308
|
};
|
|
@@ -3,6 +3,8 @@ import { setSetting } from "../db/settings.js";
|
|
|
3
3
|
import { loadEnhancementConfig } from "../proxy/enhancement-config.js";
|
|
4
4
|
const UpdateProxyEnhancementSchema = Type.Object({
|
|
5
5
|
claude_code_enabled: Type.Boolean(),
|
|
6
|
+
tool_call_loop_enabled: Type.Boolean(),
|
|
7
|
+
stream_loop_enabled: Type.Boolean(),
|
|
6
8
|
});
|
|
7
9
|
const SessionParamsSchema = Type.Object({
|
|
8
10
|
keyId: Type.String(),
|
|
@@ -16,12 +18,16 @@ export const adminProxyEnhancementRoutes = (app, options, done) => {
|
|
|
16
18
|
const config = loadEnhancementConfig(db);
|
|
17
19
|
return reply.send({
|
|
18
20
|
claude_code_enabled: config.claude_code_enabled,
|
|
21
|
+
tool_call_loop_enabled: config.tool_call_loop_enabled,
|
|
22
|
+
stream_loop_enabled: config.stream_loop_enabled,
|
|
19
23
|
});
|
|
20
24
|
});
|
|
21
25
|
app.put("/admin/api/proxy-enhancement", { schema: { body: UpdateProxyEnhancementSchema } }, async (request, reply) => {
|
|
22
26
|
const body = request.body;
|
|
23
27
|
const config = {
|
|
24
28
|
claude_code_enabled: body.claude_code_enabled,
|
|
29
|
+
tool_call_loop_enabled: body.tool_call_loop_enabled,
|
|
30
|
+
stream_loop_enabled: body.stream_loop_enabled,
|
|
25
31
|
};
|
|
26
32
|
setSetting(db, "proxy_enhancement", JSON.stringify(config));
|
|
27
33
|
return reply.send({ success: true });
|
package/dist/admin/routes.d.ts
CHANGED
|
@@ -3,11 +3,13 @@ import Database from "better-sqlite3";
|
|
|
3
3
|
import { RetryRuleMatcher } from "../proxy/retry-rules.js";
|
|
4
4
|
import type { RequestTracker } from "../monitor/request-tracker.js";
|
|
5
5
|
import { ProviderSemaphoreManager } from "../proxy/semaphore.js";
|
|
6
|
+
import type { AdaptiveConcurrencyController } from "../proxy/adaptive-controller.js";
|
|
6
7
|
interface AdminRoutesOptions {
|
|
7
8
|
db: Database.Database;
|
|
8
9
|
matcher: RetryRuleMatcher | null;
|
|
9
10
|
tracker?: RequestTracker;
|
|
10
11
|
semaphoreManager?: ProviderSemaphoreManager;
|
|
12
|
+
adaptiveController?: AdaptiveConcurrencyController;
|
|
11
13
|
}
|
|
12
14
|
export declare const adminRoutes: FastifyPluginCallback<AdminRoutesOptions>;
|
|
13
15
|
export {};
|
package/dist/admin/routes.js
CHANGED
|
@@ -21,7 +21,7 @@ export const adminRoutes = (app, options, done) => {
|
|
|
21
21
|
app.register(adminSetupRoutes, { db: options.db });
|
|
22
22
|
app.register(adminAuthPlugin, { db: options.db });
|
|
23
23
|
app.register(adminLoginRoutes, { db: options.db });
|
|
24
|
-
app.register(adminProviderRoutes, { db: options.db, semaphoreManager: options.semaphoreManager, tracker: options.tracker });
|
|
24
|
+
app.register(adminProviderRoutes, { db: options.db, semaphoreManager: options.semaphoreManager, tracker: options.tracker, adaptiveController: options.adaptiveController });
|
|
25
25
|
app.register(adminMappingRoutes, { db: options.db });
|
|
26
26
|
app.register(adminGroupRoutes, { db: options.db });
|
|
27
27
|
app.register(adminScheduleRoutes, { db: options.db });
|
package/dist/config.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { LoopPreventionConfig } from "./proxy/loop-prevention/types.js";
|
|
2
1
|
export interface Config {
|
|
3
2
|
PORT: number;
|
|
4
3
|
DB_PATH: string;
|
|
@@ -6,7 +5,6 @@ export interface Config {
|
|
|
6
5
|
TZ: string;
|
|
7
6
|
STREAM_TIMEOUT_MS: number;
|
|
8
7
|
RETRY_BASE_DELAY_MS: number;
|
|
9
|
-
LOOP_PREVENTION: LoopPreventionConfig;
|
|
10
8
|
}
|
|
11
9
|
export declare function resetConfig(): void;
|
|
12
10
|
export declare function getBaseConfig(): Config;
|
package/dist/config.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { homedir } from "node:os";
|
|
2
2
|
import { join } from "node:path";
|
|
3
|
-
import { DEFAULT_LOOP_PREVENTION_CONFIG } from "./proxy/loop-prevention/types.js";
|
|
4
3
|
let cachedConfig = null;
|
|
5
4
|
function getDefaultDbPath() {
|
|
6
5
|
if (process.env.DB_PATH)
|
|
@@ -20,10 +19,6 @@ export function getBaseConfig() {
|
|
|
20
19
|
TZ: process.env.TZ || "Asia/Shanghai",
|
|
21
20
|
STREAM_TIMEOUT_MS: parseInt(process.env.STREAM_TIMEOUT_MS || "3000000", 10),
|
|
22
21
|
RETRY_BASE_DELAY_MS: parseInt(process.env.RETRY_BASE_DELAY_MS || "1000", 10),
|
|
23
|
-
LOOP_PREVENTION: {
|
|
24
|
-
...DEFAULT_LOOP_PREVENTION_CONFIG,
|
|
25
|
-
...(process.env.LOOP_PREVENTION ? JSON.parse(process.env.LOOP_PREVENTION) : {}),
|
|
26
|
-
},
|
|
27
22
|
};
|
|
28
23
|
return cachedConfig;
|
|
29
24
|
}
|
package/dist/db/providers.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export interface Provider {
|
|
|
11
11
|
max_concurrency: number;
|
|
12
12
|
queue_timeout_ms: number;
|
|
13
13
|
max_queue_size: number;
|
|
14
|
+
adaptive_enabled: number;
|
|
14
15
|
created_at: string;
|
|
15
16
|
updated_at: string;
|
|
16
17
|
}
|
|
@@ -33,8 +34,9 @@ export declare function createProvider(db: Database.Database, provider: {
|
|
|
33
34
|
max_concurrency?: number;
|
|
34
35
|
queue_timeout_ms?: number;
|
|
35
36
|
max_queue_size?: number;
|
|
37
|
+
adaptive_enabled?: number;
|
|
36
38
|
}): string;
|
|
37
|
-
export declare function updateProvider(db: Database.Database, id: string, fields: Partial<Pick<Provider, "name" | "api_type" | "base_url" | "api_key" | "api_key_preview" | "models" | "is_active" | "max_concurrency" | "queue_timeout_ms" | "max_queue_size">>): void;
|
|
39
|
+
export declare function updateProvider(db: Database.Database, id: string, fields: Partial<Pick<Provider, "name" | "api_type" | "base_url" | "api_key" | "api_key_preview" | "models" | "is_active" | "max_concurrency" | "queue_timeout_ms" | "max_queue_size" | "adaptive_enabled">>): void;
|
|
38
40
|
export declare function deleteProvider(db: Database.Database, id: string): void;
|
|
39
41
|
export declare function getActiveProviderByName(db: Database.Database, name: string): {
|
|
40
42
|
id: string;
|
package/dist/db/providers.js
CHANGED
|
@@ -6,7 +6,7 @@ export const PROVIDER_CONCURRENCY_DEFAULTS = {
|
|
|
6
6
|
max_queue_size: 100,
|
|
7
7
|
};
|
|
8
8
|
const PROVIDER_FIELDS = new Set([
|
|
9
|
-
"name", "api_type", "base_url", "api_key", "api_key_preview", "models", "is_active", "max_concurrency", "queue_timeout_ms", "max_queue_size",
|
|
9
|
+
"name", "api_type", "base_url", "api_key", "api_key_preview", "models", "is_active", "max_concurrency", "queue_timeout_ms", "max_queue_size", "adaptive_enabled",
|
|
10
10
|
]);
|
|
11
11
|
export function getActiveProviders(db, apiType) {
|
|
12
12
|
return db
|
|
@@ -22,8 +22,8 @@ export function getProviderById(db, id) {
|
|
|
22
22
|
export function createProvider(db, provider) {
|
|
23
23
|
const id = randomUUID();
|
|
24
24
|
const now = new Date().toISOString();
|
|
25
|
-
db.prepare(`INSERT INTO providers (id, name, api_type, base_url, api_key, api_key_preview, models, is_active, max_concurrency, queue_timeout_ms, max_queue_size, created_at, updated_at)
|
|
26
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, provider.name, provider.api_type, provider.base_url, provider.api_key, provider.api_key_preview ?? null, provider.models ?? "[]", provider.is_active ?? 1, provider.max_concurrency ?? PROVIDER_CONCURRENCY_DEFAULTS.max_concurrency, provider.queue_timeout_ms ?? PROVIDER_CONCURRENCY_DEFAULTS.queue_timeout_ms, provider.max_queue_size ?? PROVIDER_CONCURRENCY_DEFAULTS.max_queue_size, now, now);
|
|
25
|
+
db.prepare(`INSERT INTO providers (id, name, api_type, base_url, api_key, api_key_preview, models, is_active, max_concurrency, queue_timeout_ms, max_queue_size, adaptive_enabled, created_at, updated_at)
|
|
26
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, provider.name, provider.api_type, provider.base_url, provider.api_key, provider.api_key_preview ?? null, provider.models ?? "[]", provider.is_active ?? 1, provider.max_concurrency ?? PROVIDER_CONCURRENCY_DEFAULTS.max_concurrency, provider.queue_timeout_ms ?? PROVIDER_CONCURRENCY_DEFAULTS.queue_timeout_ms, provider.max_queue_size ?? PROVIDER_CONCURRENCY_DEFAULTS.max_queue_size, provider.adaptive_enabled ?? 0, now, now);
|
|
27
27
|
return id;
|
|
28
28
|
}
|
|
29
29
|
export function updateProvider(db, id, fields) {
|
package/dist/index.js
CHANGED
|
@@ -20,12 +20,12 @@ import { anthropicProxy } from "./proxy/anthropic.js";
|
|
|
20
20
|
import { adminRoutes } from "./admin/routes.js";
|
|
21
21
|
import { RetryRuleMatcher } from "./proxy/retry-rules.js";
|
|
22
22
|
import { ProviderSemaphoreManager } from "./proxy/semaphore.js";
|
|
23
|
+
import { AdaptiveConcurrencyController } from "./proxy/adaptive-controller.js";
|
|
23
24
|
import { RequestTracker } from "./monitor/request-tracker.js";
|
|
24
25
|
import { modelState } from "./proxy/model-state.js";
|
|
25
26
|
import { UsageWindowTracker } from "./proxy/usage-window-tracker.js";
|
|
26
27
|
import { SessionTracker } from "./proxy/loop-prevention/session-tracker.js";
|
|
27
28
|
import { DEFAULT_LOOP_PREVENTION_CONFIG } from "./proxy/loop-prevention/types.js";
|
|
28
|
-
import { setLoopPreventionConfig } from "./proxy/transport-fn.js";
|
|
29
29
|
import { scheduleLogCleanup } from "./db/log-cleaner.js";
|
|
30
30
|
import { scheduleDbSizeMonitor } from "./db/db-size-monitor.js";
|
|
31
31
|
import { startUpgradeChecker, stopUpgradeChecker } from "./admin/upgrade.js";
|
|
@@ -156,21 +156,23 @@ export async function buildApp(options) {
|
|
|
156
156
|
const semaphoreManager = new ProviderSemaphoreManager();
|
|
157
157
|
const tracker = new RequestTracker({ semaphoreManager, logger: app.log });
|
|
158
158
|
tracker.startPushInterval();
|
|
159
|
+
const adaptiveController = new AdaptiveConcurrencyController(semaphoreManager, app.log);
|
|
160
|
+
tracker.setAdaptiveController(adaptiveController);
|
|
159
161
|
// 5h 用量窗口追踪器,启动时自动补齐缺失窗口
|
|
160
162
|
const usageWindowTracker = new UsageWindowTracker(db);
|
|
161
163
|
usageWindowTracker.reconcileOnStartup();
|
|
162
|
-
|
|
163
|
-
const sessionTracker = new SessionTracker(
|
|
164
|
-
// buildApp() 默认启用循环预防。
|
|
165
|
-
// 用户可通过环境变量 LOOP_PREVENTION='{"enabled":false}' 关闭。
|
|
166
|
-
// 直接注册插件的测试不使用 buildApp(),不受此影响。
|
|
167
|
-
setLoopPreventionConfig(process.env.LOOP_PREVENTION
|
|
168
|
-
? loopConfig
|
|
169
|
-
: { ...loopConfig, enabled: true });
|
|
164
|
+
// Session tracker(工具调用循环检测用),始终创建但检测受 proxy_enhancement 配置控制
|
|
165
|
+
const sessionTracker = new SessionTracker(DEFAULT_LOOP_PREVENTION_CONFIG.sessionTracker);
|
|
170
166
|
// 从 DB 读取已有 provider 的并发配置,初始化信号量管理器和 tracker
|
|
171
167
|
const allProviders = getAllProviders(db);
|
|
172
168
|
for (const p of allProviders) {
|
|
173
|
-
if (p.max_concurrency > 0) {
|
|
169
|
+
if (p.adaptive_enabled) {
|
|
170
|
+
adaptiveController.init(p.id, { max: p.max_concurrency }, {
|
|
171
|
+
queueTimeoutMs: p.queue_timeout_ms,
|
|
172
|
+
maxQueueSize: p.max_queue_size,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
else if (p.max_concurrency > 0) {
|
|
174
176
|
semaphoreManager.updateConfig(p.id, {
|
|
175
177
|
maxConcurrency: p.max_concurrency,
|
|
176
178
|
queueTimeoutMs: p.queue_timeout_ms,
|
|
@@ -194,6 +196,7 @@ export async function buildApp(options) {
|
|
|
194
196
|
tracker,
|
|
195
197
|
usageWindowTracker,
|
|
196
198
|
sessionTracker,
|
|
199
|
+
adaptiveController,
|
|
197
200
|
});
|
|
198
201
|
app.register(anthropicProxy, {
|
|
199
202
|
db,
|
|
@@ -204,8 +207,9 @@ export async function buildApp(options) {
|
|
|
204
207
|
tracker,
|
|
205
208
|
usageWindowTracker,
|
|
206
209
|
sessionTracker,
|
|
210
|
+
adaptiveController,
|
|
207
211
|
});
|
|
208
|
-
app.register(adminRoutes, { db, matcher, tracker, semaphoreManager });
|
|
212
|
+
app.register(adminRoutes, { db, matcher, tracker, semaphoreManager, adaptiveController });
|
|
209
213
|
// 前端静态文件服务(生产环境)
|
|
210
214
|
const frontendDist = path.resolve(process.env.FRONTEND_DIST || path.join(__dirname, "../frontend-dist"));
|
|
211
215
|
if (existsSync(frontendDist)) {
|
|
@@ -31,7 +31,7 @@ export declare class SSEMetricsTransform extends Transform {
|
|
|
31
31
|
_transform(chunk: Buffer, _encoding: BufferEncoding, callback: TransformCallback): void;
|
|
32
32
|
_flush(callback: TransformCallback): void;
|
|
33
33
|
getExtractor(): MetricsExtractor;
|
|
34
|
-
/**
|
|
34
|
+
/** 从 SSE 事件中提取内容文本,触发 onContentDelta 回调 */
|
|
35
35
|
private emitContentDelta;
|
|
36
36
|
/**
|
|
37
37
|
* 从 SSE data 字段中提取实际内容文本(thinking / text / tool JSON delta)。
|
|
@@ -47,7 +47,6 @@ export class SSEMetricsTransform extends Transform {
|
|
|
47
47
|
this.extractor.processEvent(event);
|
|
48
48
|
this.emitContentDelta(event);
|
|
49
49
|
}
|
|
50
|
-
// flush 无条件推送最终状态,确保消费者能拿到完整指标
|
|
51
50
|
if (this.onMetrics && !this.flushed) {
|
|
52
51
|
this.flushed = true;
|
|
53
52
|
this.lastCallbackTime = Date.now();
|
|
@@ -58,7 +57,7 @@ export class SSEMetricsTransform extends Transform {
|
|
|
58
57
|
getExtractor() {
|
|
59
58
|
return this.extractor;
|
|
60
59
|
}
|
|
61
|
-
/**
|
|
60
|
+
/** 从 SSE 事件中提取内容文本,触发 onContentDelta 回调 */
|
|
62
61
|
emitContentDelta(event) {
|
|
63
62
|
if (!this.onContentDelta || !event.data)
|
|
64
63
|
return;
|
|
@@ -2,6 +2,7 @@ import type { ServerResponse } from "node:http";
|
|
|
2
2
|
import { StatsAggregator } from "./stats-aggregator.js";
|
|
3
3
|
import { RuntimeCollector } from "./runtime-collector.js";
|
|
4
4
|
import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
|
|
5
|
+
import type { AdaptiveConcurrencyController } from "../proxy/adaptive-controller.js";
|
|
5
6
|
import type { ActiveRequest, AttemptSnapshot, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
|
|
6
7
|
export interface TrackerLogger {
|
|
7
8
|
debug(obj: Record<string, unknown>, msg: string): void;
|
|
@@ -22,11 +23,13 @@ export declare class RequestTracker {
|
|
|
22
23
|
readonly statsAggregator: StatsAggregator;
|
|
23
24
|
readonly runtimeCollector: RuntimeCollector;
|
|
24
25
|
private readonly semaphoreManager?;
|
|
26
|
+
private adaptiveController?;
|
|
25
27
|
constructor(deps?: {
|
|
26
28
|
semaphoreManager?: ProviderSemaphoreManager;
|
|
27
29
|
runtimeCollector?: RuntimeCollector;
|
|
28
30
|
logger?: TrackerLogger;
|
|
29
31
|
});
|
|
32
|
+
setAdaptiveController(ctrl: AdaptiveConcurrencyController): void;
|
|
30
33
|
start(req: ActiveRequest): void;
|
|
31
34
|
/** 轻量级节流推送:流式内容变更后 500ms 内批量广播 */
|
|
32
35
|
private scheduleStreamContentPush;
|
|
@@ -22,12 +22,16 @@ export class RequestTracker {
|
|
|
22
22
|
statsAggregator;
|
|
23
23
|
runtimeCollector;
|
|
24
24
|
semaphoreManager;
|
|
25
|
+
adaptiveController;
|
|
25
26
|
constructor(deps) {
|
|
26
27
|
this.semaphoreManager = deps?.semaphoreManager;
|
|
27
28
|
this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
|
|
28
29
|
this.statsAggregator = new StatsAggregator();
|
|
29
30
|
this.logger = deps?.logger;
|
|
30
31
|
}
|
|
32
|
+
setAdaptiveController(ctrl) {
|
|
33
|
+
this.adaptiveController = ctrl;
|
|
34
|
+
}
|
|
31
35
|
// --- Core methods ---
|
|
32
36
|
start(req) {
|
|
33
37
|
this.activeMap.set(req.id, { ...req });
|
|
@@ -142,6 +146,7 @@ export class RequestTracker {
|
|
|
142
146
|
const result = [];
|
|
143
147
|
for (const [providerId, config] of this.providerConfigCache) {
|
|
144
148
|
const status = this.semaphoreManager.getStatus(providerId);
|
|
149
|
+
const adaptiveState = this.adaptiveController?.getStatus(providerId);
|
|
145
150
|
result.push({
|
|
146
151
|
providerId,
|
|
147
152
|
providerName: config.name,
|
|
@@ -150,6 +155,8 @@ export class RequestTracker {
|
|
|
150
155
|
queued: status.queued,
|
|
151
156
|
queueTimeoutMs: config.queueTimeoutMs,
|
|
152
157
|
maxQueueSize: config.maxQueueSize,
|
|
158
|
+
adaptiveEnabled: adaptiveState !== undefined,
|
|
159
|
+
adaptiveLimit: adaptiveState?.currentLimit,
|
|
153
160
|
});
|
|
154
161
|
}
|
|
155
162
|
return result;
|
package/dist/monitor/types.d.ts
CHANGED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ProviderSemaphoreManager } from "./semaphore.js";
|
|
2
|
+
/** Per-provider adaptive state as returned by `getStatus`. */
export interface AdaptiveState {
    /** Concurrency limit currently chosen by the controller. */
    currentLimit: number;
    /** Whether a probe window is open (one extra slot is requested from the semaphore). */
    probeActive: boolean;
    /** Successes observed in a row since the last failure or state transition. */
    consecutiveSuccesses: number;
    /** Failures observed in a row since the last success or back-off. */
    consecutiveFailures: number;
    /** Epoch-millisecond timestamp (compared with `Date.now()`) before which successes do not raise the limit. */
    cooldownUntil: number;
}
/** Outcome of a single upstream request, fed to `onRequestComplete`. */
interface AdaptiveResult {
    success: boolean;
    /** HTTP status of a failed request; 429 triggers the rate-limit back-off. */
    statusCode?: number;
}
/** Provider settings consumed by `syncProvider` (snake_case field names). */
export interface ProviderAdaptiveConfig {
    /** Tested for truthiness: non-zero enables adaptive control, zero disables it. */
    adaptive_enabled: number;
    max_concurrency: number;
    queue_timeout_ms: number;
    max_queue_size: number;
}
/** Minimal structured logger used by the controller. */
export interface ControllerLogger {
    debug(obj: Record<string, unknown>, msg: string): void;
    warn(obj: Record<string, unknown>, msg: string): void;
}
/**
 * Adjusts each provider's semaphore limit from observed request outcomes
 * and pushes the result to the `ProviderSemaphoreManager`.
 */
export declare class AdaptiveConcurrencyController {
    private semaphoreManager;
    private logger?;
    private readonly entries;
    constructor(semaphoreManager: ProviderSemaphoreManager, logger?: ControllerLogger | undefined);
    /** Registers (or resets) a provider and syncs its starting limit to the semaphore. */
    init(providerId: string, config: {
        max: number;
    }, semParams: {
        queueTimeoutMs: number;
        maxQueueSize: number;
    }): void;
    /** Forgets a provider's adaptive state. */
    remove(providerId: string): void;
    /** Records one request outcome; ignored for providers that are not registered. */
    onRequestComplete(providerId: string, result: AdaptiveResult): void;
    /** Returns the provider's state, or `undefined` when it is not under adaptive control. */
    getStatus(providerId: string): AdaptiveState | undefined;
    /** Applies provider settings: updates or creates the entry when enabled, otherwise removes it and restores the configured limit. */
    syncProvider(providerId: string, p: ProviderAdaptiveConfig): void;
    private transitionSuccess;
    private transitionFailure;
    private syncToSemaphore;
}
export {};
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
const SUCCESS_THRESHOLD = 3;
|
|
2
|
+
const FAILURE_THRESHOLD = 3;
|
|
3
|
+
const DECREASE_STEP = 2;
|
|
4
|
+
const COOLDOWN_MS = 30_000;
|
|
5
|
+
const RATE_LIMIT_STATUS = 429;
|
|
6
|
+
const HALF_DIVISOR = 2;
|
|
7
|
+
const ADAPTIVE_MIN = 1;
|
|
8
|
+
export class AdaptiveConcurrencyController {
|
|
9
|
+
semaphoreManager;
|
|
10
|
+
logger;
|
|
11
|
+
entries = new Map();
|
|
12
|
+
constructor(semaphoreManager, logger) {
|
|
13
|
+
this.semaphoreManager = semaphoreManager;
|
|
14
|
+
this.logger = logger;
|
|
15
|
+
}
|
|
16
|
+
init(providerId, config, semParams) {
|
|
17
|
+
this.entries.set(providerId, {
|
|
18
|
+
state: {
|
|
19
|
+
currentLimit: ADAPTIVE_MIN,
|
|
20
|
+
probeActive: false,
|
|
21
|
+
consecutiveSuccesses: 0,
|
|
22
|
+
consecutiveFailures: 0,
|
|
23
|
+
cooldownUntil: 0,
|
|
24
|
+
},
|
|
25
|
+
max: config.max,
|
|
26
|
+
queueTimeoutMs: semParams.queueTimeoutMs,
|
|
27
|
+
maxQueueSize: semParams.maxQueueSize,
|
|
28
|
+
});
|
|
29
|
+
this.syncToSemaphore(providerId);
|
|
30
|
+
}
|
|
31
|
+
remove(providerId) {
|
|
32
|
+
this.entries.delete(providerId);
|
|
33
|
+
}
|
|
34
|
+
onRequestComplete(providerId, result) {
|
|
35
|
+
const entry = this.entries.get(providerId);
|
|
36
|
+
if (!entry)
|
|
37
|
+
return;
|
|
38
|
+
if (result.success) {
|
|
39
|
+
this.transitionSuccess(providerId, entry);
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
this.transitionFailure(providerId, entry, result.statusCode);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
getStatus(providerId) {
|
|
46
|
+
return this.entries.get(providerId)?.state;
|
|
47
|
+
}
|
|
48
|
+
syncProvider(providerId, p) {
|
|
49
|
+
if (p.adaptive_enabled) {
|
|
50
|
+
const existing = this.entries.get(providerId);
|
|
51
|
+
if (existing) {
|
|
52
|
+
existing.max = p.max_concurrency;
|
|
53
|
+
existing.queueTimeoutMs = p.queue_timeout_ms;
|
|
54
|
+
existing.maxQueueSize = p.max_queue_size;
|
|
55
|
+
existing.state.currentLimit = Math.min(Math.max(existing.state.currentLimit, ADAPTIVE_MIN), existing.max);
|
|
56
|
+
this.syncToSemaphore(providerId);
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
this.init(providerId, { max: p.max_concurrency }, {
|
|
60
|
+
queueTimeoutMs: p.queue_timeout_ms, maxQueueSize: p.max_queue_size,
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
else {
|
|
65
|
+
this.remove(providerId);
|
|
66
|
+
// 禁用自适应后恢复信号量到原始 max_concurrency
|
|
67
|
+
this.semaphoreManager.updateConfig(providerId, {
|
|
68
|
+
maxConcurrency: p.max_concurrency,
|
|
69
|
+
queueTimeoutMs: p.queue_timeout_ms,
|
|
70
|
+
maxQueueSize: p.max_queue_size,
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
transitionSuccess(providerId, entry) {
|
|
75
|
+
const s = entry.state;
|
|
76
|
+
s.consecutiveSuccesses++;
|
|
77
|
+
s.consecutiveFailures = 0;
|
|
78
|
+
if (Date.now() < s.cooldownUntil)
|
|
79
|
+
return;
|
|
80
|
+
if (s.consecutiveSuccesses >= SUCCESS_THRESHOLD) {
|
|
81
|
+
if (!s.probeActive) {
|
|
82
|
+
s.probeActive = true;
|
|
83
|
+
s.consecutiveSuccesses = 0;
|
|
84
|
+
this.logger?.debug({ providerId, currentLimit: s.currentLimit, action: "probe_open" }, "Adaptive: probe window opened");
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
s.currentLimit = Math.min(s.currentLimit + 1, entry.max);
|
|
88
|
+
s.consecutiveSuccesses = 0;
|
|
89
|
+
this.logger?.debug({ providerId, currentLimit: s.currentLimit, max: entry.max, action: "limit_increased" }, "Adaptive: limit increased by 1");
|
|
90
|
+
}
|
|
91
|
+
this.syncToSemaphore(providerId);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
transitionFailure(providerId, entry, statusCode) {
|
|
95
|
+
const s = entry.state;
|
|
96
|
+
s.consecutiveFailures++;
|
|
97
|
+
s.consecutiveSuccesses = 0;
|
|
98
|
+
if (statusCode === RATE_LIMIT_STATUS) {
|
|
99
|
+
const prevLimit = s.currentLimit;
|
|
100
|
+
s.currentLimit = Math.max(Math.floor(s.currentLimit / HALF_DIVISOR), ADAPTIVE_MIN);
|
|
101
|
+
s.probeActive = false;
|
|
102
|
+
s.cooldownUntil = Date.now() + COOLDOWN_MS;
|
|
103
|
+
s.consecutiveFailures = 0;
|
|
104
|
+
this.syncToSemaphore(providerId);
|
|
105
|
+
this.logger?.warn({ providerId, prevLimit, newLimit: s.currentLimit, cooldownMs: COOLDOWN_MS, action: "rate_limit_backoff" }, "Adaptive: 429 rate limit, halved concurrency and entered cooldown");
|
|
106
|
+
}
|
|
107
|
+
else if (s.consecutiveFailures >= FAILURE_THRESHOLD) {
|
|
108
|
+
const prevLimit = s.currentLimit;
|
|
109
|
+
s.currentLimit = Math.max(s.currentLimit - DECREASE_STEP, ADAPTIVE_MIN);
|
|
110
|
+
s.probeActive = false;
|
|
111
|
+
s.consecutiveFailures = 0;
|
|
112
|
+
this.syncToSemaphore(providerId);
|
|
113
|
+
this.logger?.warn({ providerId, prevLimit, newLimit: s.currentLimit, action: "failure_backoff" }, "Adaptive: sustained failures, decreased concurrency");
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
syncToSemaphore(providerId) {
|
|
117
|
+
const entry = this.entries.get(providerId);
|
|
118
|
+
if (!entry)
|
|
119
|
+
return;
|
|
120
|
+
// probeActive 时额外加 1 个探针槽位,但不超过 max
|
|
121
|
+
const effectiveLimit = entry.state.probeActive
|
|
122
|
+
? Math.min(Math.max(entry.state.currentLimit + 1, ADAPTIVE_MIN), entry.max)
|
|
123
|
+
: Math.max(entry.state.currentLimit, ADAPTIVE_MIN);
|
|
124
|
+
this.semaphoreManager.updateConfig(providerId, {
|
|
125
|
+
maxConcurrency: effectiveLimit,
|
|
126
|
+
queueTimeoutMs: entry.queueTimeoutMs,
|
|
127
|
+
maxQueueSize: entry.maxQueueSize,
|
|
128
|
+
});
|
|
129
|
+
}
|
|
130
|
+
}
|
|
@@ -4,6 +4,7 @@ import { RetryRuleMatcher } from "./retry-rules.js";
|
|
|
4
4
|
import { ProviderSemaphoreManager } from "./semaphore.js";
|
|
5
5
|
import type { RequestTracker } from "../monitor/request-tracker.js";
|
|
6
6
|
import type { UsageWindowTracker } from "./usage-window-tracker.js";
|
|
7
|
+
import type { AdaptiveConcurrencyController } from "./adaptive-controller.js";
|
|
7
8
|
export interface AnthropicProxyOptions {
|
|
8
9
|
db: Database.Database;
|
|
9
10
|
streamTimeoutMs: number;
|
|
@@ -13,5 +14,6 @@ export interface AnthropicProxyOptions {
|
|
|
13
14
|
tracker?: RequestTracker;
|
|
14
15
|
usageWindowTracker?: UsageWindowTracker;
|
|
15
16
|
sessionTracker?: import("./loop-prevention/session-tracker.js").SessionTracker;
|
|
17
|
+
adaptiveController?: AdaptiveConcurrencyController;
|
|
16
18
|
}
|
|
17
19
|
export declare const anthropicProxy: FastifyPluginCallback<AnthropicProxyOptions>;
|
package/dist/proxy/anthropic.js
CHANGED
|
@@ -18,8 +18,8 @@ const ANTHROPIC_ERROR_TYPE = {
|
|
|
18
18
|
};
|
|
19
19
|
const anthropicErrors = createErrorFormatter((kind, message) => ({ type: "error", error: { type: ANTHROPIC_ERROR_TYPE[kind], message } }));
|
|
20
20
|
const anthropicProxyRaw = (app, opts, done) => {
|
|
21
|
-
const { db, streamTimeoutMs, retryBaseDelayMs, matcher, semaphoreManager, tracker, usageWindowTracker, sessionTracker } = opts;
|
|
22
|
-
const orchestrator = createOrchestrator(semaphoreManager, tracker);
|
|
21
|
+
const { db, streamTimeoutMs, retryBaseDelayMs, matcher, semaphoreManager, tracker, usageWindowTracker, sessionTracker, adaptiveController } = opts;
|
|
22
|
+
const orchestrator = createOrchestrator(semaphoreManager, tracker, adaptiveController);
|
|
23
23
|
app.post(MESSAGES_PATH, async (request, reply) => {
|
|
24
24
|
if (!orchestrator) {
|
|
25
25
|
const body = request.body;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { FastifyRequest } from "fastify";
|
|
2
2
|
import Database from "better-sqlite3";
|
|
3
|
+
import { type EnhancementConfig } from "../enhancement-config.js";
|
|
3
4
|
export interface InterceptResponse {
|
|
4
5
|
statusCode: number;
|
|
5
6
|
body: unknown;
|
|
@@ -18,6 +19,6 @@ export interface EnhancementResult {
|
|
|
18
19
|
* 在代理转发前应用代理增强逻辑(指令解析 + 会话记忆 + 模型替换 + 命令拦截)。
|
|
19
20
|
* 仅当 proxy_enhancement.claude_code_enabled 开启时生效。
|
|
20
21
|
*/
|
|
21
|
-
export declare function applyEnhancement(db: Database.Database, request: FastifyRequest, clientModel: string, sessionId?: string): EnhancementResult;
|
|
22
|
+
export declare function applyEnhancement(db: Database.Database, request: FastifyRequest, clientModel: string, sessionId?: string, enhancementConfig?: EnhancementConfig): EnhancementResult;
|
|
22
23
|
/** 生成注入到非流式响应中的模型信息标签 */
|
|
23
24
|
export declare function buildModelInfoTag(effectiveModel: string): string;
|
|
@@ -60,9 +60,9 @@ function buildDisplayModels(db, allowedModelsRaw) {
|
|
|
60
60
|
* 在代理转发前应用代理增强逻辑(指令解析 + 会话记忆 + 模型替换 + 命令拦截)。
|
|
61
61
|
* 仅当 proxy_enhancement.claude_code_enabled 开启时生效。
|
|
62
62
|
*/
|
|
63
|
-
export function applyEnhancement(db, request, clientModel, sessionId) {
|
|
63
|
+
export function applyEnhancement(db, request, clientModel, sessionId, enhancementConfig) {
|
|
64
64
|
const nullResult = { effectiveModel: clientModel, originalModel: null, interceptResponse: null };
|
|
65
|
-
const enhancement = loadEnhancementConfig(db);
|
|
65
|
+
const enhancement = enhancementConfig ?? loadEnhancementConfig(db);
|
|
66
66
|
if (!enhancement.claude_code_enabled) {
|
|
67
67
|
return nullResult;
|
|
68
68
|
}
|