llm-simple-router 0.2.0 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/dist/admin/monitor.d.ts +7 -0
  2. package/dist/admin/monitor.js +25 -0
  3. package/dist/admin/providers.d.ts +4 -0
  4. package/dist/admin/providers.js +57 -9
  5. package/dist/admin/retry-rules.js +6 -3
  6. package/dist/admin/routes.d.ts +4 -0
  7. package/dist/admin/routes.js +3 -1
  8. package/dist/admin/setup.js +8 -5
  9. package/dist/cli.js +0 -0
  10. package/dist/db/index.d.ts +1 -1
  11. package/dist/db/index.js +1 -1
  12. package/dist/db/mappings.js +6 -2
  13. package/dist/db/migrations/017_add_provider_concurrency.sql +3 -0
  14. package/dist/db/providers.d.ts +12 -1
  15. package/dist/db/providers.js +8 -3
  16. package/dist/db/retry-rules.js +4 -1
  17. package/dist/db/router-keys.js +3 -1
  18. package/dist/index.js +36 -5
  19. package/dist/metrics/sse-metrics-transform.d.ts +17 -1
  20. package/dist/metrics/sse-metrics-transform.js +33 -2
  21. package/dist/middleware/auth.js +5 -4
  22. package/dist/monitor/request-tracker.d.ts +49 -0
  23. package/dist/monitor/request-tracker.js +279 -0
  24. package/dist/monitor/runtime-collector.d.ts +11 -0
  25. package/dist/monitor/runtime-collector.js +41 -0
  26. package/dist/monitor/stats-aggregator.d.ts +22 -0
  27. package/dist/monitor/stats-aggregator.js +166 -0
  28. package/dist/monitor/types.d.ts +84 -0
  29. package/dist/monitor/types.js +1 -0
  30. package/dist/proxy/anthropic.d.ts +4 -0
  31. package/dist/proxy/anthropic.js +10 -2
  32. package/dist/proxy/enhancement-handler.js +3 -1
  33. package/dist/proxy/mapping-resolver.js +6 -2
  34. package/dist/proxy/openai.d.ts +4 -0
  35. package/dist/proxy/openai.js +10 -2
  36. package/dist/proxy/proxy-core.d.ts +6 -0
  37. package/dist/proxy/proxy-core.js +176 -85
  38. package/dist/proxy/retry.d.ts +1 -1
  39. package/dist/proxy/retry.js +3 -2
  40. package/dist/proxy/semaphore.d.ts +27 -0
  41. package/dist/proxy/semaphore.js +125 -0
  42. package/dist/utils/password.js +2 -1
  43. package/frontend-dist/assets/CardContent-B40ArIqh.js +1 -0
  44. package/frontend-dist/assets/{CardHeader-D5lVaeAA.js → CardHeader-BjkSQf27.js} +1 -1
  45. package/frontend-dist/assets/CardTitle-DjG2kSF3.js +1 -0
  46. package/frontend-dist/assets/Checkbox-Cw0rq2D9.js +1 -0
  47. package/frontend-dist/assets/CollapsibleTrigger-BvYqNbGA.js +1 -0
  48. package/frontend-dist/assets/Collection-CQ4pV54w.js +3 -0
  49. package/frontend-dist/assets/Dashboard-CsOTBnSa.js +3 -0
  50. package/frontend-dist/assets/DialogTitle-PS2W-IfG.js +1 -0
  51. package/frontend-dist/assets/Input-toxjzsir.js +1 -0
  52. package/frontend-dist/assets/Label-fZNDEQjf.js +1 -0
  53. package/frontend-dist/assets/LogResponseViewer-B9kSncNr.js +3 -0
  54. package/frontend-dist/assets/Login-DRm9DHq1.js +1 -0
  55. package/frontend-dist/assets/Logs-NHxebwmP.js +1 -0
  56. package/frontend-dist/assets/ModelMappings-DV0RPnO2.js +1 -0
  57. package/frontend-dist/assets/Monitor-B5TYWb2n.js +1 -0
  58. package/frontend-dist/assets/PopperContent-BvKlcZEO.js +1 -0
  59. package/frontend-dist/assets/Providers-D1Rauu-D.js +1 -0
  60. package/frontend-dist/assets/ProxyEnhancement-B2OliarO.js +5 -0
  61. package/frontend-dist/assets/RetryRules-BWu2gicT.js +1 -0
  62. package/frontend-dist/assets/RouterKeys-BP6XJCVa.js +1 -0
  63. package/frontend-dist/assets/RovingFocusItem-DHfpgdA0.js +1 -0
  64. package/frontend-dist/assets/SelectValue-CFf_mD9E.js +1 -0
  65. package/frontend-dist/assets/Setup-BMjCT-Tl.js +1 -0
  66. package/frontend-dist/assets/Switch-BGCQ7puL.js +1 -0
  67. package/frontend-dist/assets/TableHeader-DAOs6nSA.js +1 -0
  68. package/frontend-dist/assets/TabsTrigger-DBAYM66g.js +1 -0
  69. package/frontend-dist/assets/VisuallyHidden-Dh7svQf3.js +1 -0
  70. package/frontend-dist/assets/VisuallyHiddenInput-BOaRSEmd.js +1 -0
  71. package/frontend-dist/assets/alert-dialog-CUNSZqpB.js +1 -0
  72. package/frontend-dist/assets/button-CfQs66fX.js +1 -0
  73. package/frontend-dist/assets/client-DvdghFBq.js +12 -0
  74. package/frontend-dist/assets/createLucideIcon-DCD7INQf.js +1 -0
  75. package/frontend-dist/assets/dialog-DQFRGKR6.js +1 -0
  76. package/frontend-dist/assets/index--5JhZIwi.js +1 -0
  77. package/frontend-dist/assets/index-Bx15k8FA.css +1 -0
  78. package/frontend-dist/assets/lib-BJNsNHLO.js +1 -0
  79. package/frontend-dist/assets/ohash.D__AXeF1-CNudYmrX.js +1 -0
  80. package/frontend-dist/assets/useClipboard-aPMKfK25.js +1 -0
  81. package/frontend-dist/assets/useForwardExpose-u2vjohek.js +1 -0
  82. package/frontend-dist/assets/useNonce-ClXGIm-8.js +1 -0
  83. package/frontend-dist/assets/x-ILQhskuj.js +1 -0
  84. package/frontend-dist/index.html +7 -6
  85. package/package.json +1 -1
  86. package/dist/admin/services.d.ts +0 -7
  87. package/dist/admin/services.js +0 -63
  88. package/frontend-dist/assets/CardContent-BE9fukPi.js +0 -1
  89. package/frontend-dist/assets/CardTitle-H-zwhi3Z.js +0 -1
  90. package/frontend-dist/assets/Checkbox--1gw0dYW.js +0 -1
  91. package/frontend-dist/assets/CollapsibleTrigger-D_ptA35Y.js +0 -1
  92. package/frontend-dist/assets/Dashboard-D4AwkULO.js +0 -3
  93. package/frontend-dist/assets/Label-GiPfoz7u.js +0 -1
  94. package/frontend-dist/assets/Login-BUet1sbM.js +0 -1
  95. package/frontend-dist/assets/Logs-yztb_F9t.js +0 -3
  96. package/frontend-dist/assets/ModelMappings-MbZhdPNv.js +0 -1
  97. package/frontend-dist/assets/Providers-BjsqH6A2.js +0 -1
  98. package/frontend-dist/assets/RetryRules-C2vvJvLr.js +0 -1
  99. package/frontend-dist/assets/RouterKeys-DavrgpAQ.js +0 -1
  100. package/frontend-dist/assets/RovingFocusItem-DnIa_lwH.js +0 -1
  101. package/frontend-dist/assets/SelectValue-BB0Ckbjh.js +0 -1
  102. package/frontend-dist/assets/TableHeader-D2GkiqRx.js +0 -1
  103. package/frontend-dist/assets/alert-dialog-CWjBke-O.js +0 -1
  104. package/frontend-dist/assets/badge-_ZHrMEpC.js +0 -3
  105. package/frontend-dist/assets/button-C4_mChkc.js +0 -1
  106. package/frontend-dist/assets/client-BWw0R36V.js +0 -12
  107. package/frontend-dist/assets/dialog-CUHMcTqp.js +0 -1
  108. package/frontend-dist/assets/index-DEl48bm9.css +0 -1
  109. package/frontend-dist/assets/index-UZK1BnPG.js +0 -1
  110. package/frontend-dist/assets/lib-Qs8xoTas.js +0 -1
  111. package/frontend-dist/assets/useForwardExpose-B-xauF1X.js +0 -1
  112. package/frontend-dist/assets/x-JBJB26JV.js +0 -1
@@ -1,11 +1,15 @@
1
1
  import Database from "better-sqlite3";
2
2
  import type { FastifyPluginCallback } from "fastify";
3
3
  import { RetryRuleMatcher } from "./retry-rules.js";
4
+ import { ProviderSemaphoreManager } from "./semaphore.js";
5
+ import type { RequestTracker } from "../monitor/request-tracker.js";
4
6
  export interface AnthropicProxyOptions {
5
7
  db: Database.Database;
6
8
  streamTimeoutMs: number;
7
9
  retryMaxAttempts: number;
8
10
  retryBaseDelayMs: number;
9
11
  matcher?: RetryRuleMatcher;
12
+ semaphoreManager?: ProviderSemaphoreManager;
13
+ tracker?: RequestTracker;
10
14
  }
11
15
  export declare const anthropicProxy: FastifyPluginCallback<AnthropicProxyOptions>;
@@ -22,11 +22,19 @@ const anthropicErrors = {
22
22
  statusCode: 502,
23
23
  body: { type: "error", error: { type: "upstream_error", message: "Failed to connect to upstream service" } },
24
24
  }),
25
+ concurrencyQueueFull: (providerId) => ({
26
+ statusCode: 503,
27
+ body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency queue is full` } },
28
+ }),
29
+ concurrencyTimeout: (providerId, timeoutMs) => ({
30
+ statusCode: 504,
31
+ body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)` } },
32
+ }),
25
33
  };
26
34
  const anthropicProxyRaw = (app, opts, done) => {
27
- const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher } = opts;
35
+ const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
28
36
  app.post(MESSAGES_PATH, async (request, reply) => {
29
- const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher };
37
+ const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker };
30
38
  return handleProxyPost(request, reply, "anthropic", MESSAGES_PATH, anthropicErrors, deps);
31
39
  });
32
40
  done();
@@ -130,7 +130,9 @@ function buildSelectModelResponse(db, allowedModelsRaw, selectedModel) {
130
130
  if (parsed.length > 0)
131
131
  allowedSet = new Set(parsed);
132
132
  }
133
- catch { /* 忽略解析失败 */ }
133
+ catch {
134
+ return buildTextResponse("model-list", "(解析 allowed_models 失败)");
135
+ }
134
136
  }
135
137
  const filtered = allowedSet
136
138
  ? providerModels.filter(m => allowedSet.has(m.backend_model))
@@ -29,7 +29,9 @@ export function resolveMapping(db, clientModel, context) {
29
29
  return { backend_model: backendModel, provider_id: provider.id };
30
30
  }
31
31
  }
32
- catch { /* 忽略解析失败 */ }
32
+ catch {
33
+ return null;
34
+ }
33
35
  }
34
36
  // 明确的 provider/model 格式解析失败,不再 fallback 到 mapping group
35
37
  return null;
@@ -45,7 +47,9 @@ export function resolveMapping(db, clientModel, context) {
45
47
  return { backend_model: clientModel, provider_id: p.id };
46
48
  }
47
49
  }
48
- catch { /* 忽略解析失败 */ }
50
+ catch {
51
+ break;
52
+ }
49
53
  }
50
54
  return null;
51
55
  }
@@ -1,11 +1,15 @@
1
1
  import type { FastifyPluginCallback } from "fastify";
2
2
  import Database from "better-sqlite3";
3
3
  import { RetryRuleMatcher } from "./retry-rules.js";
4
+ import { ProviderSemaphoreManager } from "./semaphore.js";
5
+ import type { RequestTracker } from "../monitor/request-tracker.js";
4
6
  export interface OpenaiProxyOptions {
5
7
  db: Database.Database;
6
8
  streamTimeoutMs: number;
7
9
  retryMaxAttempts: number;
8
10
  retryBaseDelayMs: number;
9
11
  matcher?: RetryRuleMatcher;
12
+ semaphoreManager?: ProviderSemaphoreManager;
13
+ tracker?: RequestTracker;
10
14
  }
11
15
  export declare const openaiProxy: FastifyPluginCallback<OpenaiProxyOptions>;
@@ -28,14 +28,22 @@ const openaiErrors = {
28
28
  statusCode: 502,
29
29
  body: { error: { message: "Failed to connect to upstream service", type: "upstream_error", code: "upstream_connection_failed" } },
30
30
  }),
31
+ concurrencyQueueFull: (providerId) => ({
32
+ statusCode: 503,
33
+ body: { error: { message: `Provider '${providerId}' concurrency queue is full`, type: "server_error", code: "concurrency_queue_full" } },
34
+ }),
35
+ concurrencyTimeout: (providerId, timeoutMs) => ({
36
+ statusCode: 504,
37
+ body: { error: { message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)`, type: "server_error", code: "concurrency_timeout" } },
38
+ }),
31
39
  };
32
40
  function sendError(reply, e) {
33
41
  return reply.status(e.statusCode).send(e.body);
34
42
  }
35
43
  const openaiProxyRaw = (app, opts, done) => {
36
- const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher } = opts;
44
+ const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
37
45
  app.post(CHAT_COMPLETIONS_PATH, async (request, reply) => {
38
- const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher };
46
+ const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker };
39
47
  return handleProxyPost(request, reply, "openai", CHAT_COMPLETIONS_PATH, openaiErrors, deps, {
40
48
  beforeSendProxy: (body, isStream) => {
41
49
  if (isStream && !body.stream_options) {
@@ -3,6 +3,8 @@ import Database from "better-sqlite3";
3
3
  import type { Provider } from "../db/index.js";
4
4
  import type { RetryRuleMatcher } from "./retry-rules.js";
5
5
  import { type ProxyResult, type StreamProxyResult, type GetProxyResult } from "./upstream-call.js";
6
+ import { ProviderSemaphoreManager } from "./semaphore.js";
7
+ import type { RequestTracker } from "../monitor/request-tracker.js";
6
8
  export type RawHeaders = Record<string, string | string[] | undefined>;
7
9
  export interface ProxyErrorResponse {
8
10
  statusCode: number;
@@ -14,6 +16,8 @@ export interface ProxyErrorFormatter {
14
16
  providerUnavailable(): ProxyErrorResponse;
15
17
  providerTypeMismatch(): ProxyErrorResponse;
16
18
  upstreamConnectionFailed(): ProxyErrorResponse;
19
+ concurrencyQueueFull(providerId: string): ProxyErrorResponse;
20
+ concurrencyTimeout(providerId: string, timeoutMs: number): ProxyErrorResponse;
17
21
  }
18
22
  export interface ProxyHandlerDeps {
19
23
  db: Database.Database;
@@ -21,6 +25,8 @@ export interface ProxyHandlerDeps {
21
25
  retryMaxAttempts: number;
22
26
  retryBaseDelayMs: number;
23
27
  matcher?: RetryRuleMatcher;
28
+ semaphoreManager?: ProviderSemaphoreManager;
29
+ tracker?: RequestTracker;
24
30
  }
25
31
  export type { ProxyResult, StreamProxyResult, GetProxyResult };
26
32
  export declare const SKIP_UPSTREAM: Set<string>;
@@ -10,9 +10,12 @@ import { SSEMetricsTransform } from "../metrics/sse-metrics-transform.js";
10
10
  import { proxyNonStream as upstreamNonStream, proxyStream as upstreamStream, proxyGetRequest as upstreamGet, } from "./upstream-call.js";
11
11
  import { insertSuccessLog, insertRejectedLog } from "./log-helpers.js";
12
12
  import { applyEnhancement, buildModelInfoTag } from "./enhancement-handler.js";
13
+ import { SemaphoreQueueFullError, SemaphoreTimeoutError } from "./semaphore.js";
13
14
  // ---------- Constants ----------
14
15
  const UPSTREAM_SUCCESS = 200;
15
16
  const FAILOVER_FAIL_THRESHOLD = 400;
17
+ const STREAM_CONTENT_MAX_RAW = 8192;
18
+ const STREAM_CONTENT_MAX_TEXT = 4096;
16
19
  // ---------- Header utilities ----------
17
20
  export const SKIP_UPSTREAM = new Set([
18
21
  "host",
@@ -48,8 +51,95 @@ export function buildUpstreamHeaders(clientHeaders, apiKey, payloadBytes) {
48
51
  export function proxyGetRequest(backend, apiKey, clientHeaders, upstreamPath) {
49
52
  return upstreamGet(backend, apiKey, clientHeaders, upstreamPath, buildUpstreamHeaders);
50
53
  }
54
+ // ---------- Helper functions for handleProxyPost ----------
55
/**
 * Answer a request that the enhancement layer intercepted, without forwarding
 * anything upstream.
 *
 * A request-log row attributed to the pseudo provider "router" is written
 * first, then the intercept response is sent to the client as-is.
 *
 * @param db                 Database handle passed through to insertRequestLog.
 * @param apiType            API flavour recorded in the log row.
 * @param request            Incoming request (body, headers, optional routerKey).
 * @param reply              Reply object used to send the response.
 * @param interceptResponse  Object with statusCode, body and optional meta.
 * @param clientModel        Model name recorded in the log row.
 * @returns Whatever `reply.status(...).send(...)` returns.
 */
function handleIntercept(db, apiType, request, reply, interceptResponse, clientModel) {
    const logId = randomUUID();
    const streamFlag = request.body.stream === true ? 1 : 0;
    // Serialised once; the same string is stored as response_body and embedded
    // (as a string) inside client_response.
    const serializedBody = JSON.stringify(interceptResponse.body);
    const upstreamRequest = interceptResponse.meta
        ? JSON.stringify(interceptResponse.meta)
        : null;
    const row = {
        id: logId,
        api_type: apiType,
        model: clientModel,
        provider_id: "router",
        status_code: interceptResponse.statusCode,
        latency_ms: 0,
        is_stream: streamFlag,
        error_message: null,
        created_at: new Date().toISOString(),
        request_body: JSON.stringify(request.body),
        response_body: serializedBody,
        client_request: JSON.stringify({ headers: request.headers, body: request.body }),
        upstream_request: upstreamRequest,
        client_response: JSON.stringify({ statusCode: interceptResponse.statusCode, body: serializedBody }),
        is_retry: 0,
        original_request_id: null,
        router_key_id: request.routerKey?.id ?? null,
        original_model: null,
    };
    insertRequestLog(db, row);
    return reply.status(interceptResponse.statusCode).send(interceptResponse.body);
}
74
/**
 * Write one log row per upstream attempt and report which row represents the
 * last successful attempt.
 *
 * The first attempt (attemptIndex 0) reuses `params.logId`; every later
 * attempt gets a fresh UUID and is linked back to `params.logId` as a retry.
 *
 * @param db         Database handle passed through to the log helpers.
 * @param params     Shared request context: apiType, model, providerId,
 *                   isStream, reqBodyStr, clientReq, upstreamReqBase, logId,
 *                   routerKeyId, originalModel.
 * @param attempts   Attempt records (attemptIndex, latencyMs, and either
 *                   error or statusCode/responseBody).
 * @param result     Final upstream result; supplies status and headers for
 *                   the success row.
 * @param startTime  Request start timestamp forwarded to insertSuccessLog.
 * @returns The log id of the last successful attempt, or `params.logId` when
 *          no attempt succeeded.
 */
function logRetryAttempts(db, params, attempts, result, startTime) {
    const { apiType, model, providerId, isStream, reqBodyStr, clientReq, upstreamReqBase, logId, routerKeyId, originalModel, } = params;
    const streamFlag = isStream ? 1 : 0;
    let lastSuccessLogId = logId;
    for (const attempt of attempts) {
        const isOriginal = attempt.attemptIndex === 0;
        const attemptLogId = isOriginal ? logId : randomUUID();
        const retryFlag = isOriginal ? 0 : 1;
        const parentId = isOriginal ? null : logId;
        // Transport-level failure: no upstream response exists, log it as 502.
        if (attempt.error) {
            insertRequestLog(db, {
                id: attemptLogId,
                api_type: apiType,
                model,
                provider_id: providerId,
                status_code: HTTP_BAD_GATEWAY,
                latency_ms: attempt.latencyMs,
                is_stream: streamFlag,
                error_message: attempt.error,
                created_at: new Date().toISOString(),
                request_body: reqBodyStr,
                client_request: clientReq,
                upstream_request: upstreamReqBase,
                is_retry: retryFlag,
                original_request_id: parentId,
                router_key_id: routerKeyId,
                original_model: originalModel,
            });
            continue;
        }
        // Upstream answered with a non-success status: keep its body verbatim.
        if (attempt.statusCode !== UPSTREAM_SUCCESS) {
            const responseEnvelope = JSON.stringify({ statusCode: attempt.statusCode, body: attempt.responseBody });
            insertRequestLog(db, {
                id: attemptLogId,
                api_type: apiType,
                model,
                provider_id: providerId,
                status_code: attempt.statusCode,
                latency_ms: attempt.latencyMs,
                is_stream: streamFlag,
                error_message: null,
                created_at: new Date().toISOString(),
                request_body: reqBodyStr,
                response_body: attempt.responseBody,
                client_request: clientReq,
                upstream_request: upstreamReqBase,
                upstream_response: responseEnvelope,
                client_response: responseEnvelope,
                is_retry: retryFlag,
                original_request_id: parentId,
                router_key_id: routerKeyId,
                original_model: originalModel,
            });
            continue;
        }
        // Successful attempt: stream results expose upstreamResponseHeaders,
        // non-stream results expose headers.
        const responseHeaders = isStream
            ? (result.upstreamResponseHeaders ?? {})
            : (result.headers);
        insertSuccessLog(db, {
            apiType,
            model,
            provider: { id: providerId },
            isStream,
            startTime,
            reqBody: reqBodyStr,
            clientReq,
            upstreamReq: upstreamReqBase,
            id: attemptLogId,
            status: result.statusCode,
            respBody: attempt.responseBody,
            upHdrs: responseHeaders,
            cliHdrs: responseHeaders,
            isRetry: !isOriginal,
            originalRequestId: parentId,
            routerKeyId,
            originalModel,
        });
        lastSuccessLogId = attemptLogId;
    }
    return lastSuccessLogId;
}
117
/**
 * Persist token/latency metrics for a successful upstream call.
 *
 * Streaming results carry their metrics in `result.metricsResult`; for
 * non-streaming results the metrics are extracted from the response body.
 * Failures while extracting or inserting are logged and never propagated.
 *
 * @param db                Database handle passed to insertMetrics.
 * @param apiType           API flavour stored with the metrics row.
 * @param result            Upstream result (stream or non-stream).
 * @param isStream          Whether `result` is a streaming result.
 * @param lastSuccessLogId  Request-log id the metrics row is linked to.
 * @param providerId        Provider id stored with the metrics row.
 * @param backendModel      Backend model name stored with the metrics row.
 * @param request           Request whose logger reports insertion failures.
 */
function collectMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request) {
    const store = (metrics) => insertMetrics(db, {
        ...metrics,
        request_log_id: lastSuccessLogId,
        provider_id: providerId,
        backend_model: backendModel,
        api_type: apiType,
    });
    const reportFailure = (err) => {
        request.log.error({ err }, "Failed to insert metrics");
    };
    if (!isStream) {
        try {
            const extracted = MetricsExtractor.fromNonStreamResponse(apiType, result.body);
            if (extracted)
                store(extracted);
        }
        catch (err) {
            reportFailure(err);
        }
        return;
    }
    // Streaming: nothing to record when the transform produced no metrics.
    const streamed = result.metricsResult;
    if (!streamed)
        return;
    try {
        store(streamed);
    }
    catch (err) {
        reportFailure(err);
    }
}
51
140
  // ---------- Shared proxy handler ----------
52
141
  const HTTP_BAD_GATEWAY = 502;
142
+ const HTTP_BAD_REQUEST = 400;
53
143
  /**
54
144
  * 共享 POST handler,参数化 apiType/errorFormat/upstreamPath 等差异。
55
145
  * 当分组策略为 failover 时,在 while 循环中依次尝试不同 target,
@@ -62,26 +152,8 @@ export async function handleProxyPost(request, reply, apiType, upstreamPath, err
62
152
  // 代理增强:指令解析 + 模型替换 + 命令拦截
63
153
  const sessionId = request.headers["x-claude-code-session-id"];
64
154
  const { effectiveModel, originalModel, interceptResponse } = applyEnhancement(db, request, clientModel, sessionId);
65
- // 命令拦截(如 select-model):直接返回,不转发上游
66
- if (interceptResponse) {
67
- const logId = randomUUID();
68
- const isStream = request.body.stream === true;
69
- const interceptRespBody = JSON.stringify(interceptResponse.body);
70
- insertRequestLog(db, {
71
- id: logId, api_type: apiType, model: clientModel, provider_id: "router",
72
- status_code: interceptResponse.statusCode, latency_ms: 0,
73
- is_stream: isStream ? 1 : 0, error_message: null,
74
- created_at: new Date().toISOString(),
75
- request_body: JSON.stringify(request.body),
76
- response_body: interceptRespBody,
77
- client_request: JSON.stringify({ headers: request.headers, body: request.body }),
78
- upstream_request: interceptResponse.meta ? JSON.stringify(interceptResponse.meta) : null,
79
- client_response: JSON.stringify({ statusCode: interceptResponse.statusCode, body: interceptRespBody }),
80
- is_retry: 0, original_request_id: null,
81
- router_key_id: request.routerKey?.id ?? null, original_model: null,
82
- });
83
- return reply.status(interceptResponse.statusCode).send(interceptResponse.body);
84
- }
155
+ if (interceptResponse)
156
+ return handleIntercept(db, apiType, request, reply, interceptResponse, clientModel);
85
157
  // 查询分组策略(只查一次)
86
158
  const group = getMappingGroup(db, effectiveModel);
87
159
  const isFailover = group?.strategy === "failover";
@@ -142,6 +214,11 @@ export async function handleProxyPost(request, reply, apiType, upstreamPath, err
142
214
  providerId: resolved.provider_id, originalModel });
143
215
  return reply.status(e.statusCode).send(e.body);
144
216
  }
217
+ deps.tracker?.start({
218
+ id: logId, apiType, model: effectiveModel, providerId: provider.id,
219
+ providerName: provider.name, isStream, startTime, status: "pending",
220
+ retryCount: 0, attempts: [], clientIp: request.ip,
221
+ });
145
222
  body.model = resolved.backend_model;
146
223
  const apiKey = decrypt(provider.api_key, getSetting(db, "encryption_key"));
147
224
  options?.beforeSendProxy?.(body, isStream);
@@ -149,56 +226,85 @@ export async function handleProxyPost(request, reply, apiType, upstreamPath, err
149
226
  const clientReq = JSON.stringify({ headers: cliHdrs, body: originalBody });
150
227
  const retryConfig = buildRetryConfig(retryMaxAttempts, retryBaseDelayMs, matcher);
151
228
  const upstreamReqBase = JSON.stringify({ url: `${provider.base_url}${upstreamPath}`, headers: buildUpstreamHeaders(cliHdrs, apiKey, Buffer.byteLength(reqBodyStr)), body: reqBodyStr });
229
+ // === Semaphore acquire ===
230
+ const semaphoreManager = deps.semaphoreManager;
231
+ let semaphoreReleased = false;
232
+ const releaseSemaphore = () => {
233
+ if (!semaphoreReleased) {
234
+ semaphoreReleased = true;
235
+ semaphoreManager?.release(provider.id);
236
+ }
237
+ };
238
+ if (semaphoreManager) {
239
+ const ac = new AbortController();
240
+ request.raw.on("close", () => ac.abort());
241
+ try {
242
+ await semaphoreManager.acquire(provider.id, ac.signal, () => {
243
+ deps.tracker?.update(logId, { queued: true });
244
+ });
245
+ deps.tracker?.update(logId, { queued: false });
246
+ }
247
+ catch (err) {
248
+ if (err instanceof DOMException && err.name === "AbortError") {
249
+ deps.tracker?.complete(logId, { status: "failed" });
250
+ return reply;
251
+ }
252
+ if (err instanceof SemaphoreQueueFullError) {
253
+ request.log.warn({ providerId: provider.id }, "Concurrency queue full, rejecting request");
254
+ const e = errors.concurrencyQueueFull(provider.id);
255
+ deps.tracker?.complete(logId, { status: "failed", statusCode: e.statusCode });
256
+ return reply.status(e.statusCode).send(e.body);
257
+ }
258
+ if (err instanceof SemaphoreTimeoutError) {
259
+ request.log.warn({ providerId: provider.id, timeoutMs: err.timeoutMs }, "Concurrency wait timed out");
260
+ const e = errors.concurrencyTimeout(provider.id, err.timeoutMs);
261
+ deps.tracker?.complete(logId, { status: "failed", statusCode: e.statusCode });
262
+ return reply.status(e.statusCode).send(e.body);
263
+ }
264
+ throw err;
265
+ }
266
+ }
152
267
  try {
153
268
  const { result: r, attempts } = isStream
154
269
  ? await retryableCall(() => {
155
- const metricsTransform = new SSEMetricsTransform(apiType, startTime);
270
+ const metricsTransform = new SSEMetricsTransform(apiType, startTime, {
271
+ onMetrics: (m) => {
272
+ deps.tracker?.update(logId, {
273
+ streamMetrics: {
274
+ inputTokens: m.input_tokens,
275
+ outputTokens: m.output_tokens,
276
+ ttftMs: m.ttft_ms,
277
+ stopReason: m.stop_reason,
278
+ isComplete: m.is_complete === 1,
279
+ },
280
+ });
281
+ },
282
+ onChunk: (rawLine) => {
283
+ deps.tracker?.appendStreamChunk(logId, rawLine, apiType, STREAM_CONTENT_MAX_RAW, STREAM_CONTENT_MAX_TEXT);
284
+ },
285
+ });
156
286
  return upstreamStream(provider, apiKey, body, cliHdrs, reply, streamTimeoutMs, upstreamPath, buildUpstreamHeaders, metricsTransform);
157
287
  }, retryConfig, reply)
158
288
  : await retryableCall(() => upstreamNonStream(provider, apiKey, body, cliHdrs, upstreamPath, buildUpstreamHeaders), retryConfig, reply);
159
- // 记录所有尝试的日志
160
- let lastSuccessLogId = logId;
161
- for (const attempt of attempts) {
162
- const isOriginal = attempt.attemptIndex === 0;
163
- const attemptLogId = isOriginal ? logId : randomUUID();
164
- if (attempt.error) {
165
- insertRequestLog(db, {
166
- id: attemptLogId, api_type: apiType, model: effectiveModel, provider_id: provider.id,
167
- status_code: HTTP_BAD_GATEWAY, latency_ms: attempt.latencyMs,
168
- is_stream: isStream ? 1 : 0, error_message: attempt.error,
169
- created_at: new Date().toISOString(), request_body: reqBodyStr,
170
- client_request: clientReq, upstream_request: upstreamReqBase,
171
- is_retry: isOriginal ? 0 : 1, original_request_id: isOriginal ? null : logId,
172
- router_key_id: routerKeyId, original_model: originalModel,
173
- });
174
- }
175
- else if (attempt.statusCode !== UPSTREAM_SUCCESS) {
176
- insertRequestLog(db, {
177
- id: attemptLogId, api_type: apiType, model: effectiveModel, provider_id: provider.id,
178
- status_code: attempt.statusCode, latency_ms: attempt.latencyMs,
179
- is_stream: isStream ? 1 : 0, error_message: null,
180
- created_at: new Date().toISOString(), request_body: reqBodyStr,
181
- response_body: attempt.responseBody, client_request: clientReq, upstream_request: upstreamReqBase,
182
- upstream_response: JSON.stringify({ statusCode: attempt.statusCode, body: attempt.responseBody }),
183
- client_response: JSON.stringify({ statusCode: attempt.statusCode, body: attempt.responseBody }),
184
- is_retry: isOriginal ? 0 : 1, original_request_id: isOriginal ? null : logId,
185
- router_key_id: routerKeyId, original_model: originalModel,
186
- });
187
- }
188
- else {
189
- const h = isStream
190
- ? (r.upstreamResponseHeaders ?? {})
191
- : (r.headers);
192
- insertSuccessLog(db, { apiType, model: effectiveModel, provider, isStream, startTime,
193
- reqBody: reqBodyStr, clientReq, upstreamReq: upstreamReqBase, id: attemptLogId,
194
- status: r.statusCode, respBody: attempt.responseBody, upHdrs: h, cliHdrs: h,
195
- isRetry: !isOriginal, originalRequestId: isOriginal ? null : logId,
196
- routerKeyId, originalModel });
197
- lastSuccessLogId = attemptLogId;
198
- }
199
- }
289
+ const trackerAttempts = attempts.map(a => ({
290
+ statusCode: a.statusCode ?? null,
291
+ error: a.error ?? null,
292
+ latencyMs: a.latencyMs,
293
+ providerId: provider.id,
294
+ }));
295
+ deps.tracker?.update(logId, {
296
+ retryCount: Math.max(0, attempts.length - 1),
297
+ attempts: trackerAttempts,
298
+ providerId: provider.id,
299
+ });
300
+ const lastSuccessLogId = logRetryAttempts(db, {
301
+ apiType, model: effectiveModel, providerId: provider.id, isStream,
302
+ reqBodyStr, clientReq, upstreamReqBase, logId, routerKeyId, originalModel,
303
+ }, attempts, r, startTime);
200
304
  // --- Failover 检查 ---
201
305
  if (isFailover && r.statusCode >= FAILOVER_FAIL_THRESHOLD && !reply.raw.headersSent) {
306
+ deps.tracker?.complete(logId, { status: "failed", statusCode: r.statusCode });
307
+ releaseSemaphore();
202
308
  excludeTargets.push(resolved);
203
309
  continue;
204
310
  }
@@ -227,32 +333,13 @@ export async function handleProxyPost(request, reply, apiType, upstreamPath, err
227
333
  }
228
334
  for (const [k, v] of Object.entries(pr.headers))
229
335
  reply.header(k, v);
230
- return reply.status(pr.statusCode).send(pr.body);
336
+ reply.status(pr.statusCode).send(pr.body);
231
337
  }
232
- // metrics 采集
233
338
  if (r.statusCode === UPSTREAM_SUCCESS) {
234
- if (isStream) {
235
- const streamResult = r;
236
- if (streamResult.metricsResult) {
237
- try {
238
- insertMetrics(db, { ...streamResult.metricsResult, request_log_id: lastSuccessLogId, provider_id: provider.id, backend_model: resolved.backend_model, api_type: apiType });
239
- }
240
- catch (err) {
241
- request.log.error({ err }, "Failed to insert metrics");
242
- }
243
- }
244
- }
245
- else {
246
- try {
247
- const mr = MetricsExtractor.fromNonStreamResponse(apiType, r.body);
248
- if (mr)
249
- insertMetrics(db, { ...mr, request_log_id: lastSuccessLogId, provider_id: provider.id, backend_model: resolved.backend_model, api_type: apiType });
250
- }
251
- catch (err) {
252
- request.log.error({ err }, "Failed to insert metrics");
253
- }
254
- }
339
+ collectMetrics(db, apiType, r, isStream, lastSuccessLogId, provider.id, resolved.backend_model, request);
255
340
  }
341
+ deps.tracker?.complete(logId, { status: r.statusCode < HTTP_BAD_REQUEST ? "completed" : "failed", statusCode: r.statusCode });
342
+ releaseSemaphore();
256
343
  return reply;
257
344
  }
258
345
  catch (err) {
@@ -269,10 +356,14 @@ export async function handleProxyPost(request, reply, apiType, upstreamPath, err
269
356
  });
270
357
  // --- Failover 检查(异常路径)---
271
358
  if (isFailover && !reply.raw.headersSent) {
359
+ deps.tracker?.complete(logId, { status: "failed" });
360
+ releaseSemaphore();
272
361
  excludeTargets.push(resolved);
273
362
  continue;
274
363
  }
275
364
  const e = errors.upstreamConnectionFailed();
365
+ deps.tracker?.complete(logId, { status: "failed", statusCode: HTTP_BAD_GATEWAY });
366
+ releaseSemaphore();
276
367
  return reply.status(e.statusCode).send(e.body);
277
368
  }
278
369
  }
@@ -24,7 +24,7 @@ export interface RetryStrategy {
24
24
  export declare class FixedIntervalStrategy implements RetryStrategy {
25
25
  private delayMs;
26
26
  constructor(delayMs: number);
27
- getDelay(_attempt: number): number;
27
+ getDelay(): number;
28
28
  }
29
29
  export declare class ExponentialBackoffStrategy implements RetryStrategy {
30
30
  private baseMs;
@@ -3,8 +3,9 @@ export class FixedIntervalStrategy {
3
3
  constructor(delayMs) {
4
4
  this.delayMs = delayMs;
5
5
  }
6
- getDelay(_attempt) { return this.delayMs; }
6
+ getDelay() { return this.delayMs; }
7
7
  }
8
+ const EXPONENTIAL_BASE = 2;
8
9
  export class ExponentialBackoffStrategy {
9
10
  baseMs;
10
11
  capMs;
@@ -13,7 +14,7 @@ export class ExponentialBackoffStrategy {
13
14
  this.capMs = capMs;
14
15
  }
15
16
  getDelay(attempt) {
16
- return Math.min(this.baseMs * 2 ** attempt, this.capMs);
17
+ return Math.min(this.baseMs * EXPONENTIAL_BASE ** attempt, this.capMs);
17
18
  }
18
19
  }
19
20
  export function createStrategy(rule) {
@@ -0,0 +1,27 @@
1
+ export declare class SemaphoreQueueFullError extends Error {
2
+ readonly providerId: string;
3
+ constructor(providerId: string);
4
+ }
5
+ export declare class SemaphoreTimeoutError extends Error {
6
+ readonly providerId: string;
7
+ readonly timeoutMs: number;
8
+ constructor(providerId: string, timeoutMs: number);
9
+ }
10
+ interface ConcurrencyConfig {
11
+ maxConcurrency: number;
12
+ queueTimeoutMs: number;
13
+ maxQueueSize: number;
14
+ }
15
+ export declare class ProviderSemaphoreManager {
16
+ private readonly entries;
17
+ private getOrCreate;
18
+ updateConfig(providerId: string, config: ConcurrencyConfig): void;
19
+ acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void): Promise<void>;
20
+ release(providerId: string): void;
21
+ getStatus(providerId: string): {
22
+ active: number;
23
+ queued: number;
24
+ };
25
+ remove(providerId: string): void;
26
+ }
27
+ export {};
@@ -0,0 +1,125 @@
1
/**
 * Raised when a provider is at its concurrency limit and its wait queue has
 * no room left for another request.
 */
export class SemaphoreQueueFullError extends Error {
    /** Id of the provider whose queue is full. */
    providerId;
    /**
     * @param providerId Id of the provider whose queue is full.
     */
    constructor(providerId) {
        const message = `Provider '${providerId}' concurrency queue is full`;
        super(message);
        Object.assign(this, { providerId, name: "SemaphoreQueueFullError" });
    }
}
9
/**
 * Raised when a queued request waited longer than the provider's configured
 * queue timeout without obtaining a concurrency slot.
 */
export class SemaphoreTimeoutError extends Error {
    /** Id of the provider the request was waiting on. */
    providerId;
    /** The timeout, in milliseconds, that elapsed. */
    timeoutMs;
    /**
     * @param providerId Id of the provider the request was waiting on.
     * @param timeoutMs  The timeout, in milliseconds, that elapsed.
     */
    constructor(providerId, timeoutMs) {
        const message = `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)`;
        super(message);
        Object.assign(this, { providerId, timeoutMs, name: "SemaphoreTimeoutError" });
    }
}
19
/**
 * Per-provider counting semaphore with a bounded FIFO wait queue.
 *
 * Each provider id maps to an entry holding its config
 * ({ maxConcurrency, queueTimeoutMs, maxQueueSize }), the number of slots
 * currently held (`current`) and the queue of waiting acquirers.
 * `maxConcurrency === 0` means throttling is disabled: acquire() returns
 * immediately and nothing is counted.
 */
export class ProviderSemaphoreManager {
    entries = new Map();
    /**
     * Return the entry for a provider, creating a disabled (all-zero config)
     * entry on first use.
     */
    getOrCreate(providerId) {
        let entry = this.entries.get(providerId);
        if (!entry) {
            entry = {
                config: { maxConcurrency: 0, queueTimeoutMs: 0, maxQueueSize: 0 },
                current: 0,
                queue: [],
            };
            this.entries.set(providerId, entry);
        }
        return entry;
    }
    /**
     * Cancel a waiter's pending timeout and abort listener so that neither can
     * fire (or keep the waiter reachable) after the waiter has been settled.
     */
    #detach(waiter) {
        if (waiter.timer)
            clearTimeout(waiter.timer);
        waiter.signal?.removeEventListener("abort", waiter.onAbort);
    }
    /**
     * Admit queued waiters, oldest first, while there is spare capacity under
     * the entry's current limit. Each admitted waiter takes one slot.
     */
    #admitWaiters(entry) {
        while (entry.current < entry.config.maxConcurrency &&
            entry.queue.length > 0) {
            entry.current++;
            const waiter = entry.queue.shift();
            this.#detach(waiter);
            waiter.resolve();
        }
    }
    /**
     * Replace a provider's concurrency config.
     *
     * Setting maxConcurrency to 0 disables throttling: every queued waiter is
     * released without being counted and the slot counter is reset. Otherwise
     * waiters are admitted up to the (possibly raised) limit.
     */
    updateConfig(providerId, config) {
        const entry = this.getOrCreate(providerId);
        entry.config = config;
        if (config.maxConcurrency === 0) {
            // Admin disabled throttling — drain queue without counting, reset current
            // because no tracking is needed when maxConcurrency=0
            while (entry.queue.length > 0) {
                const waiter = entry.queue.shift();
                this.#detach(waiter);
                waiter.resolve();
            }
            entry.current = 0;
            return;
        }
        this.#admitWaiters(entry);
    }
    /**
     * Obtain a slot for a provider, waiting in the queue if none is free.
     *
     * @param providerId Provider to acquire a slot for.
     * @param signal     Optional AbortSignal; aborting while queued rejects
     *                   with a DOMException named "AbortError".
     * @param onQueued   Optional callback invoked just before the request is
     *                   placed in the wait queue.
     * @returns Resolves once a slot is held (or immediately when throttling
     *          is disabled).
     * @throws SemaphoreQueueFullError when the wait queue is at maxQueueSize.
     * @throws SemaphoreTimeoutError   when queueTimeoutMs (> 0) elapses first.
     */
    async acquire(providerId, signal, onQueued) {
        const entry = this.getOrCreate(providerId);
        const { maxConcurrency, queueTimeoutMs, maxQueueSize } = entry.config;
        if (maxConcurrency === 0)
            return;
        if (entry.current < maxConcurrency) {
            entry.current++;
            return;
        }
        if (entry.queue.length >= maxQueueSize) {
            throw new SemaphoreQueueFullError(providerId);
        }
        // An already-aborted signal never emits "abort" again, so a listener
        // registered now would never run; fail fast instead of queueing a
        // waiter that cannot be cancelled.
        if (signal?.aborted) {
            throw new DOMException("Aborted", "AbortError");
        }
        onQueued?.();
        return new Promise((resolve, reject) => {
            const waiter = { resolve, reject, timer: null, signal: null, onAbort: null };
            const leaveQueue = () => {
                const idx = entry.queue.indexOf(waiter);
                if (idx !== -1)
                    entry.queue.splice(idx, 1);
                this.#detach(waiter);
            };
            // A timeout of 0 means "wait indefinitely".
            if (queueTimeoutMs > 0) {
                waiter.timer = setTimeout(() => {
                    leaveQueue();
                    reject(new SemaphoreTimeoutError(providerId, queueTimeoutMs));
                }, queueTimeoutMs);
            }
            if (signal) {
                waiter.signal = signal;
                waiter.onAbort = () => {
                    leaveQueue();
                    reject(new DOMException("Aborted", "AbortError"));
                };
                signal.addEventListener("abort", waiter.onAbort, { once: true });
            }
            entry.queue.push(waiter);
        });
    }
    /**
     * Give back a slot and admit the next waiter if capacity allows.
     *
     * No-op for unknown providers and while throttling is disabled, because
     * acquire() does not count slots in that mode. The counter is clamped at
     * zero so a release belonging to a slot that was forgotten by a config
     * reset cannot drive it negative.
     */
    release(providerId) {
        const entry = this.entries.get(providerId);
        if (!entry)
            return;
        if (entry.config.maxConcurrency === 0)
            return;
        entry.current = Math.max(0, entry.current - 1);
        // Re-check against the limit rather than handing the slot over blindly:
        // the limit may have been lowered while this slot was held.
        this.#admitWaiters(entry);
    }
    /**
     * Report how many slots are held and how many requests are queued for a
     * provider. Unknown providers report zeros.
     */
    getStatus(providerId) {
        const entry = this.entries.get(providerId);
        if (!entry)
            return { active: 0, queued: 0 };
        return { active: entry.current, queued: entry.queue.length };
    }
    /**
     * Forget a provider. Every queued waiter is rejected with
     * Error("Provider removed"); later release() calls for it are no-ops.
     */
    remove(providerId) {
        const entry = this.entries.get(providerId);
        if (!entry)
            return;
        for (const waiter of entry.queue) {
            this.#detach(waiter);
            waiter.reject(new Error("Provider removed"));
        }
        this.entries.delete(providerId);
    }
}