llm-simple-router 0.3.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +81 -49
  2. package/dist/admin/constants.d.ts +1 -8
  3. package/dist/admin/constants.js +2 -8
  4. package/dist/admin/logs.js +18 -3
  5. package/dist/admin/router-keys.js +1 -2
  6. package/dist/cli.js +0 -0
  7. package/dist/constants.d.ts +8 -0
  8. package/dist/constants.js +9 -0
  9. package/dist/db/index.d.ts +4 -4
  10. package/dist/db/index.js +2 -2
  11. package/dist/db/logs.d.ts +18 -33
  12. package/dist/db/logs.js +40 -17
  13. package/dist/db/metrics.d.ts +33 -0
  14. package/dist/db/metrics.js +7 -0
  15. package/dist/db/migrations/018_add_failover_field.sql +2 -0
  16. package/dist/db/retry-rules.d.ts +2 -2
  17. package/dist/db/retry-rules.js +26 -13
  18. package/dist/index.js +3 -5
  19. package/dist/monitor/request-tracker.d.ts +6 -0
  20. package/dist/monitor/request-tracker.js +23 -54
  21. package/dist/monitor/stream-extractor.d.ts +11 -0
  22. package/dist/monitor/stream-extractor.js +51 -0
  23. package/dist/proxy/anthropic.js +19 -32
  24. package/dist/proxy/log-helpers.d.ts +11 -4
  25. package/dist/proxy/log-helpers.js +5 -3
  26. package/dist/proxy/openai.js +18 -34
  27. package/dist/proxy/orchestrator.d.ts +52 -0
  28. package/dist/proxy/orchestrator.js +100 -0
  29. package/dist/proxy/proxy-core.d.ts +14 -26
  30. package/dist/proxy/proxy-core.js +40 -337
  31. package/dist/proxy/proxy-handler.d.ts +18 -0
  32. package/dist/proxy/proxy-handler.js +223 -0
  33. package/dist/proxy/proxy-logging.d.ts +28 -0
  34. package/dist/proxy/proxy-logging.js +122 -0
  35. package/dist/proxy/resilience.d.ts +63 -0
  36. package/dist/proxy/resilience.js +188 -0
  37. package/dist/proxy/scope.d.ts +18 -0
  38. package/dist/proxy/scope.js +37 -0
  39. package/dist/proxy/semaphore.d.ts +9 -2
  40. package/dist/proxy/semaphore.js +34 -7
  41. package/dist/proxy/stream-proxy.d.ts +7 -0
  42. package/dist/proxy/stream-proxy.js +263 -0
  43. package/dist/proxy/{upstream-call.d.ts → transport.d.ts} +25 -18
  44. package/dist/proxy/transport.js +128 -0
  45. package/dist/proxy/types.d.ts +58 -0
  46. package/dist/proxy/types.js +30 -0
  47. package/frontend-dist/assets/{CardContent-CucI6u41.js → CardContent-CTnwqTdL.js} +1 -1
  48. package/frontend-dist/assets/{CardHeader-d-DYsWxe.js → CardHeader-CfUeY7tk.js} +1 -1
  49. package/frontend-dist/assets/{CardTitle-CIDEQkWB.js → CardTitle-CWiDwWqd.js} +1 -1
  50. package/frontend-dist/assets/{Checkbox-CybCw3zS.js → Checkbox-BxNz70R_.js} +1 -1
  51. package/frontend-dist/assets/{CollapsibleTrigger-BFNhb19_.js → CollapsibleTrigger-Uz1aGdtH.js} +1 -1
  52. package/frontend-dist/assets/{Collection-DUBb4r6h.js → Collection-1EHC87X5.js} +1 -1
  53. package/frontend-dist/assets/{Dashboard-DLB6iqH1.js → Dashboard-C3FL30UN.js} +2 -2
  54. package/frontend-dist/assets/{DialogTitle-Dq-5o7nJ.js → DialogTitle-CAOFxr83.js} +1 -1
  55. package/frontend-dist/assets/{Input-HN3Il0-c.js → Input-DRIid2C6.js} +1 -1
  56. package/frontend-dist/assets/{Label-CXAeFn-r.js → Label-UyNN2jyE.js} +1 -1
  57. package/frontend-dist/assets/LogDetailDialog-8BT4vIlV.js +3 -0
  58. package/frontend-dist/assets/{Login-Br3qsdxf.js → Login-CnzH6TdS.js} +1 -1
  59. package/frontend-dist/assets/Logs-CbK8NB_X.js +1 -0
  60. package/frontend-dist/assets/{ModelMappings-DXC0sNH5.js → ModelMappings-DeRFgsYG.js} +1 -1
  61. package/frontend-dist/assets/Monitor-Dd80bdUn.js +1 -0
  62. package/frontend-dist/assets/{PopperContent-CnZejY31.js → PopperContent-B3fZao7v.js} +1 -1
  63. package/frontend-dist/assets/{Providers-8CHhW4uH.js → Providers-B_DbV-_y.js} +1 -1
  64. package/frontend-dist/assets/ProxyEnhancement-up1fnPzq.js +5 -0
  65. package/frontend-dist/assets/RetryRules-Dkuhjh0u.js +1 -0
  66. package/frontend-dist/assets/RouterKeys-CvMMAa4t.js +1 -0
  67. package/frontend-dist/assets/{RovingFocusItem-B7ZIkplZ.js → RovingFocusItem-X0bfqWWS.js} +1 -1
  68. package/frontend-dist/assets/{SelectValue-B32pgmTJ.js → SelectValue-zO8t-tx1.js} +1 -1
  69. package/frontend-dist/assets/{Setup-Df9IQo2x.js → Setup-ByT2ThOQ.js} +1 -1
  70. package/frontend-dist/assets/{Switch-CLeo7H6d.js → Switch-BEMjVugO.js} +1 -1
  71. package/frontend-dist/assets/{TableHeader-BpscAtT3.js → TableHeader-DpHWSnxK.js} +1 -1
  72. package/frontend-dist/assets/{TabsTrigger-DErAbTuM.js → TabsTrigger-Db6RqsZc.js} +1 -1
  73. package/frontend-dist/assets/{VisuallyHidden-CJBR3YB3.js → VisuallyHidden-hs8pj8OP.js} +1 -1
  74. package/frontend-dist/assets/{VisuallyHiddenInput-Cy0VuE1l.js → VisuallyHiddenInput-1m0nNADN.js} +1 -1
  75. package/frontend-dist/assets/{alert-dialog-BAR1JRmT.js → alert-dialog-PP91kaO8.js} +1 -1
  76. package/frontend-dist/assets/{button-D54q76GQ.js → button-Dcc0gF5i.js} +1 -1
  77. package/frontend-dist/assets/{client-Mb8fy_bC.js → client-DIIo9zPK.js} +2 -2
  78. package/frontend-dist/assets/{createLucideIcon-CCmQ9QKM.js → createLucideIcon-DGZkBjcJ.js} +1 -1
  79. package/frontend-dist/assets/{dialog-DSH5k5Kj.js → dialog-CxSyR-fN.js} +1 -1
  80. package/frontend-dist/assets/format-CPdJtjZ5.js +1 -0
  81. package/frontend-dist/assets/index-BL-LAtac.css +1 -0
  82. package/frontend-dist/assets/{index-BQBtSfem.js → index-CvT41fGL.js} +1 -1
  83. package/frontend-dist/assets/{lib-BgOqOzXI.js → lib-Bl0OuBjh.js} +1 -1
  84. package/frontend-dist/assets/{ohash.D__AXeF1-p4vp6Svt.js → ohash.D__AXeF1-B64hB831.js} +1 -1
  85. package/frontend-dist/assets/{useClipboard-DO-38TXr.js → useClipboard-CWc1cTDo.js} +1 -1
  86. package/frontend-dist/assets/{useForwardExpose-CzQFheaD.js → useForwardExpose-AkE0lq8y.js} +1 -1
  87. package/frontend-dist/assets/useNonce-DGyPxdjq.js +1 -0
  88. package/frontend-dist/assets/x-BuUpx9Fr.js +1 -0
  89. package/frontend-dist/index.html +7 -7
  90. package/package.json +1 -1
  91. package/dist/admin/services.d.ts +0 -7
  92. package/dist/admin/services.js +0 -63
  93. package/dist/proxy/retry.d.ts +0 -43
  94. package/dist/proxy/retry.js +0 -121
  95. package/dist/proxy/upstream-call.js +0 -208
  96. package/frontend-dist/assets/LogResponseViewer-CyBzv02a.js +0 -3
  97. package/frontend-dist/assets/Logs-Cu_IftdS.js +0 -1
  98. package/frontend-dist/assets/Monitor-CKlid1sC.js +0 -1
  99. package/frontend-dist/assets/ProxyEnhancement-CkYeXwgH.js +0 -5
  100. package/frontend-dist/assets/RetryRules-Csb7u9W4.js +0 -1
  101. package/frontend-dist/assets/RouterKeys-C6YIufmj.js +0 -1
  102. package/frontend-dist/assets/index-H-lnTkMr.css +0 -1
  103. package/frontend-dist/assets/useNonce-CU-NirfM.js +0 -1
  104. package/frontend-dist/assets/x-DEJ1xpi5.js +0 -1
@@ -0,0 +1,122 @@
1
+ import { randomUUID } from "crypto";
2
+ import { insertRequestLog, insertMetrics } from "../db/index.js";
3
+ import { insertSuccessLog } from "./log-helpers.js";
4
+ import { MetricsExtractor } from "../metrics/metrics-extractor.js";
5
+ // Re-export shared constants & types from types.ts (canonical home)
6
+ export { UPSTREAM_SUCCESS } from "./types.js";
7
+ // Internal imports from types.ts
8
+ import { UPSTREAM_SUCCESS } from "./types.js";
9
+ import { HTTP_BAD_GATEWAY } from "../constants.js";
10
+ // ---------- Header sanitization ----------
11
+ const AUTH_HEADER_RE = /^authorization$/i;
12
+ /** 日志存储前脱敏 Authorization header,避免 API Key 被持久化 */
13
+ export function sanitizeHeadersForLog(headers) {
14
+ const sanitized = {};
15
+ for (const [key, value] of Object.entries(headers)) {
16
+ sanitized[key] = AUTH_HEADER_RE.test(key) ? value.replace(/(Bearer\s+)\S+/, "$1sk-***") : value;
17
+ }
18
+ return sanitized;
19
+ }
20
+ // ---------- Logging helpers (extracted from proxy-core) ----------
21
/**
 * Answers a request directly from the router (no upstream call) and records
 * the exchange in the request log.
 *
 * The log row uses provider id "router" and a latency of 0. Client headers
 * are passed through sanitizeHeadersForLog before being persisted so the
 * caller's Authorization token is not written to storage.
 *
 * @param db                database handle forwarded to insertRequestLog
 * @param apiType           API flavour stored in the log row
 * @param request           incoming request (reads `body`, `headers`, `routerKey`)
 * @param reply             reply object used to send the intercepted response
 * @param interceptResponse `{ statusCode, body, meta? }` to send and log
 * @param clientModel       model name stored in the log row
 * @returns whatever `reply.status(...).send(...)` returns
 */
export function handleIntercept(db, apiType, request, reply, interceptResponse, clientModel) {
    const { statusCode, body, meta } = interceptResponse;
    const respBody = JSON.stringify(body);
    insertRequestLog(db, {
        id: randomUUID(), api_type: apiType, model: clientModel, provider_id: "router",
        status_code: statusCode, latency_ms: 0,
        is_stream: request.body.stream === true ? 1 : 0, error_message: null,
        created_at: new Date().toISOString(),
        request_body: JSON.stringify(request.body),
        response_body: respBody,
        // Never persist the raw Authorization header.
        client_request: JSON.stringify({ headers: sanitizeHeadersForLog(request.headers), body: request.body }),
        upstream_request: meta ? JSON.stringify(meta) : null,
        client_response: JSON.stringify({ statusCode, body: respBody }),
        is_retry: 0, is_failover: 0, original_request_id: null,
        router_key_id: request.routerKey?.id ?? null, original_model: null,
    });
    return reply.status(statusCode).send(body);
}
40
+ // ---------- New-architecture logging ----------
41
/**
 * Writes one request-log row per attempt made by the resilience layer.
 *
 * - The first attempt (attemptIndex 0) reuses `params.logId`; every later
 *   attempt gets a fresh UUID and is linked to `params.logId` as its parent.
 * - During a failover iteration the first attempt is flagged as failover and
 *   linked to the root log id instead.
 * - Thrown attempts are stored with HTTP_BAD_GATEWAY and the error message,
 *   non-success responses with their status/body, successes via insertSuccessLog.
 *
 * @returns the log id of the last successful attempt, or `params.logId` if none succeeded
 */
export function logResilienceResult(db, params, attempts, result, startTime) {
    const viaFailover = params.failover?.isFailoverIteration ?? false;
    const rootId = params.failover?.rootLogId ?? params.logId;
    const streamFlag = params.isStream ? 1 : 0;
    let lastSuccessLogId = params.logId;
    for (const attempt of attempts) {
        const first = attempt.attemptIndex === 0;
        const rowId = first ? params.logId : randomUUID();
        const failoverLog = first && viaFailover;
        const retryFlag = first ? 0 : 1;
        // Retries hang off the original row; a failover's first attempt hangs off the root row.
        let parentId = params.logId;
        if (first) {
            parentId = viaFailover ? rootId : null;
        }
        if (attempt.error) {
            // Transport-level failure: no upstream response exists.
            insertRequestLog(db, {
                id: rowId, api_type: params.apiType, model: params.model,
                provider_id: attempt.target.provider_id,
                status_code: HTTP_BAD_GATEWAY, latency_ms: attempt.latencyMs,
                is_stream: streamFlag, error_message: attempt.error,
                created_at: new Date().toISOString(), request_body: params.reqBodyStr,
                client_request: params.clientReq, upstream_request: params.upstreamReqBase,
                is_retry: retryFlag, is_failover: failoverLog ? 1 : 0,
                original_request_id: parentId,
                router_key_id: params.routerKeyId, original_model: params.originalModel,
            });
            continue;
        }
        if (attempt.statusCode !== UPSTREAM_SUCCESS) {
            // Upstream answered with a non-success status; the same payload is stored for both sides.
            const exchange = JSON.stringify({ statusCode: attempt.statusCode, body: attempt.responseBody });
            insertRequestLog(db, {
                id: rowId, api_type: params.apiType, model: params.model,
                provider_id: attempt.target.provider_id,
                status_code: attempt.statusCode, latency_ms: attempt.latencyMs,
                is_stream: streamFlag, error_message: null,
                created_at: new Date().toISOString(), request_body: params.reqBodyStr,
                response_body: attempt.responseBody,
                client_request: params.clientReq, upstream_request: params.upstreamReqBase,
                upstream_response: exchange,
                client_response: exchange,
                is_retry: retryFlag, is_failover: failoverLog ? 1 : 0,
                original_request_id: parentId,
                router_key_id: params.routerKeyId, original_model: params.originalModel,
            });
            continue;
        }
        // Success: response headers come from the final transport result.
        let upHdrs;
        if (result.kind === "stream_success" || result.kind === "stream_abort") {
            upHdrs = result.upstreamResponseHeaders ?? {};
        }
        else {
            upHdrs = "headers" in result ? result.headers : {};
        }
        insertSuccessLog(db, {
            apiType: params.apiType, model: params.model,
            provider: { id: attempt.target.provider_id },
            isStream: params.isStream, startTime,
            reqBody: params.reqBodyStr, clientReq: params.clientReq,
            upstreamReq: params.upstreamReqBase, id: rowId,
            status: attempt.statusCode, respBody: attempt.responseBody,
            upHdrs, cliHdrs: upHdrs,
            isRetry: !first, isFailover: failoverLog,
            originalRequestId: parentId,
            routerKeyId: params.routerKeyId, originalModel: params.originalModel,
        });
        lastSuccessLogId = rowId;
    }
    return lastSuccessLogId;
}
100
/**
 * Persists a metrics row for a finished request.
 *
 * Streaming results use the metrics attached to the transport result;
 * non-streaming successes are parsed with MetricsExtractor. When neither
 * yields metrics, a placeholder row flagged `is_complete: 0` is written.
 * Any failure is logged through `request.log.error` and never rethrown.
 */
export function collectTransportMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request) {
    const base = { request_log_id: lastSuccessLogId, provider_id: providerId, backend_model: backendModel, api_type: apiType };
    try {
        const streamed = isStream && (result.kind === "stream_success" || result.kind === "stream_abort");
        let extracted = null;
        if (streamed) {
            extracted = result.metrics || null;
        }
        else if (result.kind === "success") {
            extracted = MetricsExtractor.fromNonStreamResponse(apiType, result.body) || null;
        }
        // Fallback when complete metrics cannot be extracted: mark the row as incomplete.
        const row = extracted ? { ...base, ...extracted } : { ...base, is_complete: 0 };
        insertMetrics(db, row);
    }
    catch (err) {
        request.log.error({ err }, "Failed to insert metrics");
    }
}
@@ -0,0 +1,63 @@
1
+ import type { RetryRuleMatcher } from "./retry-rules.js";
2
+ import type { TransportResult } from "./types.js";
3
+ import type { Target } from "./strategy/types.js";
4
/** Computes how long to wait, in milliseconds, before the next retry. */
+ export interface RetryStrategy {
5
/** `attempt` is the caller's attempt counter (ResilienceLayer passes its zero-based per-target count). */
+ getDelay(attempt: number): number;
6
+ }
7
/** Retry strategy that returns the same delay for every attempt. */
+ export declare class FixedIntervalStrategy implements RetryStrategy {
8
+ private delayMs;
9
/** `delayMs` is the constant wait returned by getDelay(). */
+ constructor(delayMs: number);
10
/** Returns the delay given to the constructor; takes no attempt argument. */
+ getDelay(): number;
11
+ }
12
/** Retry strategy whose delay doubles with each attempt, up to a cap. */
+ export declare class ExponentialBackoffStrategy implements RetryStrategy {
13
+ private baseMs;
14
+ private capMs;
15
/** `baseMs` is the delay for attempt 0; `capMs` is the upper bound. */
+ constructor(baseMs: number, capMs: number);
16
/** Returns min(baseMs * 2^attempt, capMs). */
+ getDelay(attempt: number): number;
17
+ }
18
/**
 * Builds the strategy described by a retry rule: "fixed" yields a
 * FixedIntervalStrategy(retry_delay_ms); any other value yields an
 * ExponentialBackoffStrategy(retry_delay_ms, max_delay_ms).
 */
+ export declare function createStrategy(rule: {
19
+ retry_strategy: string;
20
+ retry_delay_ms: number;
21
+ max_delay_ms: number;
22
+ }): RetryStrategy;
23
/** Settings consumed by ResilienceLayer.decide / execute. */
+ export interface ResilienceConfig {
24
/** Upper bound on retries against one target; also caps a matched rule's max_retries. */
+ maxRetries: number;
25
/** Delay (ms) used when retrying after a retryable thrown error. */
+ baseDelayMs: number;
26
/** Responses with a status code at or above this value are candidates for retry/failover. */
+ failoverThreshold: number;
27
/** Optional matcher consulted with (statusCode, body) to find a retry rule. */
+ ruleMatcher?: RetryRuleMatcher;
28
/** When true, exhausting retries moves on to the next target instead of stopping. */
+ isFailover: boolean;
29
+ }
30
/** Record of a single upstream call made by ResilienceLayer.execute. */
+ export interface ResilienceAttempt {
31
/** Target the call was sent to. */
+ target: Target;
32
/** Position of this attempt across all retries and failovers, starting at 0. */
+ attemptIndex: number;
33
/** Upstream status code; null when the call threw. */
+ statusCode: number | null;
34
/** Error message when the call threw; null otherwise. */
+ error: string | null;
35
/** Elapsed time in milliseconds measured around the call. */
+ latencyMs: number;
36
/** Body of the transport result when it carries one; null otherwise. */
+ responseBody: string | null;
37
+ }
38
/** Outcome of ResilienceLayer.execute. */
+ export interface ResilienceResult {
39
/** Last transport result, or a synthesized 502 error when no target was available at all. */
+ result: TransportResult;
40
/** Every attempt made, in order. */
+ attempts: ResilienceAttempt[];
41
/** Targets removed from consideration by failover decisions. */
+ excludedTargets: Target[];
42
+ }
43
/**
 * What ResilienceLayer.execute should do after an attempt:
 * - "done": return the result as is
 * - "retry": wait `delayMs`, then call the same target again
 * - "failover": exclude `excludeTarget` and move to the next available target
 * - "abort": stop and return the result; `reason` explains why
 */
+ export type ResilienceDecision = {
44
+ action: "done";
45
+ } | {
46
+ action: "retry";
47
+ delayMs: number;
48
+ } | {
49
+ action: "failover";
50
+ excludeTarget: Target;
51
+ } | {
52
+ action: "abort";
53
+ reason: string;
54
+ };
55
/** Snapshot handed to ResilienceLayer.decide after each attempt. */
+ export interface ResilienceState {
56
/** Number of earlier attempts against `currentTarget` (0 after the first call). */
+ attemptCount: number;
57
/** Target that produced the result being judged. */
+ currentTarget: Target;
58
/** Targets excluded so far. */
+ excludedTargets: Target[];
59
+ }
60
/** Retry / failover driver wrapped around a single transport call. */
+ export declare class ResilienceLayer {
61
/** Pure decision step: maps a transport result plus state/config to the next action. */
+ decide(result: TransportResult, state: ResilienceState, config: ResilienceConfig): ResilienceDecision;
62
/**
 * Calls `fn` on the first non-excluded target from `targets()` until decide()
 * says done/abort or no target is left. Throws ProviderSwitchNeeded when a
 * failover would move to a target of a different provider.
 */
+ execute(targets: () => Target[], fn: (target: Target) => Promise<TransportResult>, config: ResilienceConfig): Promise<ResilienceResult>;
63
+ }
@@ -0,0 +1,188 @@
1
+ import { ProviderSwitchNeeded } from "./types.js";
2
/** Retry strategy that waits the same number of milliseconds before every retry. */
export class FixedIntervalStrategy {
    /** Constant delay in milliseconds. */
    delayMs;
    /** @param delayMs wait applied before each retry */
    constructor(delayMs) {
        this.delayMs = delayMs;
    }
    /** Returns the configured delay; the attempt number plays no role. */
    getDelay() {
        const { delayMs } = this;
        return delayMs;
    }
}
9
/** Growth factor applied per attempt by the exponential strategy. */
const EXPONENTIAL_BASE = 2;
/** Retry strategy whose delay grows as baseMs * 2^attempt, never exceeding capMs. */
export class ExponentialBackoffStrategy {
    /** Delay for attempt 0, in milliseconds. */
    baseMs;
    /** Maximum delay, in milliseconds. */
    capMs;
    constructor(baseMs, capMs) {
        this.baseMs = baseMs;
        this.capMs = capMs;
    }
    /** @param attempt zero-based attempt counter */
    getDelay(attempt) {
        const uncapped = this.baseMs * Math.pow(EXPONENTIAL_BASE, attempt);
        return Math.min(uncapped, this.capMs);
    }
}
21
/**
 * Turns a retry rule into a delay strategy.
 * A `retry_strategy` of "fixed" gives a constant delay; every other value
 * gives exponential backoff capped at `max_delay_ms`.
 */
export function createStrategy(rule) {
    const { retry_strategy: kind, retry_delay_ms: delayMs, max_delay_ms: capMs } = rule;
    return kind === "fixed"
        ? new FixedIntervalStrategy(delayMs)
        : new ExponentialBackoffStrategy(delayMs, capMs);
}
26
+ // ---------- Constants ----------
27
/** Error `code` values for which a thrown transport error is treated as retryable. */
const RETRYABLE_THROW_CODES = new Set([
    "ETIMEDOUT",
    "ECONNRESET",
    "ECONNREFUSED",
]);
/** Status code for which the Retry-After header is consulted. */
const HTTP_TOO_MANY_REQUESTS = 429;
/** Conversion factor from seconds to milliseconds. */
const MS_PER_SECOND = 1000;
30
+ // ---------- Internal helpers ----------
31
/**
 * Tells whether a thrown value is worth retrying: it must be an object with a
 * `code` property whose value is listed in RETRYABLE_THROW_CODES.
 */
function isRetryableThrow(err) {
    const carriesCode = Boolean(err) && typeof err === "object" && "code" in err;
    if (!carriesCode) {
        return false;
    }
    return RETRYABLE_THROW_CODES.has(err.code ?? "");
}
37
+ function sleep(ms) {
38
+ return new Promise((resolve) => setTimeout(resolve, ms));
39
+ }
40
// IMF-fixdate, the preferred HTTP-date form, e.g. "Wed, 21 Oct 2015 07:28:00 GMT".
const HTTP_DATE_RE = /^[A-Za-z]{3}, \d{2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2} GMT$/;
/**
 * Reads the Retry-After response header and converts it to milliseconds.
 *
 * Both forms allowed by HTTP are understood:
 * - delay-seconds ("120") -> 120000
 * - HTTP-date ("Wed, 21 Oct 2015 07:28:00 GMT") -> time remaining until that
 *   instant, or 0 if it already passed
 *
 * @param headers response header map; looked up as "retry-after" then "Retry-After"
 * @returns delay in milliseconds, or null when the header is absent or unparseable
 */
function parseRetryAfter(headers) {
    if (!headers)
        return null;
    const raw = headers["retry-after"] ?? headers["Retry-After"];
    // A repeated header may arrive as an array; the first value wins.
    const val = Array.isArray(raw) ? raw[0] : raw;
    if (!val)
        return null;
    const text = String(val).trim();
    // Integer form first, so every value that parsed before keeps its meaning.
    const seconds = parseInt(text, 10);
    if (!isNaN(seconds))
        return seconds * MS_PER_SECOND;
    // Only accept a strict HTTP-date; Date.parse alone is too lenient.
    if (!HTTP_DATE_RE.test(text))
        return null;
    const retryAt = Date.parse(text);
    return isNaN(retryAt) ? null : Math.max(0, retryAt - Date.now());
}
49
/** Returns the `body` of a transport result, or null when the result has no such property. */
function extractBody(result) {
    return "body" in result ? result.body : null;
}
54
/** Returns the `headers` of a transport result, or undefined when the result has no such property. */
function extractHeaders(result) {
    return "headers" in result ? result.headers : undefined;
}
59
+ // ---------- ResilienceLayer ----------
60
+ export class ResilienceLayer {
61
+ decide(result, state, config) {
62
+ // stream_abort -> 不可恢复
63
+ if (result.kind === "stream_abort") {
64
+ return { action: "abort", reason: "stream_abort" };
65
+ }
66
+ // success + statusCode < failoverThreshold -> done
67
+ if ((result.kind === "success" || result.kind === "stream_success") &&
68
+ result.statusCode < config.failoverThreshold) {
69
+ return { action: "done" };
70
+ }
71
+ // throw -> 网络异常
72
+ if (result.kind === "throw") {
73
+ if (!isRetryableThrow(result.error)) {
74
+ return { action: "abort", reason: result.error.message };
75
+ }
76
+ if (state.attemptCount < config.maxRetries) {
77
+ return { action: "retry", delayMs: config.baseDelayMs };
78
+ }
79
+ return config.isFailover
80
+ ? { action: "failover", excludeTarget: state.currentTarget }
81
+ : { action: "abort", reason: "throw exhausted retries" };
82
+ }
83
+ // statusCode >= failoverThreshold -> retry or failover
84
+ if (result.statusCode >= config.failoverThreshold) {
85
+ const body = extractBody(result);
86
+ const matchedRule = body && config.ruleMatcher
87
+ ? config.ruleMatcher.match(result.statusCode, body)
88
+ : null;
89
+ const effectiveMaxRetries = Math.min(matchedRule?.max_retries ?? 0, config.maxRetries);
90
+ if (matchedRule && state.attemptCount < effectiveMaxRetries) {
91
+ const strategy = createStrategy(matchedRule);
92
+ const headers = extractHeaders(result);
93
+ const retryAfterMs = result.statusCode === HTTP_TOO_MANY_REQUESTS
94
+ ? parseRetryAfter(headers) : null;
95
+ const delay = Math.max(strategy.getDelay(state.attemptCount), retryAfterMs ?? 0);
96
+ return { action: "retry", delayMs: delay };
97
+ }
98
+ return config.isFailover
99
+ ? { action: "failover", excludeTarget: state.currentTarget }
100
+ : { action: "done" };
101
+ }
102
+ // 其他响应(< failoverThreshold 的非成功) -> 仅当 rule 匹配才 retry
103
+ const body = extractBody(result);
104
+ if (body && config.ruleMatcher) {
105
+ const matchedRule = config.ruleMatcher.match(result.statusCode, body);
106
+ const effectiveMaxRetries = Math.min(matchedRule?.max_retries ?? 0, config.maxRetries);
107
+ if (matchedRule && state.attemptCount < effectiveMaxRetries) {
108
+ const strategy = createStrategy(matchedRule);
109
+ return { action: "retry", delayMs: strategy.getDelay(state.attemptCount) };
110
+ }
111
+ }
112
+ return { action: "done" };
113
+ }
114
+ async execute(targets, fn, config) {
115
+ const allAttempts = [];
116
+ const excludedTargets = [];
117
+ const perTargetCounts = new Map();
118
+ let globalAttemptIndex = 0;
119
+ let lastResult;
120
+ const targetKey = (t) => `${t.provider_id}:${t.backend_model}`;
121
+ const getTargetCount = (t) => perTargetCounts.get(targetKey(t)) ?? 0;
122
+ const incrementTarget = (t) => {
123
+ perTargetCounts.set(targetKey(t), (perTargetCounts.get(targetKey(t)) ?? 0) + 1);
124
+ };
125
+ while (true) {
126
+ const available = targets().filter(t => !excludedTargets.some(e => e.backend_model === t.backend_model && e.provider_id === t.provider_id));
127
+ if (available.length === 0) {
128
+ return {
129
+ result: lastResult ?? { kind: "error", statusCode: 502, body: "All targets exhausted", headers: {}, sentHeaders: {}, sentBody: "" },
130
+ attempts: allAttempts,
131
+ excludedTargets,
132
+ };
133
+ }
134
+ const currentTarget = available[0];
135
+ incrementTarget(currentTarget);
136
+ const start = Date.now();
137
+ let transportResult;
138
+ try {
139
+ transportResult = await fn(currentTarget);
140
+ }
141
+ catch (err) {
142
+ const errMsg = err instanceof Error ? err.message : String(err);
143
+ transportResult = { kind: "throw", error: err instanceof Error ? err : new Error(errMsg) };
144
+ }
145
+ lastResult = transportResult;
146
+ if (transportResult.kind === "throw") {
147
+ const throwErr = transportResult.error;
148
+ allAttempts.push({
149
+ target: currentTarget, attemptIndex: globalAttemptIndex,
150
+ statusCode: null, error: throwErr instanceof Error ? throwErr.message : String(throwErr),
151
+ latencyMs: Date.now() - start, responseBody: null,
152
+ });
153
+ }
154
+ else {
155
+ allAttempts.push({
156
+ target: currentTarget, attemptIndex: globalAttemptIndex,
157
+ statusCode: transportResult.statusCode, error: null,
158
+ latencyMs: Date.now() - start, responseBody: extractBody(transportResult),
159
+ });
160
+ }
161
+ const state = {
162
+ attemptCount: getTargetCount(currentTarget) - 1,
163
+ currentTarget,
164
+ excludedTargets,
165
+ };
166
+ const decision = this.decide(transportResult, state, config);
167
+ switch (decision.action) {
168
+ case "done":
169
+ return { result: transportResult, attempts: allAttempts, excludedTargets };
170
+ case "retry":
171
+ globalAttemptIndex++;
172
+ await sleep(decision.delayMs);
173
+ continue;
174
+ case "failover":
175
+ excludedTargets.push(decision.excludeTarget);
176
+ globalAttemptIndex++;
177
+ // 跨 provider failover 需要切换信号量,抛出异常让上层处理
178
+ const nextAvail = targets().filter(t => !excludedTargets.some(e => e.backend_model === t.backend_model && e.provider_id === t.provider_id));
179
+ if (nextAvail.length > 0 && nextAvail[0].provider_id !== currentTarget.provider_id) {
180
+ throw new ProviderSwitchNeeded(nextAvail[0].provider_id);
181
+ }
182
+ continue;
183
+ case "abort":
184
+ return { result: transportResult, attempts: allAttempts, excludedTargets };
185
+ }
186
+ }
187
+ }
188
+ }
@@ -0,0 +1,18 @@
1
+ import type { ProviderSemaphoreManager } from "./semaphore.js";
2
+ import type { RequestTracker } from "../monitor/request-tracker.js";
3
+ import type { ActiveRequest } from "../monitor/types.js";
4
/** Runs work while holding a concurrency slot obtained from a ProviderSemaphoreManager. */
+ export declare class SemaphoreScope {
5
+ private manager;
6
+ constructor(manager: ProviderSemaphoreManager);
7
/**
 * Acquires a slot for `providerId` (passing `signal` and `onQueued` to the
 * manager), runs `fn`, and releases the slot whether `fn` resolves or rejects.
 */
+ withSlot<T>(providerId: string, signal: AbortSignal, onQueued: () => void, fn: () => Promise<T>): Promise<T>;
8
+ }
9
/** Wraps work with RequestTracker start/complete bookkeeping. */
+ export declare class TrackerScope {
10
+ private tracker;
11
+ constructor(tracker: RequestTracker);
12
/**
 * Registers `req` with the tracker, runs `fn`, then completes the request
 * with the status derived by `extractStatus`. If an error is thrown the
 * request is completed as "failed" and the error is rethrown.
 */
+ track<T>(req: ActiveRequest, fn: () => Promise<T>, extractStatus: (result: T) => {
13
+ status: "completed" | "failed";
14
+ statusCode?: number;
15
+ }): Promise<T>;
16
+ /** Notifies the tracker that a request entered or left the semaphore queue, triggering an immediate broadcast to the frontend */
17
+ markQueued(id: string, queued: boolean): void;
18
+ }
@@ -0,0 +1,37 @@
1
+ export class SemaphoreScope {
2
+ manager;
3
+ constructor(manager) {
4
+ this.manager = manager;
5
+ }
6
+ async withSlot(providerId, signal, onQueued, fn) {
7
+ const token = await this.manager.acquire(providerId, signal, onQueued);
8
+ try {
9
+ return await fn();
10
+ }
11
+ finally {
12
+ this.manager.release(providerId, token);
13
+ }
14
+ }
15
+ }
16
+ export class TrackerScope {
17
+ tracker;
18
+ constructor(tracker) {
19
+ this.tracker = tracker;
20
+ }
21
+ async track(req, fn, extractStatus) {
22
+ this.tracker.start(req);
23
+ try {
24
+ const result = await fn();
25
+ this.tracker.complete(req.id, extractStatus(result));
26
+ return result;
27
+ }
28
+ catch (e) {
29
+ this.tracker.complete(req.id, { status: "failed" });
30
+ throw e;
31
+ }
32
+ }
33
+ /** 通知 tracker 请求进入/离开信号量队列,触发前端即时广播 */
34
+ markQueued(id, queued) {
35
+ this.tracker.update(id, { queued });
36
+ }
37
+ }
@@ -12,12 +12,19 @@ interface ConcurrencyConfig {
12
12
  queueTimeoutMs: number;
13
13
  maxQueueSize: number;
14
14
  }
15
/**
 * Minimal structured logger accepted by ProviderSemaphoreManager.acquire/release.
 * Each method takes a context object followed by a message.
 */
+ export interface SemaphoreLogger {
16
+ debug(obj: Record<string, unknown>, msg: string): void;
17
+ warn(obj: Record<string, unknown>, msg: string): void;
18
+ }
19
/** Handle returned by acquire() and handed back to release(). */
+ export interface AcquireToken {
20
/** Semaphore generation at acquire time; release() ignores tokens whose generation no longer matches. */
+ readonly generation: number;
21
+ }
15
22
  export declare class ProviderSemaphoreManager {
16
23
  private readonly entries;
17
24
  private getOrCreate;
18
25
  updateConfig(providerId: string, config: ConcurrencyConfig): void;
19
- acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void): Promise<void>;
20
- release(providerId: string): void;
26
+ acquire(providerId: string, signal?: AbortSignal, onQueued?: () => void, logger?: SemaphoreLogger): Promise<AcquireToken>;
27
+ release(providerId: string, token: AcquireToken, logger?: SemaphoreLogger): void;
21
28
  getStatus(providerId: string): {
22
29
  active: number;
23
30
  queued: number;
@@ -25,6 +25,7 @@ export class ProviderSemaphoreManager {
25
25
  config: { maxConcurrency: 0, queueTimeoutMs: 0, maxQueueSize: 0 },
26
26
  current: 0,
27
27
  queue: [],
28
+ generation: 0,
28
29
  };
29
30
  this.entries.set(providerId, entry);
30
31
  }
@@ -34,17 +35,19 @@ export class ProviderSemaphoreManager {
34
35
  const entry = this.getOrCreate(providerId);
35
36
  entry.config = config;
36
37
  if (config.maxConcurrency === 0) {
37
- // Admin disabled throttling — drain queue without counting, reset current
38
- // because no tracking is needed when maxConcurrency=0
39
38
  while (entry.queue.length > 0) {
40
39
  const e = entry.queue.shift();
41
40
  if (e.timer)
42
41
  clearTimeout(e.timer);
43
42
  e.resolve();
44
43
  }
44
+ // 递增 generation,使当前所有持有旧 token 的 release() 调用失效
45
+ entry.generation++;
45
46
  entry.current = 0;
46
47
  return;
47
48
  }
49
+ if (entry.current < 0)
50
+ entry.current = 0;
48
51
  while (entry.current < config.maxConcurrency &&
49
52
  entry.queue.length > 0) {
50
53
  entry.current++;
@@ -54,21 +57,35 @@ export class ProviderSemaphoreManager {
54
57
  e.resolve();
55
58
  }
56
59
  }
57
- async acquire(providerId, signal, onQueued) {
60
+ async acquire(providerId, signal, onQueued, logger) {
58
61
  const entry = this.getOrCreate(providerId);
59
62
  const { maxConcurrency, queueTimeoutMs, maxQueueSize } = entry.config;
60
63
  if (maxConcurrency === 0)
61
- return;
64
+ return { generation: entry.generation };
62
65
  if (entry.current < maxConcurrency) {
63
66
  entry.current++;
64
- return;
67
+ logger?.debug({ providerId, current: entry.current, maxConcurrency, action: "acquire_direct" }, "Semaphore: acquired directly");
68
+ return { generation: entry.generation };
65
69
  }
66
70
  if (entry.queue.length >= maxQueueSize) {
71
+ logger?.debug({ providerId, queueLength: entry.queue.length, maxQueueSize, action: "acquire_rejected" }, "Semaphore: queue full, rejecting");
67
72
  throw new SemaphoreQueueFullError(providerId);
68
73
  }
74
+ logger?.debug({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_queued" }, "Semaphore: entering wait queue");
69
75
  onQueued?.();
70
76
  return new Promise((resolve, reject) => {
71
- const qe = { resolve, reject, timer: null };
77
+ const token = { generation: entry.generation };
78
+ const qe = {
79
+ resolve: () => {
80
+ logger?.debug({ providerId, current: entry.current, maxConcurrency, queueLength: entry.queue.length, action: "acquire_resolved" }, "Semaphore: left wait queue, acquired");
81
+ resolve(token);
82
+ },
83
+ reject: (err) => {
84
+ logger?.debug({ providerId, action: "acquire_rejected_internal", error: err.message }, "Semaphore: wait queue entry rejected");
85
+ reject(err);
86
+ },
87
+ timer: null,
88
+ };
72
89
  if (queueTimeoutMs > 0) {
73
90
  qe.timer = setTimeout(() => {
74
91
  const idx = entry.queue.indexOf(qe);
@@ -91,18 +108,28 @@ export class ProviderSemaphoreManager {
91
108
  entry.queue.push(qe);
92
109
  });
93
110
  }
94
- release(providerId) {
111
+ release(providerId, token, logger) {
95
112
  const entry = this.entries.get(providerId);
96
113
  if (!entry)
97
114
  return;
115
+ // maxConcurrency=0 时 acquire 不计数,release 也不应递减
116
+ if (entry.config.maxConcurrency === 0)
117
+ return;
118
+ // generation 不匹配说明此请求在 updateConfig 重置前 acquire,其槽位已被回收
119
+ if (token.generation !== entry.generation) {
120
+ logger?.debug({ providerId, tokenGen: token.generation, currentGen: entry.generation, action: "release_stale" }, "Semaphore: stale token, skipping release");
121
+ return;
122
+ }
98
123
  if (entry.queue.length > 0) {
99
124
  const e = entry.queue.shift();
125
+ logger?.debug({ providerId, current: entry.current, maxConcurrency: entry.config.maxConcurrency, queueRemaining: entry.queue.length, action: "release_dequeue" }, "Semaphore: released, dequeued next waiter");
100
126
  if (e.timer)
101
127
  clearTimeout(e.timer);
102
128
  e.resolve();
103
129
  }
104
130
  else {
105
131
  entry.current--;
132
+ logger?.debug({ providerId, current: entry.current, maxConcurrency: entry.config.maxConcurrency, action: "release_decrement" }, "Semaphore: released slot");
106
133
  }
107
134
  }
108
135
  getStatus(providerId) {
@@ -0,0 +1,7 @@
1
+ import type { FastifyReply } from "fastify";
2
+ import type { RawHeaders, TransportResult } from "./types.js";
3
+ import type { SSEMetricsTransform } from "../metrics/sse-metrics-transform.js";
4
+ import { type BuildHeadersFn } from "./transport.js";
5
/**
 * Declaration of the streaming upstream call; resolves to a TransportResult.
 * NOTE(review): the implementation is not part of this view. Judging by the
 * parameter names it presumably sends `body` to `backend.base_url` + `upstreamPath`
 * and pipes the stream into `reply` — confirm against stream-proxy.js.
 */
+ export declare function callStream(backend: {
6
+ base_url: string;
7
+ }, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, reply: FastifyReply, timeoutMs: number, upstreamPath: string, buildHeaders: BuildHeadersFn, metricsTransform?: SSEMetricsTransform, checkEarlyError?: (bufferedData: string) => boolean, compatResolve?: (result: TransportResult) => void): Promise<TransportResult>;