llm-simple-router 0.3.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/README.md +81 -49
  2. package/dist/admin/constants.d.ts +1 -8
  3. package/dist/admin/constants.js +2 -8
  4. package/dist/admin/logs.js +18 -3
  5. package/dist/admin/router-keys.js +1 -2
  6. package/dist/cli.js +0 -0
  7. package/dist/constants.d.ts +8 -0
  8. package/dist/constants.js +9 -0
  9. package/dist/db/index.d.ts +4 -4
  10. package/dist/db/index.js +2 -2
  11. package/dist/db/logs.d.ts +18 -33
  12. package/dist/db/logs.js +40 -17
  13. package/dist/db/metrics.d.ts +33 -0
  14. package/dist/db/metrics.js +7 -0
  15. package/dist/db/migrations/018_add_failover_field.sql +2 -0
  16. package/dist/db/retry-rules.d.ts +2 -2
  17. package/dist/db/retry-rules.js +26 -13
  18. package/dist/index.js +3 -5
  19. package/dist/monitor/request-tracker.d.ts +6 -0
  20. package/dist/monitor/request-tracker.js +23 -54
  21. package/dist/monitor/stream-extractor.d.ts +11 -0
  22. package/dist/monitor/stream-extractor.js +51 -0
  23. package/dist/proxy/anthropic.js +19 -32
  24. package/dist/proxy/log-helpers.d.ts +11 -4
  25. package/dist/proxy/log-helpers.js +5 -3
  26. package/dist/proxy/openai.js +18 -34
  27. package/dist/proxy/orchestrator.d.ts +52 -0
  28. package/dist/proxy/orchestrator.js +100 -0
  29. package/dist/proxy/proxy-core.d.ts +14 -26
  30. package/dist/proxy/proxy-core.js +40 -337
  31. package/dist/proxy/proxy-handler.d.ts +18 -0
  32. package/dist/proxy/proxy-handler.js +223 -0
  33. package/dist/proxy/proxy-logging.d.ts +28 -0
  34. package/dist/proxy/proxy-logging.js +122 -0
  35. package/dist/proxy/resilience.d.ts +63 -0
  36. package/dist/proxy/resilience.js +188 -0
  37. package/dist/proxy/scope.d.ts +18 -0
  38. package/dist/proxy/scope.js +37 -0
  39. package/dist/proxy/semaphore.d.ts +9 -2
  40. package/dist/proxy/semaphore.js +34 -7
  41. package/dist/proxy/stream-proxy.d.ts +7 -0
  42. package/dist/proxy/stream-proxy.js +263 -0
  43. package/dist/proxy/{upstream-call.d.ts → transport.d.ts} +25 -18
  44. package/dist/proxy/transport.js +128 -0
  45. package/dist/proxy/types.d.ts +58 -0
  46. package/dist/proxy/types.js +30 -0
  47. package/frontend-dist/assets/{CardContent-CucI6u41.js → CardContent-CTnwqTdL.js} +1 -1
  48. package/frontend-dist/assets/{CardHeader-d-DYsWxe.js → CardHeader-CfUeY7tk.js} +1 -1
  49. package/frontend-dist/assets/{CardTitle-CIDEQkWB.js → CardTitle-CWiDwWqd.js} +1 -1
  50. package/frontend-dist/assets/{Checkbox-CybCw3zS.js → Checkbox-BxNz70R_.js} +1 -1
  51. package/frontend-dist/assets/{CollapsibleTrigger-BFNhb19_.js → CollapsibleTrigger-Uz1aGdtH.js} +1 -1
  52. package/frontend-dist/assets/{Collection-DUBb4r6h.js → Collection-1EHC87X5.js} +1 -1
  53. package/frontend-dist/assets/{Dashboard-DLB6iqH1.js → Dashboard-C3FL30UN.js} +2 -2
  54. package/frontend-dist/assets/{DialogTitle-Dq-5o7nJ.js → DialogTitle-CAOFxr83.js} +1 -1
  55. package/frontend-dist/assets/{Input-HN3Il0-c.js → Input-DRIid2C6.js} +1 -1
  56. package/frontend-dist/assets/{Label-CXAeFn-r.js → Label-UyNN2jyE.js} +1 -1
  57. package/frontend-dist/assets/LogDetailDialog-8BT4vIlV.js +3 -0
  58. package/frontend-dist/assets/{Login-Br3qsdxf.js → Login-CnzH6TdS.js} +1 -1
  59. package/frontend-dist/assets/Logs-CbK8NB_X.js +1 -0
  60. package/frontend-dist/assets/{ModelMappings-DXC0sNH5.js → ModelMappings-DeRFgsYG.js} +1 -1
  61. package/frontend-dist/assets/Monitor-Dd80bdUn.js +1 -0
  62. package/frontend-dist/assets/{PopperContent-CnZejY31.js → PopperContent-B3fZao7v.js} +1 -1
  63. package/frontend-dist/assets/{Providers-8CHhW4uH.js → Providers-B_DbV-_y.js} +1 -1
  64. package/frontend-dist/assets/ProxyEnhancement-up1fnPzq.js +5 -0
  65. package/frontend-dist/assets/RetryRules-Dkuhjh0u.js +1 -0
  66. package/frontend-dist/assets/RouterKeys-CvMMAa4t.js +1 -0
  67. package/frontend-dist/assets/{RovingFocusItem-B7ZIkplZ.js → RovingFocusItem-X0bfqWWS.js} +1 -1
  68. package/frontend-dist/assets/{SelectValue-B32pgmTJ.js → SelectValue-zO8t-tx1.js} +1 -1
  69. package/frontend-dist/assets/{Setup-Df9IQo2x.js → Setup-ByT2ThOQ.js} +1 -1
  70. package/frontend-dist/assets/{Switch-CLeo7H6d.js → Switch-BEMjVugO.js} +1 -1
  71. package/frontend-dist/assets/{TableHeader-BpscAtT3.js → TableHeader-DpHWSnxK.js} +1 -1
  72. package/frontend-dist/assets/{TabsTrigger-DErAbTuM.js → TabsTrigger-Db6RqsZc.js} +1 -1
  73. package/frontend-dist/assets/{VisuallyHidden-CJBR3YB3.js → VisuallyHidden-hs8pj8OP.js} +1 -1
  74. package/frontend-dist/assets/{VisuallyHiddenInput-Cy0VuE1l.js → VisuallyHiddenInput-1m0nNADN.js} +1 -1
  75. package/frontend-dist/assets/{alert-dialog-BAR1JRmT.js → alert-dialog-PP91kaO8.js} +1 -1
  76. package/frontend-dist/assets/{button-D54q76GQ.js → button-Dcc0gF5i.js} +1 -1
  77. package/frontend-dist/assets/{client-Mb8fy_bC.js → client-DIIo9zPK.js} +2 -2
  78. package/frontend-dist/assets/{createLucideIcon-CCmQ9QKM.js → createLucideIcon-DGZkBjcJ.js} +1 -1
  79. package/frontend-dist/assets/{dialog-DSH5k5Kj.js → dialog-CxSyR-fN.js} +1 -1
  80. package/frontend-dist/assets/format-CPdJtjZ5.js +1 -0
  81. package/frontend-dist/assets/index-BL-LAtac.css +1 -0
  82. package/frontend-dist/assets/{index-BQBtSfem.js → index-CvT41fGL.js} +1 -1
  83. package/frontend-dist/assets/{lib-BgOqOzXI.js → lib-Bl0OuBjh.js} +1 -1
  84. package/frontend-dist/assets/{ohash.D__AXeF1-p4vp6Svt.js → ohash.D__AXeF1-B64hB831.js} +1 -1
  85. package/frontend-dist/assets/{useClipboard-DO-38TXr.js → useClipboard-CWc1cTDo.js} +1 -1
  86. package/frontend-dist/assets/{useForwardExpose-CzQFheaD.js → useForwardExpose-AkE0lq8y.js} +1 -1
  87. package/frontend-dist/assets/useNonce-DGyPxdjq.js +1 -0
  88. package/frontend-dist/assets/x-BuUpx9Fr.js +1 -0
  89. package/frontend-dist/index.html +7 -7
  90. package/package.json +1 -1
  91. package/dist/admin/services.d.ts +0 -7
  92. package/dist/admin/services.js +0 -63
  93. package/dist/proxy/retry.d.ts +0 -43
  94. package/dist/proxy/retry.js +0 -121
  95. package/dist/proxy/upstream-call.js +0 -208
  96. package/frontend-dist/assets/LogResponseViewer-CyBzv02a.js +0 -3
  97. package/frontend-dist/assets/Logs-Cu_IftdS.js +0 -1
  98. package/frontend-dist/assets/Monitor-CKlid1sC.js +0 -1
  99. package/frontend-dist/assets/ProxyEnhancement-CkYeXwgH.js +0 -5
  100. package/frontend-dist/assets/RetryRules-Csb7u9W4.js +0 -1
  101. package/frontend-dist/assets/RouterKeys-C6YIufmj.js +0 -1
  102. package/frontend-dist/assets/index-H-lnTkMr.css +0 -1
  103. package/frontend-dist/assets/useNonce-CU-NirfM.js +0 -1
  104. package/frontend-dist/assets/x-DEJ1xpi5.js +0 -1
@@ -3,10 +3,15 @@ import { StatsAggregator } from "./stats-aggregator.js";
3
3
  import { RuntimeCollector } from "./runtime-collector.js";
4
4
  import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
5
5
  import type { ActiveRequest, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
6
+ export interface TrackerLogger {
7
+ debug(obj: Record<string, unknown>, msg: string): void;
8
+ warn(obj: Record<string, unknown>, msg: string): void;
9
+ }
6
10
  export declare class RequestTracker {
7
11
  private activeMap;
8
12
  private recentCompleted;
9
13
  private clients;
14
+ private logger?;
10
15
  private providerConfigCache;
11
16
  private pushTimer;
12
17
  private tickCount;
@@ -17,6 +22,7 @@ export declare class RequestTracker {
17
22
  constructor(deps?: {
18
23
  semaphoreManager?: ProviderSemaphoreManager;
19
24
  runtimeCollector?: RuntimeCollector;
25
+ logger?: TrackerLogger;
20
26
  });
21
27
  start(req: ActiveRequest): void;
22
28
  update(id: string, patch: Partial<ActiveRequest>): void;
@@ -1,56 +1,6 @@
1
1
  import { StatsAggregator } from "./stats-aggregator.js";
2
2
  import { RuntimeCollector } from "./runtime-collector.js";
3
- function extractStreamText(line, apiType) {
4
- const empty = { text: '', block: null };
5
- if (!line.startsWith(SSE_DATA_PREFIX))
6
- return empty;
7
- const jsonStr = line.slice(SSE_DATA_PREFIX.length);
8
- if (jsonStr === '[DONE]')
9
- return empty;
10
- let obj;
11
- try {
12
- obj = JSON.parse(jsonStr);
13
- }
14
- catch {
15
- return empty;
16
- }
17
- if (apiType === 'openai') {
18
- const choices = obj.choices;
19
- const delta = choices?.[0]?.delta;
20
- const text = delta?.content ?? '';
21
- return { text, block: text ? { index: 0, type: 'text', content: text } : null };
22
- }
23
- // Anthropic
24
- const type = obj.type;
25
- const index = obj.index;
26
- const delta = obj.delta;
27
- if (type === 'content_block_start') {
28
- const contentBlock = obj.content_block;
29
- const blockType = contentBlock?.type;
30
- const name = blockType === 'tool_use' ? contentBlock?.name : undefined;
31
- if (blockType === 'thinking' || blockType === 'text' || blockType === 'tool_use') {
32
- return { text: '', block: { index: index ?? 0, type: blockType, content: '', name } };
33
- }
34
- return empty;
35
- }
36
- if (type === 'content_block_delta' && delta) {
37
- const deltaType = delta.type;
38
- if (deltaType === 'thinking_delta') {
39
- const thinking = delta.thinking ?? '';
40
- return { text: '', block: { index: index ?? 0, type: 'thinking', content: thinking } };
41
- }
42
- if (deltaType === 'text_delta') {
43
- const text = delta.text ?? '';
44
- return { text, block: { index: index ?? 0, type: 'text', content: text } };
45
- }
46
- if (deltaType === 'input_json_delta') {
47
- const partialJson = delta.partial_json ?? '';
48
- return { text: '', block: { index: index ?? 0, type: 'tool_use', content: partialJson } };
49
- }
50
- }
51
- return empty;
52
- }
53
- const SSE_DATA_PREFIX = "data: ";
3
+ import { extractStreamText } from "./stream-extractor.js";
54
4
  const RUNTIME_PUSH_TICK_INTERVAL = 2;
55
5
  const RECENT_COMPLETED_MAX = 200;
56
6
  const RECENT_TTL_MS = 5 * 60 * 1000; // eslint-disable-line no-magic-numbers
@@ -60,6 +10,7 @@ export class RequestTracker {
60
10
  activeMap = new Map();
61
11
  recentCompleted = [];
62
12
  clients = new Set();
13
+ logger;
63
14
  providerConfigCache = new Map();
64
15
  pushTimer = null;
65
16
  tickCount = 0;
@@ -71,18 +22,23 @@ export class RequestTracker {
71
22
  this.semaphoreManager = deps?.semaphoreManager;
72
23
  this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
73
24
  this.statsAggregator = new StatsAggregator();
25
+ this.logger = deps?.logger;
74
26
  }
75
27
  // --- Core methods ---
76
28
  start(req) {
77
29
  this.activeMap.set(req.id, { ...req });
30
+ this.logger?.debug({ reqId: req.id, model: req.model, providerId: req.providerId, activeCount: this.activeMap.size }, "Tracker: start");
78
31
  this.broadcast("request_start", req);
79
32
  }
80
33
  update(id, patch) {
81
34
  const req = this.activeMap.get(id);
82
- if (!req)
35
+ if (!req) {
36
+ this.logger?.warn({ reqId: id, patchKeys: Object.keys(patch) }, "Tracker: update called but request not in activeMap");
83
37
  return;
38
+ }
84
39
  const prevQueued = req.queued;
85
40
  Object.assign(req, patch);
41
+ this.logger?.debug({ reqId: id, patchQueued: patch.queued, prevQueued, activeCount: this.activeMap.size }, "Tracker: update");
86
42
  // queued 状态变化时立即广播,让前端即时看到排队/取消排队
87
43
  if (patch.queued !== undefined && patch.queued !== prevQueued) {
88
44
  this.broadcast("request_update", this.getActive());
@@ -140,8 +96,10 @@ export class RequestTracker {
140
96
  }
141
97
  complete(id, result) {
142
98
  const req = this.activeMap.get(id);
143
- if (!req)
99
+ if (!req) {
100
+ this.logger?.warn({ reqId: id, result }, "Tracker: complete called but request not in activeMap");
144
101
  return;
102
+ }
145
103
  const now = Date.now();
146
104
  const latency = now - req.startTime;
147
105
  const statusCode = result.statusCode ?? 0;
@@ -158,6 +116,7 @@ export class RequestTracker {
158
116
  if (this.recentCompleted.length > RECENT_COMPLETED_MAX) {
159
117
  this.recentCompleted.length = RECENT_COMPLETED_MAX;
160
118
  }
119
+ this.logger?.debug({ reqId: id, status: result.status, statusCode, latency, activeCount: this.activeMap.size }, "Tracker: complete");
161
120
  this.broadcast("request_complete", completed);
162
121
  }
163
122
  // --- Query methods ---
@@ -237,15 +196,25 @@ export class RequestTracker {
237
196
  }
238
197
  broadcast(event, data) {
239
198
  const msg = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
199
+ const clientCount = this.clients.size;
200
+ let sentCount = 0;
240
201
  for (const client of this.clients) {
241
202
  try {
242
- if (!client.writableEnded)
203
+ if (!client.writableEnded) {
243
204
  client.write(msg);
205
+ sentCount++;
206
+ }
244
207
  }
245
208
  catch {
246
209
  this.clients.delete(client);
247
210
  }
248
211
  }
212
+ const summary = event === "request_update" ? `active=${data?.length}`
213
+ : event === "concurrency_update" ? data?.map(p => `${p.providerName}=${p.active}/${p.maxConcurrency}q${p.queued}`).join(",")
214
+ : event === "request_start" ? `model=${data?.model}`
215
+ : event === "request_complete" ? `model=${data?.model} status=${data?.status}`
216
+ : "";
217
+ this.logger?.debug({ event, clientCount, sentCount, summary }, "Tracker: SSE broadcast");
249
218
  }
250
219
  // --- Provider config cache ---
251
220
  updateProviderConfig(providerId, config) {
@@ -0,0 +1,11 @@
1
+ import type { ContentBlock } from "./types.js";
2
+ export interface StreamExtraction {
3
+ text: string;
4
+ block?: {
5
+ index: number;
6
+ type: ContentBlock["type"];
7
+ content: string;
8
+ name?: string;
9
+ } | null;
10
+ }
11
+ export declare function extractStreamText(line: string, apiType: "openai" | "anthropic"): StreamExtraction;
@@ -0,0 +1,51 @@
1
+ const SSE_DATA_PREFIX = "data: ";
2
+ export function extractStreamText(line, apiType) {
3
+ const empty = { text: "", block: null };
4
+ if (!line.startsWith(SSE_DATA_PREFIX))
5
+ return empty;
6
+ const jsonStr = line.slice(SSE_DATA_PREFIX.length);
7
+ if (jsonStr === "[DONE]")
8
+ return empty;
9
+ let obj;
10
+ try {
11
+ obj = JSON.parse(jsonStr);
12
+ }
13
+ catch {
14
+ return empty;
15
+ }
16
+ if (apiType === "openai") {
17
+ const choices = obj.choices;
18
+ const delta = choices?.[0]?.delta;
19
+ const text = delta?.content ?? "";
20
+ return { text, block: text ? { index: 0, type: "text", content: text } : null };
21
+ }
22
+ // Anthropic
23
+ const type = obj.type;
24
+ const index = obj.index;
25
+ const delta = obj.delta;
26
+ if (type === "content_block_start") {
27
+ const contentBlock = obj.content_block;
28
+ const blockType = contentBlock?.type;
29
+ const name = blockType === "tool_use" ? contentBlock?.name : undefined;
30
+ if (blockType === "thinking" || blockType === "text" || blockType === "tool_use") {
31
+ return { text: "", block: { index: index ?? 0, type: blockType, content: "", name } };
32
+ }
33
+ return empty;
34
+ }
35
+ if (type === "content_block_delta" && delta) {
36
+ const deltaType = delta.type;
37
+ if (deltaType === "thinking_delta") {
38
+ const thinking = delta.thinking ?? "";
39
+ return { text: "", block: { index: index ?? 0, type: "thinking", content: thinking } };
40
+ }
41
+ if (deltaType === "text_delta") {
42
+ const text = delta.text ?? "";
43
+ return { text, block: { index: index ?? 0, type: "text", content: text } };
44
+ }
45
+ if (deltaType === "input_json_delta") {
46
+ const partialJson = delta.partial_json ?? "";
47
+ return { text: "", block: { index: index ?? 0, type: "tool_use", content: partialJson } };
48
+ }
49
+ }
50
+ return empty;
51
+ }
@@ -1,41 +1,28 @@
1
1
  import fp from "fastify-plugin";
2
- import { handleProxyPost, } from "./proxy-core.js";
2
+ import { createErrorFormatter } from "./proxy-core.js";
3
+ import { handleProxyRequest } from "./proxy-handler.js";
4
+ import { createOrchestrator } from "./orchestrator.js";
3
5
  const MESSAGES_PATH = "/v1/messages";
4
- const anthropicErrors = {
5
- modelNotFound: (model) => ({
6
- statusCode: 404,
7
- body: { type: "error", error: { type: "not_found_error", message: `Model '${model}' is not configured` } },
8
- }),
9
- modelNotAllowed: (model) => ({
10
- statusCode: 403,
11
- body: { type: "error", error: { type: "forbidden_error", message: `Model '${model}' is not allowed for this API key` } },
12
- }),
13
- providerUnavailable: () => ({
14
- statusCode: 503,
15
- body: { type: "error", error: { type: "api_error", message: "Provider unavailable" } },
16
- }),
17
- providerTypeMismatch: () => ({
18
- statusCode: 500,
19
- body: { type: "error", error: { type: "api_error", message: "Provider type mismatch for this endpoint" } },
20
- }),
21
- upstreamConnectionFailed: () => ({
22
- statusCode: 502,
23
- body: { type: "error", error: { type: "upstream_error", message: "Failed to connect to upstream service" } },
24
- }),
25
- concurrencyQueueFull: (providerId) => ({
26
- statusCode: 503,
27
- body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency queue is full` } },
28
- }),
29
- concurrencyTimeout: (providerId, timeoutMs) => ({
30
- statusCode: 504,
31
- body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)` } },
32
- }),
6
+ const ANTHROPIC_ERROR_TYPE = {
7
+ modelNotFound: "not_found_error",
8
+ modelNotAllowed: "forbidden_error",
9
+ providerUnavailable: "api_error",
10
+ providerTypeMismatch: "api_error",
11
+ upstreamConnectionFailed: "upstream_error",
12
+ concurrencyQueueFull: "api_error",
13
+ concurrencyTimeout: "api_error",
33
14
  };
15
+ const anthropicErrors = createErrorFormatter((kind, message) => ({ type: "error", error: { type: ANTHROPIC_ERROR_TYPE[kind], message } }));
34
16
  const anthropicProxyRaw = (app, opts, done) => {
35
17
  const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
18
+ const orchestrator = createOrchestrator(semaphoreManager, tracker);
36
19
  app.post(MESSAGES_PATH, async (request, reply) => {
37
- const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker };
38
- return handleProxyPost(request, reply, "anthropic", MESSAGES_PATH, anthropicErrors, deps);
20
+ if (!orchestrator) {
21
+ const e = anthropicErrors.providerUnavailable();
22
+ return reply.status(e.statusCode).send(e.body);
23
+ }
24
+ const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, tracker, orchestrator };
25
+ return handleProxyRequest(request, reply, "anthropic", MESSAGES_PATH, anthropicErrors, deps);
39
26
  });
40
27
  done();
41
28
  };
@@ -1,7 +1,16 @@
1
1
  import Database from "better-sqlite3";
2
2
  import type { Provider } from "../db/index.js";
3
3
  import type { RawHeaders } from "./proxy-core.js";
4
- export interface RequestLogParams {
4
+ export interface FailoverContext {
5
+ isFailoverIteration: boolean;
6
+ rootLogId: string;
7
+ }
8
+ export interface LogRetryMeta {
9
+ isRetry?: boolean;
10
+ isFailover?: boolean;
11
+ originalRequestId?: string | null;
12
+ }
13
+ export interface RequestLogParams extends LogRetryMeta {
5
14
  id: string;
6
15
  apiType: string;
7
16
  model: string;
@@ -15,14 +24,12 @@ export interface RequestLogParams {
15
24
  respBody: string | null;
16
25
  upHdrs: Record<string, string>;
17
26
  cliHdrs: Record<string, string>;
18
- isRetry?: boolean;
19
- originalRequestId?: string | null;
20
27
  routerKeyId?: string | null;
21
28
  originalModel?: string | null;
22
29
  }
23
30
  /** 插入成功请求日志,供 openai/anthropic 插件共享 */
24
31
  export declare function insertSuccessLog(db: Database.Database, params: RequestLogParams): void;
25
- export interface RejectedLogParams {
32
+ export interface RejectedLogParams extends LogRetryMeta {
26
33
  db: Database.Database;
27
34
  logId: string;
28
35
  apiType: string;
@@ -1,7 +1,7 @@
1
1
  import { insertRequestLog } from "../db/index.js";
2
2
  /** 插入成功请求日志,供 openai/anthropic 插件共享 */
3
3
  export function insertSuccessLog(db, params) {
4
- const { id: logId, apiType, model, provider, isStream, startTime, reqBody, clientReq, upstreamReq, status, respBody, upHdrs, cliHdrs, isRetry = false, originalRequestId = null, routerKeyId = null, originalModel = null } = params;
4
+ const { id: logId, apiType, model, provider, isStream, startTime, reqBody, clientReq, upstreamReq, status, respBody, upHdrs, cliHdrs, isRetry = false, isFailover = false, originalRequestId = null, routerKeyId = null, originalModel = null } = params;
5
5
  insertRequestLog(db, {
6
6
  id: logId, api_type: apiType, model, provider_id: provider.id,
7
7
  status_code: status, latency_ms: Date.now() - startTime,
@@ -10,13 +10,13 @@ export function insertSuccessLog(db, params) {
10
10
  response_body: respBody, client_request: clientReq, upstream_request: upstreamReq,
11
11
  upstream_response: JSON.stringify({ statusCode: status, headers: upHdrs, body: respBody }),
12
12
  client_response: JSON.stringify({ statusCode: status, headers: cliHdrs, body: respBody }),
13
- is_retry: isRetry ? 1 : 0, original_request_id: originalRequestId,
13
+ is_retry: isRetry ? 1 : 0, is_failover: isFailover ? 1 : 0, original_request_id: originalRequestId,
14
14
  router_key_id: routerKeyId, original_model: originalModel,
15
15
  });
16
16
  }
17
17
  /** Log a request rejected before reaching upstream */
18
18
  export function insertRejectedLog(params) {
19
- const { db, logId, apiType, model, statusCode, errorMessage, startTime, isStream, routerKeyId, originalBody, clientHeaders, providerId = null, originalModel = null } = params;
19
+ const { db, logId, apiType, model, statusCode, errorMessage, startTime, isStream, routerKeyId, originalBody, clientHeaders, providerId = null, isFailover = false, originalRequestId = null, originalModel = null } = params;
20
20
  insertRequestLog(db, {
21
21
  id: logId,
22
22
  api_type: apiType,
@@ -29,6 +29,8 @@ export function insertRejectedLog(params) {
29
29
  created_at: new Date().toISOString(),
30
30
  request_body: JSON.stringify(originalBody),
31
31
  client_request: JSON.stringify({ headers: clientHeaders, body: originalBody }),
32
+ is_failover: isFailover ? 1 : 0,
33
+ original_request_id: originalRequestId,
32
34
  router_key_id: routerKeyId,
33
35
  original_model: originalModel,
34
36
  });
@@ -2,49 +2,33 @@ import fp from "fastify-plugin";
2
2
  import { getActiveProviders } from "../db/index.js";
3
3
  import { getSetting } from "../db/settings.js";
4
4
  import { decrypt } from "../utils/crypto.js";
5
- import { proxyGetRequest, handleProxyPost, } from "./proxy-core.js";
6
- const HTTP_NOT_FOUND = 404;
7
- const HTTP_BAD_GATEWAY = 502;
5
+ import { proxyGetRequest, createErrorFormatter } from "./proxy-core.js";
6
+ import { handleProxyRequest } from "./proxy-handler.js";
7
+ import { createOrchestrator } from "./orchestrator.js";
8
+ import { HTTP_NOT_FOUND, HTTP_BAD_GATEWAY } from "../constants.js";
8
9
  const CHAT_COMPLETIONS_PATH = "/v1/chat/completions";
9
10
  const MODELS_PATH = "/v1/models";
10
- const openaiErrors = {
11
- modelNotFound: (model) => ({
12
- statusCode: 404,
13
- body: { error: { message: `Model '${model}' is not configured`, type: "invalid_request_error", code: "model_not_found" } },
14
- }),
15
- modelNotAllowed: (model) => ({
16
- statusCode: 403,
17
- body: { error: { message: `Model '${model}' is not allowed for this API key`, type: "invalid_request_error", code: "model_not_allowed" } },
18
- }),
19
- providerUnavailable: () => ({
20
- statusCode: 503,
21
- body: { error: { message: "Provider unavailable", type: "server_error", code: "provider_unavailable" } },
22
- }),
23
- providerTypeMismatch: () => ({
24
- statusCode: 500,
25
- body: { error: { message: "Provider type mismatch for this endpoint", type: "server_error", code: "provider_type_mismatch" } },
26
- }),
27
- upstreamConnectionFailed: () => ({
28
- statusCode: 502,
29
- body: { error: { message: "Failed to connect to upstream service", type: "upstream_error", code: "upstream_connection_failed" } },
30
- }),
31
- concurrencyQueueFull: (providerId) => ({
32
- statusCode: 503,
33
- body: { error: { message: `Provider '${providerId}' concurrency queue is full`, type: "server_error", code: "concurrency_queue_full" } },
34
- }),
35
- concurrencyTimeout: (providerId, timeoutMs) => ({
36
- statusCode: 504,
37
- body: { error: { message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)`, type: "server_error", code: "concurrency_timeout" } },
38
- }),
11
+ const OPENAI_ERROR_META = {
12
+ modelNotFound: { type: "invalid_request_error", code: "model_not_found" },
13
+ modelNotAllowed: { type: "invalid_request_error", code: "model_not_allowed" },
14
+ providerUnavailable: { type: "server_error", code: "provider_unavailable" },
15
+ providerTypeMismatch: { type: "server_error", code: "provider_type_mismatch" },
16
+ upstreamConnectionFailed: { type: "upstream_error", code: "upstream_connection_failed" },
17
+ concurrencyQueueFull: { type: "server_error", code: "concurrency_queue_full" },
18
+ concurrencyTimeout: { type: "server_error", code: "concurrency_timeout" },
39
19
  };
20
+ const openaiErrors = createErrorFormatter((kind, message) => ({ error: { message, ...OPENAI_ERROR_META[kind] } }));
40
21
  function sendError(reply, e) {
41
22
  return reply.status(e.statusCode).send(e.body);
42
23
  }
43
24
  const openaiProxyRaw = (app, opts, done) => {
44
25
  const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
26
+ const orchestrator = createOrchestrator(semaphoreManager, tracker);
45
27
  app.post(CHAT_COMPLETIONS_PATH, async (request, reply) => {
46
- const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker };
47
- return handleProxyPost(request, reply, "openai", CHAT_COMPLETIONS_PATH, openaiErrors, deps, {
28
+ if (!orchestrator)
29
+ return sendError(reply, openaiErrors.providerUnavailable());
30
+ const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, tracker, orchestrator };
31
+ return handleProxyRequest(request, reply, "openai", CHAT_COMPLETIONS_PATH, openaiErrors, deps, {
48
32
  beforeSendProxy: (body, isStream) => {
49
33
  if (isStream && !body.stream_options) {
50
34
  body.stream_options = { include_usage: true };
@@ -0,0 +1,52 @@
1
+ import type { FastifyReply, FastifyRequest } from "fastify";
2
+ import type { TransportResult } from "./types.js";
3
+ import type { Target } from "./strategy/types.js";
4
+ import type { ResilienceLayer, ResilienceResult } from "./resilience.js";
5
+ import type { RetryRuleMatcher } from "./retry-rules.js";
6
+ import type { SemaphoreScope } from "./scope.js";
7
+ import type { TrackerScope } from "./scope.js";
8
+ import type { ProviderSemaphoreManager } from "./semaphore.js";
9
+ import type { RequestTracker } from "../monitor/request-tracker.js";
10
+ export interface OrchestratorConfig {
11
+ resolved: Target;
12
+ provider: {
13
+ id: string;
14
+ name: string;
15
+ is_active: number;
16
+ api_type: string;
17
+ base_url: string;
18
+ api_key: string;
19
+ };
20
+ clientModel: string;
21
+ isStream: boolean;
22
+ /** 外部生成的 tracker ID,用于 tracker.appendStreamChunk / tracker.update 等回调匹配 */
23
+ trackerId?: string;
24
+ }
25
+ export interface HandleContext {
26
+ streamTimeoutMs?: number;
27
+ retryMaxAttempts?: number;
28
+ retryBaseDelayMs?: number;
29
+ failoverThreshold?: number;
30
+ isFailover?: boolean;
31
+ ruleMatcher?: RetryRuleMatcher;
32
+ transportFn: (target: Target) => Promise<TransportResult>;
33
+ }
34
+ /**
35
+ * 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
36
+ * 两个 provider 的创建逻辑完全一致。
37
+ */
38
+ export declare function createOrchestrator(semaphoreManager?: ProviderSemaphoreManager, tracker?: RequestTracker): ProxyOrchestrator | undefined;
39
+ export declare class ProxyOrchestrator {
40
+ private deps;
41
+ constructor(deps: {
42
+ semaphoreScope: SemaphoreScope;
43
+ trackerScope: TrackerScope;
44
+ resilience: ResilienceLayer;
45
+ });
46
+ handle(request: FastifyRequest, reply: FastifyReply, apiType: "openai" | "anthropic", config: OrchestratorConfig, ctx?: HandleContext): Promise<ResilienceResult>;
47
+ private buildActiveRequest;
48
+ private createAbortSignal;
49
+ private executeResilience;
50
+ private sendResponse;
51
+ private extractTrackStatus;
52
+ }
@@ -0,0 +1,100 @@
1
+ import { ResilienceLayer as ResilienceLayerClass } from "./resilience.js";
2
+ import { SemaphoreScope as SemaphoreScopeClass } from "./scope.js";
3
+ import { TrackerScope as TrackerScopeClass } from "./scope.js";
4
+ const DEFAULT_MAX_RETRIES = 3;
5
+ const DEFAULT_BASE_DELAY_MS = 1000;
6
+ const DEFAULT_FAILOVER_THRESHOLD = 400;
7
+ /**
8
+ * 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
9
+ * 两个 provider 的创建逻辑完全一致。
10
+ */
11
+ export function createOrchestrator(semaphoreManager, tracker) {
12
+ const semaphoreScope = semaphoreManager ? new SemaphoreScopeClass(semaphoreManager) : undefined;
13
+ const trackerScope = tracker ? new TrackerScopeClass(tracker) : undefined;
14
+ if (!semaphoreScope || !trackerScope)
15
+ return undefined;
16
+ return new ProxyOrchestrator({ semaphoreScope, trackerScope, resilience: new ResilienceLayerClass() });
17
+ }
18
+ export class ProxyOrchestrator {
19
+ deps;
20
+ constructor(deps) {
21
+ this.deps = deps;
22
+ }
23
+ async handle(request, reply, apiType, config, ctx) {
24
+ const trackerReq = this.buildActiveRequest(request, config, apiType);
25
+ const result = await this.deps.trackerScope.track(trackerReq, () => this.deps.semaphoreScope.withSlot(config.provider.id, this.createAbortSignal(request), () => {
26
+ trackerReq.queued = true;
27
+ this.deps.trackerScope.markQueued(trackerReq.id, true);
28
+ }, () => {
29
+ if (trackerReq.queued) {
30
+ trackerReq.queued = false;
31
+ this.deps.trackerScope.markQueued(trackerReq.id, false);
32
+ }
33
+ return this.executeResilience(config, ctx);
34
+ }), (result) => this.extractTrackStatus(result));
35
+ this.sendResponse(reply, result.result, ctx);
36
+ return result;
37
+ }
38
+ buildActiveRequest(request, config, apiType) {
39
+ return {
40
+ id: config.trackerId ?? crypto.randomUUID(),
41
+ apiType,
42
+ model: config.clientModel,
43
+ providerId: config.provider.id,
44
+ providerName: config.provider.name,
45
+ isStream: config.isStream,
46
+ queued: false,
47
+ startTime: Date.now(),
48
+ status: "pending",
49
+ retryCount: 0,
50
+ attempts: [],
51
+ clientIp: request.ip,
52
+ };
53
+ }
54
+ createAbortSignal(request) {
55
+ const controller = new AbortController();
56
+ request.raw.on("close", () => {
57
+ if (!request.raw.readableEnded) {
58
+ controller.abort();
59
+ }
60
+ });
61
+ return controller.signal;
62
+ }
63
+ async executeResilience(config, ctx) {
64
+ if (!ctx?.transportFn)
65
+ throw new Error("HandleContext.transportFn is required");
66
+ const resilienceConfig = {
67
+ maxRetries: ctx.retryMaxAttempts ?? DEFAULT_MAX_RETRIES,
68
+ baseDelayMs: ctx.retryBaseDelayMs ?? DEFAULT_BASE_DELAY_MS,
69
+ failoverThreshold: ctx.failoverThreshold ?? DEFAULT_FAILOVER_THRESHOLD,
70
+ isFailover: ctx.isFailover ?? false,
71
+ ruleMatcher: ctx.ruleMatcher,
72
+ };
73
+ return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig);
74
+ }
75
+ sendResponse(reply, result, ctx) {
76
+ if (result.kind === "stream_success" || result.kind === "stream_abort" || result.kind === "throw") {
77
+ return;
78
+ }
79
+ // failover 场景下错误响应由外层 proxy-handler 控制,此处不发送
80
+ if (ctx?.isFailover && "statusCode" in result && result.statusCode >= (ctx.failoverThreshold ?? DEFAULT_FAILOVER_THRESHOLD)) {
81
+ return;
82
+ }
83
+ if (result.headers) {
84
+ for (const [key, value] of Object.entries(result.headers)) {
85
+ reply.header(key, value);
86
+ }
87
+ }
88
+ reply.status(result.statusCode).send(result.body);
89
+ }
90
+ extractTrackStatus(result) {
91
+ const transport = result.result;
92
+ if (transport.kind === "success" || transport.kind === "stream_success" || transport.kind === "stream_abort") {
93
+ return { status: "completed", statusCode: transport.statusCode };
94
+ }
95
+ if (transport.kind === "throw") {
96
+ return { status: "failed" };
97
+ }
98
+ return { status: "failed", statusCode: transport.statusCode };
99
+ }
100
+ }
@@ -1,11 +1,8 @@
1
- import type { FastifyReply, FastifyRequest } from "fastify";
2
- import Database from "better-sqlite3";
3
1
  import type { Provider } from "../db/index.js";
4
- import type { RetryRuleMatcher } from "./retry-rules.js";
5
- import { type ProxyResult, type StreamProxyResult, type GetProxyResult } from "./upstream-call.js";
6
- import { ProviderSemaphoreManager } from "./semaphore.js";
7
- import type { RequestTracker } from "../monitor/request-tracker.js";
8
- export type RawHeaders = Record<string, string | string[] | undefined>;
2
+ import type { GetTransportResult } from "./transport.js";
3
+ import type { RawHeaders } from "./types.js";
4
+ export { UPSTREAM_SUCCESS } from "./types.js";
5
+ export type { RawHeaders } from "./types.js";
9
6
  export interface ProxyErrorResponse {
10
7
  statusCode: number;
11
8
  body: unknown;
@@ -19,25 +16,16 @@ export interface ProxyErrorFormatter {
19
16
  concurrencyQueueFull(providerId: string): ProxyErrorResponse;
20
17
  concurrencyTimeout(providerId: string, timeoutMs: number): ProxyErrorResponse;
21
18
  }
22
- export interface ProxyHandlerDeps {
23
- db: Database.Database;
24
- streamTimeoutMs: number;
25
- retryMaxAttempts: number;
26
- retryBaseDelayMs: number;
27
- matcher?: RetryRuleMatcher;
28
- semaphoreManager?: ProviderSemaphoreManager;
29
- tracker?: RequestTracker;
30
- }
31
- export type { ProxyResult, StreamProxyResult, GetProxyResult };
19
+ export type { ProxyResult, StreamProxyResult } from "./transport.js";
20
+ export type { GetTransportResult as GetProxyResult } from "./transport.js";
21
+ export type ErrorKind = "modelNotFound" | "modelNotAllowed" | "providerUnavailable" | "providerTypeMismatch" | "upstreamConnectionFailed" | "concurrencyQueueFull" | "concurrencyTimeout";
22
+ /**
23
+ * 工厂函数,消除 openai/anthropic 错误格式化的重复代码。
24
+ * statusCode 和 message 两个 provider 完全一致,仅 body 格式不同,
25
+ * 由 formatBody 回调根据 kind 参数映射各自的 type/code 并组装 body。
26
+ */
27
+ export declare function createErrorFormatter(formatBody: (kind: ErrorKind, message: string) => Record<string, unknown>): ProxyErrorFormatter;
32
28
  export declare const SKIP_UPSTREAM: Set<string>;
33
29
  export declare function selectHeaders(raw: RawHeaders, skip: Set<string>): Record<string, string>;
34
30
  export declare function buildUpstreamHeaders(clientHeaders: RawHeaders, apiKey: string, payloadBytes?: number): Record<string, string>;
35
- export declare function proxyGetRequest(backend: Provider, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string): Promise<GetProxyResult>;
36
- /**
37
- * 共享 POST handler,参数化 apiType/errorFormat/upstreamPath 等差异。
38
- * 当分组策略为 failover 时,在 while 循环中依次尝试不同 target,
39
- * 直到成功(或 headers 已发送)才返回。
40
- */
41
- export declare function handleProxyPost(request: FastifyRequest, reply: FastifyReply, apiType: "openai" | "anthropic", upstreamPath: string, errors: ProxyErrorFormatter, deps: ProxyHandlerDeps, options?: {
42
- beforeSendProxy?: (body: Record<string, unknown>, isStream: boolean) => void;
43
- }): Promise<FastifyReply>;
31
+ export declare function proxyGetRequest(backend: Provider, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string): Promise<GetTransportResult>;