llm-simple-router 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -49
- package/dist/admin/constants.d.ts +1 -8
- package/dist/admin/constants.js +2 -8
- package/dist/admin/logs.js +18 -3
- package/dist/admin/router-keys.js +1 -2
- package/dist/cli.js +0 -0
- package/dist/constants.d.ts +8 -0
- package/dist/constants.js +9 -0
- package/dist/db/index.d.ts +4 -4
- package/dist/db/index.js +2 -2
- package/dist/db/logs.d.ts +18 -33
- package/dist/db/logs.js +40 -17
- package/dist/db/metrics.d.ts +33 -0
- package/dist/db/metrics.js +7 -0
- package/dist/db/migrations/018_add_failover_field.sql +2 -0
- package/dist/db/retry-rules.d.ts +2 -2
- package/dist/db/retry-rules.js +26 -13
- package/dist/index.js +3 -5
- package/dist/monitor/request-tracker.d.ts +6 -0
- package/dist/monitor/request-tracker.js +23 -54
- package/dist/monitor/stream-extractor.d.ts +11 -0
- package/dist/monitor/stream-extractor.js +51 -0
- package/dist/proxy/anthropic.js +19 -32
- package/dist/proxy/log-helpers.d.ts +11 -4
- package/dist/proxy/log-helpers.js +5 -3
- package/dist/proxy/openai.js +18 -34
- package/dist/proxy/orchestrator.d.ts +52 -0
- package/dist/proxy/orchestrator.js +100 -0
- package/dist/proxy/proxy-core.d.ts +14 -26
- package/dist/proxy/proxy-core.js +40 -337
- package/dist/proxy/proxy-handler.d.ts +18 -0
- package/dist/proxy/proxy-handler.js +223 -0
- package/dist/proxy/proxy-logging.d.ts +28 -0
- package/dist/proxy/proxy-logging.js +122 -0
- package/dist/proxy/resilience.d.ts +63 -0
- package/dist/proxy/resilience.js +188 -0
- package/dist/proxy/scope.d.ts +18 -0
- package/dist/proxy/scope.js +37 -0
- package/dist/proxy/semaphore.d.ts +9 -2
- package/dist/proxy/semaphore.js +34 -7
- package/dist/proxy/stream-proxy.d.ts +7 -0
- package/dist/proxy/stream-proxy.js +263 -0
- package/dist/proxy/{upstream-call.d.ts → transport.d.ts} +25 -18
- package/dist/proxy/transport.js +128 -0
- package/dist/proxy/types.d.ts +58 -0
- package/dist/proxy/types.js +30 -0
- package/frontend-dist/assets/{CardContent-CucI6u41.js → CardContent-CTnwqTdL.js} +1 -1
- package/frontend-dist/assets/{CardHeader-d-DYsWxe.js → CardHeader-CfUeY7tk.js} +1 -1
- package/frontend-dist/assets/{CardTitle-CIDEQkWB.js → CardTitle-CWiDwWqd.js} +1 -1
- package/frontend-dist/assets/{Checkbox-CybCw3zS.js → Checkbox-BxNz70R_.js} +1 -1
- package/frontend-dist/assets/{CollapsibleTrigger-BFNhb19_.js → CollapsibleTrigger-Uz1aGdtH.js} +1 -1
- package/frontend-dist/assets/{Collection-DUBb4r6h.js → Collection-1EHC87X5.js} +1 -1
- package/frontend-dist/assets/{Dashboard-DLB6iqH1.js → Dashboard-C3FL30UN.js} +2 -2
- package/frontend-dist/assets/{DialogTitle-Dq-5o7nJ.js → DialogTitle-CAOFxr83.js} +1 -1
- package/frontend-dist/assets/{Input-HN3Il0-c.js → Input-DRIid2C6.js} +1 -1
- package/frontend-dist/assets/{Label-CXAeFn-r.js → Label-UyNN2jyE.js} +1 -1
- package/frontend-dist/assets/LogDetailDialog-8BT4vIlV.js +3 -0
- package/frontend-dist/assets/{Login-Br3qsdxf.js → Login-CnzH6TdS.js} +1 -1
- package/frontend-dist/assets/Logs-CbK8NB_X.js +1 -0
- package/frontend-dist/assets/{ModelMappings-DXC0sNH5.js → ModelMappings-DeRFgsYG.js} +1 -1
- package/frontend-dist/assets/Monitor-Dd80bdUn.js +1 -0
- package/frontend-dist/assets/{PopperContent-CnZejY31.js → PopperContent-B3fZao7v.js} +1 -1
- package/frontend-dist/assets/{Providers-8CHhW4uH.js → Providers-B_DbV-_y.js} +1 -1
- package/frontend-dist/assets/ProxyEnhancement-up1fnPzq.js +5 -0
- package/frontend-dist/assets/RetryRules-Dkuhjh0u.js +1 -0
- package/frontend-dist/assets/RouterKeys-CvMMAa4t.js +1 -0
- package/frontend-dist/assets/{RovingFocusItem-B7ZIkplZ.js → RovingFocusItem-X0bfqWWS.js} +1 -1
- package/frontend-dist/assets/{SelectValue-B32pgmTJ.js → SelectValue-zO8t-tx1.js} +1 -1
- package/frontend-dist/assets/{Setup-Df9IQo2x.js → Setup-ByT2ThOQ.js} +1 -1
- package/frontend-dist/assets/{Switch-CLeo7H6d.js → Switch-BEMjVugO.js} +1 -1
- package/frontend-dist/assets/{TableHeader-BpscAtT3.js → TableHeader-DpHWSnxK.js} +1 -1
- package/frontend-dist/assets/{TabsTrigger-DErAbTuM.js → TabsTrigger-Db6RqsZc.js} +1 -1
- package/frontend-dist/assets/{VisuallyHidden-CJBR3YB3.js → VisuallyHidden-hs8pj8OP.js} +1 -1
- package/frontend-dist/assets/{VisuallyHiddenInput-Cy0VuE1l.js → VisuallyHiddenInput-1m0nNADN.js} +1 -1
- package/frontend-dist/assets/{alert-dialog-BAR1JRmT.js → alert-dialog-PP91kaO8.js} +1 -1
- package/frontend-dist/assets/{button-D54q76GQ.js → button-Dcc0gF5i.js} +1 -1
- package/frontend-dist/assets/{client-Mb8fy_bC.js → client-DIIo9zPK.js} +2 -2
- package/frontend-dist/assets/{createLucideIcon-CCmQ9QKM.js → createLucideIcon-DGZkBjcJ.js} +1 -1
- package/frontend-dist/assets/{dialog-DSH5k5Kj.js → dialog-CxSyR-fN.js} +1 -1
- package/frontend-dist/assets/format-CPdJtjZ5.js +1 -0
- package/frontend-dist/assets/index-BL-LAtac.css +1 -0
- package/frontend-dist/assets/{index-BQBtSfem.js → index-CvT41fGL.js} +1 -1
- package/frontend-dist/assets/{lib-BgOqOzXI.js → lib-Bl0OuBjh.js} +1 -1
- package/frontend-dist/assets/{ohash.D__AXeF1-p4vp6Svt.js → ohash.D__AXeF1-B64hB831.js} +1 -1
- package/frontend-dist/assets/{useClipboard-DO-38TXr.js → useClipboard-CWc1cTDo.js} +1 -1
- package/frontend-dist/assets/{useForwardExpose-CzQFheaD.js → useForwardExpose-AkE0lq8y.js} +1 -1
- package/frontend-dist/assets/useNonce-DGyPxdjq.js +1 -0
- package/frontend-dist/assets/x-BuUpx9Fr.js +1 -0
- package/frontend-dist/index.html +7 -7
- package/package.json +1 -1
- package/dist/admin/services.d.ts +0 -7
- package/dist/admin/services.js +0 -63
- package/dist/proxy/retry.d.ts +0 -43
- package/dist/proxy/retry.js +0 -121
- package/dist/proxy/upstream-call.js +0 -208
- package/frontend-dist/assets/LogResponseViewer-CyBzv02a.js +0 -3
- package/frontend-dist/assets/Logs-Cu_IftdS.js +0 -1
- package/frontend-dist/assets/Monitor-CKlid1sC.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-CkYeXwgH.js +0 -5
- package/frontend-dist/assets/RetryRules-Csb7u9W4.js +0 -1
- package/frontend-dist/assets/RouterKeys-C6YIufmj.js +0 -1
- package/frontend-dist/assets/index-H-lnTkMr.css +0 -1
- package/frontend-dist/assets/useNonce-CU-NirfM.js +0 -1
- package/frontend-dist/assets/x-DEJ1xpi5.js +0 -1
|
@@ -3,10 +3,15 @@ import { StatsAggregator } from "./stats-aggregator.js";
|
|
|
3
3
|
import { RuntimeCollector } from "./runtime-collector.js";
|
|
4
4
|
import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
|
|
5
5
|
import type { ActiveRequest, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
|
|
6
|
+
export interface TrackerLogger {
|
|
7
|
+
debug(obj: Record<string, unknown>, msg: string): void;
|
|
8
|
+
warn(obj: Record<string, unknown>, msg: string): void;
|
|
9
|
+
}
|
|
6
10
|
export declare class RequestTracker {
|
|
7
11
|
private activeMap;
|
|
8
12
|
private recentCompleted;
|
|
9
13
|
private clients;
|
|
14
|
+
private logger?;
|
|
10
15
|
private providerConfigCache;
|
|
11
16
|
private pushTimer;
|
|
12
17
|
private tickCount;
|
|
@@ -17,6 +22,7 @@ export declare class RequestTracker {
|
|
|
17
22
|
constructor(deps?: {
|
|
18
23
|
semaphoreManager?: ProviderSemaphoreManager;
|
|
19
24
|
runtimeCollector?: RuntimeCollector;
|
|
25
|
+
logger?: TrackerLogger;
|
|
20
26
|
});
|
|
21
27
|
start(req: ActiveRequest): void;
|
|
22
28
|
update(id: string, patch: Partial<ActiveRequest>): void;
|
|
@@ -1,56 +1,6 @@
|
|
|
1
1
|
import { StatsAggregator } from "./stats-aggregator.js";
|
|
2
2
|
import { RuntimeCollector } from "./runtime-collector.js";
|
|
3
|
-
|
|
4
|
-
const empty = { text: '', block: null };
|
|
5
|
-
if (!line.startsWith(SSE_DATA_PREFIX))
|
|
6
|
-
return empty;
|
|
7
|
-
const jsonStr = line.slice(SSE_DATA_PREFIX.length);
|
|
8
|
-
if (jsonStr === '[DONE]')
|
|
9
|
-
return empty;
|
|
10
|
-
let obj;
|
|
11
|
-
try {
|
|
12
|
-
obj = JSON.parse(jsonStr);
|
|
13
|
-
}
|
|
14
|
-
catch {
|
|
15
|
-
return empty;
|
|
16
|
-
}
|
|
17
|
-
if (apiType === 'openai') {
|
|
18
|
-
const choices = obj.choices;
|
|
19
|
-
const delta = choices?.[0]?.delta;
|
|
20
|
-
const text = delta?.content ?? '';
|
|
21
|
-
return { text, block: text ? { index: 0, type: 'text', content: text } : null };
|
|
22
|
-
}
|
|
23
|
-
// Anthropic
|
|
24
|
-
const type = obj.type;
|
|
25
|
-
const index = obj.index;
|
|
26
|
-
const delta = obj.delta;
|
|
27
|
-
if (type === 'content_block_start') {
|
|
28
|
-
const contentBlock = obj.content_block;
|
|
29
|
-
const blockType = contentBlock?.type;
|
|
30
|
-
const name = blockType === 'tool_use' ? contentBlock?.name : undefined;
|
|
31
|
-
if (blockType === 'thinking' || blockType === 'text' || blockType === 'tool_use') {
|
|
32
|
-
return { text: '', block: { index: index ?? 0, type: blockType, content: '', name } };
|
|
33
|
-
}
|
|
34
|
-
return empty;
|
|
35
|
-
}
|
|
36
|
-
if (type === 'content_block_delta' && delta) {
|
|
37
|
-
const deltaType = delta.type;
|
|
38
|
-
if (deltaType === 'thinking_delta') {
|
|
39
|
-
const thinking = delta.thinking ?? '';
|
|
40
|
-
return { text: '', block: { index: index ?? 0, type: 'thinking', content: thinking } };
|
|
41
|
-
}
|
|
42
|
-
if (deltaType === 'text_delta') {
|
|
43
|
-
const text = delta.text ?? '';
|
|
44
|
-
return { text, block: { index: index ?? 0, type: 'text', content: text } };
|
|
45
|
-
}
|
|
46
|
-
if (deltaType === 'input_json_delta') {
|
|
47
|
-
const partialJson = delta.partial_json ?? '';
|
|
48
|
-
return { text: '', block: { index: index ?? 0, type: 'tool_use', content: partialJson } };
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
return empty;
|
|
52
|
-
}
|
|
53
|
-
const SSE_DATA_PREFIX = "data: ";
|
|
3
|
+
import { extractStreamText } from "./stream-extractor.js";
|
|
54
4
|
const RUNTIME_PUSH_TICK_INTERVAL = 2;
|
|
55
5
|
const RECENT_COMPLETED_MAX = 200;
|
|
56
6
|
const RECENT_TTL_MS = 5 * 60 * 1000; // eslint-disable-line no-magic-numbers
|
|
@@ -60,6 +10,7 @@ export class RequestTracker {
|
|
|
60
10
|
activeMap = new Map();
|
|
61
11
|
recentCompleted = [];
|
|
62
12
|
clients = new Set();
|
|
13
|
+
logger;
|
|
63
14
|
providerConfigCache = new Map();
|
|
64
15
|
pushTimer = null;
|
|
65
16
|
tickCount = 0;
|
|
@@ -71,18 +22,23 @@ export class RequestTracker {
|
|
|
71
22
|
this.semaphoreManager = deps?.semaphoreManager;
|
|
72
23
|
this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
|
|
73
24
|
this.statsAggregator = new StatsAggregator();
|
|
25
|
+
this.logger = deps?.logger;
|
|
74
26
|
}
|
|
75
27
|
// --- Core methods ---
|
|
76
28
|
start(req) {
|
|
77
29
|
this.activeMap.set(req.id, { ...req });
|
|
30
|
+
this.logger?.debug({ reqId: req.id, model: req.model, providerId: req.providerId, activeCount: this.activeMap.size }, "Tracker: start");
|
|
78
31
|
this.broadcast("request_start", req);
|
|
79
32
|
}
|
|
80
33
|
update(id, patch) {
|
|
81
34
|
const req = this.activeMap.get(id);
|
|
82
|
-
if (!req)
|
|
35
|
+
if (!req) {
|
|
36
|
+
this.logger?.warn({ reqId: id, patchKeys: Object.keys(patch) }, "Tracker: update called but request not in activeMap");
|
|
83
37
|
return;
|
|
38
|
+
}
|
|
84
39
|
const prevQueued = req.queued;
|
|
85
40
|
Object.assign(req, patch);
|
|
41
|
+
this.logger?.debug({ reqId: id, patchQueued: patch.queued, prevQueued, activeCount: this.activeMap.size }, "Tracker: update");
|
|
86
42
|
// queued 状态变化时立即广播,让前端即时看到排队/取消排队
|
|
87
43
|
if (patch.queued !== undefined && patch.queued !== prevQueued) {
|
|
88
44
|
this.broadcast("request_update", this.getActive());
|
|
@@ -140,8 +96,10 @@ export class RequestTracker {
|
|
|
140
96
|
}
|
|
141
97
|
complete(id, result) {
|
|
142
98
|
const req = this.activeMap.get(id);
|
|
143
|
-
if (!req)
|
|
99
|
+
if (!req) {
|
|
100
|
+
this.logger?.warn({ reqId: id, result }, "Tracker: complete called but request not in activeMap");
|
|
144
101
|
return;
|
|
102
|
+
}
|
|
145
103
|
const now = Date.now();
|
|
146
104
|
const latency = now - req.startTime;
|
|
147
105
|
const statusCode = result.statusCode ?? 0;
|
|
@@ -158,6 +116,7 @@ export class RequestTracker {
|
|
|
158
116
|
if (this.recentCompleted.length > RECENT_COMPLETED_MAX) {
|
|
159
117
|
this.recentCompleted.length = RECENT_COMPLETED_MAX;
|
|
160
118
|
}
|
|
119
|
+
this.logger?.debug({ reqId: id, status: result.status, statusCode, latency, activeCount: this.activeMap.size }, "Tracker: complete");
|
|
161
120
|
this.broadcast("request_complete", completed);
|
|
162
121
|
}
|
|
163
122
|
// --- Query methods ---
|
|
@@ -237,15 +196,25 @@ export class RequestTracker {
|
|
|
237
196
|
}
|
|
238
197
|
broadcast(event, data) {
|
|
239
198
|
const msg = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
|
|
199
|
+
const clientCount = this.clients.size;
|
|
200
|
+
let sentCount = 0;
|
|
240
201
|
for (const client of this.clients) {
|
|
241
202
|
try {
|
|
242
|
-
if (!client.writableEnded)
|
|
203
|
+
if (!client.writableEnded) {
|
|
243
204
|
client.write(msg);
|
|
205
|
+
sentCount++;
|
|
206
|
+
}
|
|
244
207
|
}
|
|
245
208
|
catch {
|
|
246
209
|
this.clients.delete(client);
|
|
247
210
|
}
|
|
248
211
|
}
|
|
212
|
+
const summary = event === "request_update" ? `active=${data?.length}`
|
|
213
|
+
: event === "concurrency_update" ? data?.map(p => `${p.providerName}=${p.active}/${p.maxConcurrency}q${p.queued}`).join(",")
|
|
214
|
+
: event === "request_start" ? `model=${data?.model}`
|
|
215
|
+
: event === "request_complete" ? `model=${data?.model} status=${data?.status}`
|
|
216
|
+
: "";
|
|
217
|
+
this.logger?.debug({ event, clientCount, sentCount, summary }, "Tracker: SSE broadcast");
|
|
249
218
|
}
|
|
250
219
|
// --- Provider config cache ---
|
|
251
220
|
updateProviderConfig(providerId, config) {
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ContentBlock } from "./types.js";
|
|
2
|
+
export interface StreamExtraction {
|
|
3
|
+
text: string;
|
|
4
|
+
block?: {
|
|
5
|
+
index: number;
|
|
6
|
+
type: ContentBlock["type"];
|
|
7
|
+
content: string;
|
|
8
|
+
name?: string;
|
|
9
|
+
} | null;
|
|
10
|
+
}
|
|
11
|
+
export declare function extractStreamText(line: string, apiType: "openai" | "anthropic"): StreamExtraction;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
const SSE_DATA_PREFIX = "data: ";
|
|
2
|
+
export function extractStreamText(line, apiType) {
|
|
3
|
+
const empty = { text: "", block: null };
|
|
4
|
+
if (!line.startsWith(SSE_DATA_PREFIX))
|
|
5
|
+
return empty;
|
|
6
|
+
const jsonStr = line.slice(SSE_DATA_PREFIX.length);
|
|
7
|
+
if (jsonStr === "[DONE]")
|
|
8
|
+
return empty;
|
|
9
|
+
let obj;
|
|
10
|
+
try {
|
|
11
|
+
obj = JSON.parse(jsonStr);
|
|
12
|
+
}
|
|
13
|
+
catch {
|
|
14
|
+
return empty;
|
|
15
|
+
}
|
|
16
|
+
if (apiType === "openai") {
|
|
17
|
+
const choices = obj.choices;
|
|
18
|
+
const delta = choices?.[0]?.delta;
|
|
19
|
+
const text = delta?.content ?? "";
|
|
20
|
+
return { text, block: text ? { index: 0, type: "text", content: text } : null };
|
|
21
|
+
}
|
|
22
|
+
// Anthropic
|
|
23
|
+
const type = obj.type;
|
|
24
|
+
const index = obj.index;
|
|
25
|
+
const delta = obj.delta;
|
|
26
|
+
if (type === "content_block_start") {
|
|
27
|
+
const contentBlock = obj.content_block;
|
|
28
|
+
const blockType = contentBlock?.type;
|
|
29
|
+
const name = blockType === "tool_use" ? contentBlock?.name : undefined;
|
|
30
|
+
if (blockType === "thinking" || blockType === "text" || blockType === "tool_use") {
|
|
31
|
+
return { text: "", block: { index: index ?? 0, type: blockType, content: "", name } };
|
|
32
|
+
}
|
|
33
|
+
return empty;
|
|
34
|
+
}
|
|
35
|
+
if (type === "content_block_delta" && delta) {
|
|
36
|
+
const deltaType = delta.type;
|
|
37
|
+
if (deltaType === "thinking_delta") {
|
|
38
|
+
const thinking = delta.thinking ?? "";
|
|
39
|
+
return { text: "", block: { index: index ?? 0, type: "thinking", content: thinking } };
|
|
40
|
+
}
|
|
41
|
+
if (deltaType === "text_delta") {
|
|
42
|
+
const text = delta.text ?? "";
|
|
43
|
+
return { text, block: { index: index ?? 0, type: "text", content: text } };
|
|
44
|
+
}
|
|
45
|
+
if (deltaType === "input_json_delta") {
|
|
46
|
+
const partialJson = delta.partial_json ?? "";
|
|
47
|
+
return { text: "", block: { index: index ?? 0, type: "tool_use", content: partialJson } };
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
return empty;
|
|
51
|
+
}
|
package/dist/proxy/anthropic.js
CHANGED
|
@@ -1,41 +1,28 @@
|
|
|
1
1
|
import fp from "fastify-plugin";
|
|
2
|
-
import {
|
|
2
|
+
import { createErrorFormatter } from "./proxy-core.js";
|
|
3
|
+
import { handleProxyRequest } from "./proxy-handler.js";
|
|
4
|
+
import { createOrchestrator } from "./orchestrator.js";
|
|
3
5
|
const MESSAGES_PATH = "/v1/messages";
|
|
4
|
-
const
|
|
5
|
-
modelNotFound:
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
}),
|
|
13
|
-
providerUnavailable: () => ({
|
|
14
|
-
statusCode: 503,
|
|
15
|
-
body: { type: "error", error: { type: "api_error", message: "Provider unavailable" } },
|
|
16
|
-
}),
|
|
17
|
-
providerTypeMismatch: () => ({
|
|
18
|
-
statusCode: 500,
|
|
19
|
-
body: { type: "error", error: { type: "api_error", message: "Provider type mismatch for this endpoint" } },
|
|
20
|
-
}),
|
|
21
|
-
upstreamConnectionFailed: () => ({
|
|
22
|
-
statusCode: 502,
|
|
23
|
-
body: { type: "error", error: { type: "upstream_error", message: "Failed to connect to upstream service" } },
|
|
24
|
-
}),
|
|
25
|
-
concurrencyQueueFull: (providerId) => ({
|
|
26
|
-
statusCode: 503,
|
|
27
|
-
body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency queue is full` } },
|
|
28
|
-
}),
|
|
29
|
-
concurrencyTimeout: (providerId, timeoutMs) => ({
|
|
30
|
-
statusCode: 504,
|
|
31
|
-
body: { type: "error", error: { type: "api_error", message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)` } },
|
|
32
|
-
}),
|
|
6
|
+
const ANTHROPIC_ERROR_TYPE = {
|
|
7
|
+
modelNotFound: "not_found_error",
|
|
8
|
+
modelNotAllowed: "forbidden_error",
|
|
9
|
+
providerUnavailable: "api_error",
|
|
10
|
+
providerTypeMismatch: "api_error",
|
|
11
|
+
upstreamConnectionFailed: "upstream_error",
|
|
12
|
+
concurrencyQueueFull: "api_error",
|
|
13
|
+
concurrencyTimeout: "api_error",
|
|
33
14
|
};
|
|
15
|
+
const anthropicErrors = createErrorFormatter((kind, message) => ({ type: "error", error: { type: ANTHROPIC_ERROR_TYPE[kind], message } }));
|
|
34
16
|
const anthropicProxyRaw = (app, opts, done) => {
|
|
35
17
|
const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
|
|
18
|
+
const orchestrator = createOrchestrator(semaphoreManager, tracker);
|
|
36
19
|
app.post(MESSAGES_PATH, async (request, reply) => {
|
|
37
|
-
|
|
38
|
-
|
|
20
|
+
if (!orchestrator) {
|
|
21
|
+
const e = anthropicErrors.providerUnavailable();
|
|
22
|
+
return reply.status(e.statusCode).send(e.body);
|
|
23
|
+
}
|
|
24
|
+
const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, tracker, orchestrator };
|
|
25
|
+
return handleProxyRequest(request, reply, "anthropic", MESSAGES_PATH, anthropicErrors, deps);
|
|
39
26
|
});
|
|
40
27
|
done();
|
|
41
28
|
};
|
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import Database from "better-sqlite3";
|
|
2
2
|
import type { Provider } from "../db/index.js";
|
|
3
3
|
import type { RawHeaders } from "./proxy-core.js";
|
|
4
|
-
export interface
|
|
4
|
+
export interface FailoverContext {
|
|
5
|
+
isFailoverIteration: boolean;
|
|
6
|
+
rootLogId: string;
|
|
7
|
+
}
|
|
8
|
+
export interface LogRetryMeta {
|
|
9
|
+
isRetry?: boolean;
|
|
10
|
+
isFailover?: boolean;
|
|
11
|
+
originalRequestId?: string | null;
|
|
12
|
+
}
|
|
13
|
+
export interface RequestLogParams extends LogRetryMeta {
|
|
5
14
|
id: string;
|
|
6
15
|
apiType: string;
|
|
7
16
|
model: string;
|
|
@@ -15,14 +24,12 @@ export interface RequestLogParams {
|
|
|
15
24
|
respBody: string | null;
|
|
16
25
|
upHdrs: Record<string, string>;
|
|
17
26
|
cliHdrs: Record<string, string>;
|
|
18
|
-
isRetry?: boolean;
|
|
19
|
-
originalRequestId?: string | null;
|
|
20
27
|
routerKeyId?: string | null;
|
|
21
28
|
originalModel?: string | null;
|
|
22
29
|
}
|
|
23
30
|
/** 插入成功请求日志,供 openai/anthropic 插件共享 */
|
|
24
31
|
export declare function insertSuccessLog(db: Database.Database, params: RequestLogParams): void;
|
|
25
|
-
export interface RejectedLogParams {
|
|
32
|
+
export interface RejectedLogParams extends LogRetryMeta {
|
|
26
33
|
db: Database.Database;
|
|
27
34
|
logId: string;
|
|
28
35
|
apiType: string;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { insertRequestLog } from "../db/index.js";
|
|
2
2
|
/** 插入成功请求日志,供 openai/anthropic 插件共享 */
|
|
3
3
|
export function insertSuccessLog(db, params) {
|
|
4
|
-
const { id: logId, apiType, model, provider, isStream, startTime, reqBody, clientReq, upstreamReq, status, respBody, upHdrs, cliHdrs, isRetry = false, originalRequestId = null, routerKeyId = null, originalModel = null } = params;
|
|
4
|
+
const { id: logId, apiType, model, provider, isStream, startTime, reqBody, clientReq, upstreamReq, status, respBody, upHdrs, cliHdrs, isRetry = false, isFailover = false, originalRequestId = null, routerKeyId = null, originalModel = null } = params;
|
|
5
5
|
insertRequestLog(db, {
|
|
6
6
|
id: logId, api_type: apiType, model, provider_id: provider.id,
|
|
7
7
|
status_code: status, latency_ms: Date.now() - startTime,
|
|
@@ -10,13 +10,13 @@ export function insertSuccessLog(db, params) {
|
|
|
10
10
|
response_body: respBody, client_request: clientReq, upstream_request: upstreamReq,
|
|
11
11
|
upstream_response: JSON.stringify({ statusCode: status, headers: upHdrs, body: respBody }),
|
|
12
12
|
client_response: JSON.stringify({ statusCode: status, headers: cliHdrs, body: respBody }),
|
|
13
|
-
is_retry: isRetry ? 1 : 0, original_request_id: originalRequestId,
|
|
13
|
+
is_retry: isRetry ? 1 : 0, is_failover: isFailover ? 1 : 0, original_request_id: originalRequestId,
|
|
14
14
|
router_key_id: routerKeyId, original_model: originalModel,
|
|
15
15
|
});
|
|
16
16
|
}
|
|
17
17
|
/** Log a request rejected before reaching upstream */
|
|
18
18
|
export function insertRejectedLog(params) {
|
|
19
|
-
const { db, logId, apiType, model, statusCode, errorMessage, startTime, isStream, routerKeyId, originalBody, clientHeaders, providerId = null, originalModel = null } = params;
|
|
19
|
+
const { db, logId, apiType, model, statusCode, errorMessage, startTime, isStream, routerKeyId, originalBody, clientHeaders, providerId = null, isFailover = false, originalRequestId = null, originalModel = null } = params;
|
|
20
20
|
insertRequestLog(db, {
|
|
21
21
|
id: logId,
|
|
22
22
|
api_type: apiType,
|
|
@@ -29,6 +29,8 @@ export function insertRejectedLog(params) {
|
|
|
29
29
|
created_at: new Date().toISOString(),
|
|
30
30
|
request_body: JSON.stringify(originalBody),
|
|
31
31
|
client_request: JSON.stringify({ headers: clientHeaders, body: originalBody }),
|
|
32
|
+
is_failover: isFailover ? 1 : 0,
|
|
33
|
+
original_request_id: originalRequestId,
|
|
32
34
|
router_key_id: routerKeyId,
|
|
33
35
|
original_model: originalModel,
|
|
34
36
|
});
|
package/dist/proxy/openai.js
CHANGED
|
@@ -2,49 +2,33 @@ import fp from "fastify-plugin";
|
|
|
2
2
|
import { getActiveProviders } from "../db/index.js";
|
|
3
3
|
import { getSetting } from "../db/settings.js";
|
|
4
4
|
import { decrypt } from "../utils/crypto.js";
|
|
5
|
-
import { proxyGetRequest,
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
import { proxyGetRequest, createErrorFormatter } from "./proxy-core.js";
|
|
6
|
+
import { handleProxyRequest } from "./proxy-handler.js";
|
|
7
|
+
import { createOrchestrator } from "./orchestrator.js";
|
|
8
|
+
import { HTTP_NOT_FOUND, HTTP_BAD_GATEWAY } from "../constants.js";
|
|
8
9
|
const CHAT_COMPLETIONS_PATH = "/v1/chat/completions";
|
|
9
10
|
const MODELS_PATH = "/v1/models";
|
|
10
|
-
const
|
|
11
|
-
modelNotFound:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
}),
|
|
19
|
-
providerUnavailable: () => ({
|
|
20
|
-
statusCode: 503,
|
|
21
|
-
body: { error: { message: "Provider unavailable", type: "server_error", code: "provider_unavailable" } },
|
|
22
|
-
}),
|
|
23
|
-
providerTypeMismatch: () => ({
|
|
24
|
-
statusCode: 500,
|
|
25
|
-
body: { error: { message: "Provider type mismatch for this endpoint", type: "server_error", code: "provider_type_mismatch" } },
|
|
26
|
-
}),
|
|
27
|
-
upstreamConnectionFailed: () => ({
|
|
28
|
-
statusCode: 502,
|
|
29
|
-
body: { error: { message: "Failed to connect to upstream service", type: "upstream_error", code: "upstream_connection_failed" } },
|
|
30
|
-
}),
|
|
31
|
-
concurrencyQueueFull: (providerId) => ({
|
|
32
|
-
statusCode: 503,
|
|
33
|
-
body: { error: { message: `Provider '${providerId}' concurrency queue is full`, type: "server_error", code: "concurrency_queue_full" } },
|
|
34
|
-
}),
|
|
35
|
-
concurrencyTimeout: (providerId, timeoutMs) => ({
|
|
36
|
-
statusCode: 504,
|
|
37
|
-
body: { error: { message: `Provider '${providerId}' concurrency wait timeout (${timeoutMs}ms)`, type: "server_error", code: "concurrency_timeout" } },
|
|
38
|
-
}),
|
|
11
|
+
const OPENAI_ERROR_META = {
|
|
12
|
+
modelNotFound: { type: "invalid_request_error", code: "model_not_found" },
|
|
13
|
+
modelNotAllowed: { type: "invalid_request_error", code: "model_not_allowed" },
|
|
14
|
+
providerUnavailable: { type: "server_error", code: "provider_unavailable" },
|
|
15
|
+
providerTypeMismatch: { type: "server_error", code: "provider_type_mismatch" },
|
|
16
|
+
upstreamConnectionFailed: { type: "upstream_error", code: "upstream_connection_failed" },
|
|
17
|
+
concurrencyQueueFull: { type: "server_error", code: "concurrency_queue_full" },
|
|
18
|
+
concurrencyTimeout: { type: "server_error", code: "concurrency_timeout" },
|
|
39
19
|
};
|
|
20
|
+
const openaiErrors = createErrorFormatter((kind, message) => ({ error: { message, ...OPENAI_ERROR_META[kind] } }));
|
|
40
21
|
function sendError(reply, e) {
|
|
41
22
|
return reply.status(e.statusCode).send(e.body);
|
|
42
23
|
}
|
|
43
24
|
const openaiProxyRaw = (app, opts, done) => {
|
|
44
25
|
const { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, semaphoreManager, tracker } = opts;
|
|
26
|
+
const orchestrator = createOrchestrator(semaphoreManager, tracker);
|
|
45
27
|
app.post(CHAT_COMPLETIONS_PATH, async (request, reply) => {
|
|
46
|
-
|
|
47
|
-
|
|
28
|
+
if (!orchestrator)
|
|
29
|
+
return sendError(reply, openaiErrors.providerUnavailable());
|
|
30
|
+
const deps = { db, streamTimeoutMs, retryMaxAttempts, retryBaseDelayMs, matcher, tracker, orchestrator };
|
|
31
|
+
return handleProxyRequest(request, reply, "openai", CHAT_COMPLETIONS_PATH, openaiErrors, deps, {
|
|
48
32
|
beforeSendProxy: (body, isStream) => {
|
|
49
33
|
if (isStream && !body.stream_options) {
|
|
50
34
|
body.stream_options = { include_usage: true };
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { FastifyReply, FastifyRequest } from "fastify";
|
|
2
|
+
import type { TransportResult } from "./types.js";
|
|
3
|
+
import type { Target } from "./strategy/types.js";
|
|
4
|
+
import type { ResilienceLayer, ResilienceResult } from "./resilience.js";
|
|
5
|
+
import type { RetryRuleMatcher } from "./retry-rules.js";
|
|
6
|
+
import type { SemaphoreScope } from "./scope.js";
|
|
7
|
+
import type { TrackerScope } from "./scope.js";
|
|
8
|
+
import type { ProviderSemaphoreManager } from "./semaphore.js";
|
|
9
|
+
import type { RequestTracker } from "../monitor/request-tracker.js";
|
|
10
|
+
export interface OrchestratorConfig {
|
|
11
|
+
resolved: Target;
|
|
12
|
+
provider: {
|
|
13
|
+
id: string;
|
|
14
|
+
name: string;
|
|
15
|
+
is_active: number;
|
|
16
|
+
api_type: string;
|
|
17
|
+
base_url: string;
|
|
18
|
+
api_key: string;
|
|
19
|
+
};
|
|
20
|
+
clientModel: string;
|
|
21
|
+
isStream: boolean;
|
|
22
|
+
/** 外部生成的 tracker ID,用于 tracker.appendStreamChunk / tracker.update 等回调匹配 */
|
|
23
|
+
trackerId?: string;
|
|
24
|
+
}
|
|
25
|
+
export interface HandleContext {
|
|
26
|
+
streamTimeoutMs?: number;
|
|
27
|
+
retryMaxAttempts?: number;
|
|
28
|
+
retryBaseDelayMs?: number;
|
|
29
|
+
failoverThreshold?: number;
|
|
30
|
+
isFailover?: boolean;
|
|
31
|
+
ruleMatcher?: RetryRuleMatcher;
|
|
32
|
+
transportFn: (target: Target) => Promise<TransportResult>;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
|
|
36
|
+
* 两个 provider 的创建逻辑完全一致。
|
|
37
|
+
*/
|
|
38
|
+
export declare function createOrchestrator(semaphoreManager?: ProviderSemaphoreManager, tracker?: RequestTracker): ProxyOrchestrator | undefined;
|
|
39
|
+
export declare class ProxyOrchestrator {
|
|
40
|
+
private deps;
|
|
41
|
+
constructor(deps: {
|
|
42
|
+
semaphoreScope: SemaphoreScope;
|
|
43
|
+
trackerScope: TrackerScope;
|
|
44
|
+
resilience: ResilienceLayer;
|
|
45
|
+
});
|
|
46
|
+
handle(request: FastifyRequest, reply: FastifyReply, apiType: "openai" | "anthropic", config: OrchestratorConfig, ctx?: HandleContext): Promise<ResilienceResult>;
|
|
47
|
+
private buildActiveRequest;
|
|
48
|
+
private createAbortSignal;
|
|
49
|
+
private executeResilience;
|
|
50
|
+
private sendResponse;
|
|
51
|
+
private extractTrackStatus;
|
|
52
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { ResilienceLayer as ResilienceLayerClass } from "./resilience.js";
|
|
2
|
+
import { SemaphoreScope as SemaphoreScopeClass } from "./scope.js";
|
|
3
|
+
import { TrackerScope as TrackerScopeClass } from "./scope.js";
|
|
4
|
+
const DEFAULT_MAX_RETRIES = 3;
|
|
5
|
+
const DEFAULT_BASE_DELAY_MS = 1000;
|
|
6
|
+
const DEFAULT_FAILOVER_THRESHOLD = 400;
|
|
7
|
+
/**
|
|
8
|
+
* 工厂函数,消除 openai/anthropic 创建 orchestrator 的重复代码。
|
|
9
|
+
* 两个 provider 的创建逻辑完全一致。
|
|
10
|
+
*/
|
|
11
|
+
export function createOrchestrator(semaphoreManager, tracker) {
|
|
12
|
+
const semaphoreScope = semaphoreManager ? new SemaphoreScopeClass(semaphoreManager) : undefined;
|
|
13
|
+
const trackerScope = tracker ? new TrackerScopeClass(tracker) : undefined;
|
|
14
|
+
if (!semaphoreScope || !trackerScope)
|
|
15
|
+
return undefined;
|
|
16
|
+
return new ProxyOrchestrator({ semaphoreScope, trackerScope, resilience: new ResilienceLayerClass() });
|
|
17
|
+
}
|
|
18
|
+
/**
 * Coordinates one proxied request end-to-end: registers it with the request
 * tracker, acquires a provider concurrency slot, runs the resilience layer
 * (retry/failover), and finally writes the response to the client.
 *
 * Dependencies (trackerScope, semaphoreScope, resilience) are injected via
 * the constructor `deps` object — see createOrchestrator.
 */
export class ProxyOrchestrator {
    // Injected collaborators: { semaphoreScope, trackerScope, resilience }.
    deps;
    constructor(deps) {
        this.deps = deps;
    }
    /**
     * Handles a single request. Flow:
     *   1. Build an ActiveRequest record and register it with the tracker.
     *   2. Inside the tracker scope, wait for a semaphore slot for the
     *      target provider; while waiting, the request is flagged "queued".
     *   3. Once a slot is granted, clear the queued flag and run the
     *      resilience-wrapped transport.
     *   4. Map the transport outcome to a tracker status, then send the
     *      response to the client (unless a stream already did, or the
     *      failover path suppresses it — see sendResponse).
     */
    async handle(request, reply, apiType, config, ctx) {
        const trackerReq = this.buildActiveRequest(request, config, apiType);
        // track(req, work, statusFn): the third argument derives the final
        // tracker status from the work's result once it settles.
        const result = await this.deps.trackerScope.track(trackerReq, () => this.deps.semaphoreScope.withSlot(config.provider.id, this.createAbortSignal(request), () => {
            // onQueue callback: slot not immediately available.
            trackerReq.queued = true;
            this.deps.trackerScope.markQueued(trackerReq.id, true);
        }, () => {
            // Slot acquired: clear the queued flag if it was ever set.
            if (trackerReq.queued) {
                trackerReq.queued = false;
                this.deps.trackerScope.markQueued(trackerReq.id, false);
            }
            return this.executeResilience(config, ctx);
        }), (result) => this.extractTrackStatus(result));
        this.sendResponse(reply, result.result, ctx);
        return result;
    }
    /**
     * Builds the ActiveRequest record the tracker maintains for this call.
     * Reuses config.trackerId when the caller pre-allocated one (e.g. for
     * failover continuations); otherwise generates a fresh UUID.
     */
    buildActiveRequest(request, config, apiType) {
        return {
            id: config.trackerId ?? crypto.randomUUID(),
            apiType,
            model: config.clientModel,
            providerId: config.provider.id,
            providerName: config.provider.name,
            isStream: config.isStream,
            queued: false,
            startTime: Date.now(),
            status: "pending",
            retryCount: 0,
            attempts: [],
            clientIp: request.ip,
        };
    }
    /**
     * Returns an AbortSignal that fires when the client socket closes
     * before the request body was fully read — i.e. the client went away
     * mid-request — so queue waits and upstream calls can be cancelled.
     * NOTE(review): the "close" listener is never removed; presumably the
     * request object is short-lived so this does not accumulate — confirm.
     */
    createAbortSignal(request) {
        const controller = new AbortController();
        request.raw.on("close", () => {
            if (!request.raw.readableEnded) {
                controller.abort();
            }
        });
        return controller.signal;
    }
    /**
     * Runs the resilience layer (retry with backoff, failover threshold)
     * around the caller-supplied transport function.
     * @throws Error when ctx.transportFn is missing — it is mandatory.
     */
    async executeResilience(config, ctx) {
        if (!ctx?.transportFn)
            throw new Error("HandleContext.transportFn is required");
        // Every knob falls back to a module-level default when the caller
        // did not specify one.
        const resilienceConfig = {
            maxRetries: ctx.retryMaxAttempts ?? DEFAULT_MAX_RETRIES,
            baseDelayMs: ctx.retryBaseDelayMs ?? DEFAULT_BASE_DELAY_MS,
            failoverThreshold: ctx.failoverThreshold ?? DEFAULT_FAILOVER_THRESHOLD,
            isFailover: ctx.isFailover ?? false,
            ruleMatcher: ctx.ruleMatcher,
        };
        // Single-target list: this orchestrator resolves one backend per call.
        return this.deps.resilience.execute(() => [config.resolved], ctx.transportFn, resilienceConfig);
    }
    /**
     * Writes the transport result to the client, except when:
     *  - the result is a stream outcome or a thrown error (already handled
     *    elsewhere / nothing to send), or
     *  - we are in a failover attempt and the status crossed the failover
     *    threshold — the outer proxy-handler owns the error response then.
     */
    sendResponse(reply, result, ctx) {
        if (result.kind === "stream_success" || result.kind === "stream_abort" || result.kind === "throw") {
            return;
        }
        // In the failover scenario the error response is controlled by the
        // outer proxy-handler, so do not send it here.
        if (ctx?.isFailover && "statusCode" in result && result.statusCode >= (ctx.failoverThreshold ?? DEFAULT_FAILOVER_THRESHOLD)) {
            return;
        }
        if (result.headers) {
            for (const [key, value] of Object.entries(result.headers)) {
                reply.header(key, value);
            }
        }
        reply.status(result.statusCode).send(result.body);
    }
    /**
     * Maps a settled transport result onto the tracker's terminal status.
     * Stream aborts count as "completed" (the upstream exchange finished
     * from the tracker's point of view); thrown errors carry no status code.
     */
    extractTrackStatus(result) {
        const transport = result.result;
        if (transport.kind === "success" || transport.kind === "stream_success" || transport.kind === "stream_abort") {
            return { status: "completed", statusCode: transport.statusCode };
        }
        if (transport.kind === "throw") {
            return { status: "failed" };
        }
        return { status: "failed", statusCode: transport.statusCode };
    }
}
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
import type { FastifyReply, FastifyRequest } from "fastify";
|
|
2
|
-
import Database from "better-sqlite3";
|
|
3
1
|
import type { Provider } from "../db/index.js";
|
|
4
|
-
import type {
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
export type RawHeaders = Record<string, string | string[] | undefined>;
|
|
2
|
+
import type { GetTransportResult } from "./transport.js";
|
|
3
|
+
import type { RawHeaders } from "./types.js";
|
|
4
|
+
export { UPSTREAM_SUCCESS } from "./types.js";
|
|
5
|
+
export type { RawHeaders } from "./types.js";
|
|
9
6
|
export interface ProxyErrorResponse {
|
|
10
7
|
statusCode: number;
|
|
11
8
|
body: unknown;
|
|
@@ -19,25 +16,16 @@ export interface ProxyErrorFormatter {
|
|
|
19
16
|
concurrencyQueueFull(providerId: string): ProxyErrorResponse;
|
|
20
17
|
concurrencyTimeout(providerId: string, timeoutMs: number): ProxyErrorResponse;
|
|
21
18
|
}
|
|
22
|
-
export
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
export type { ProxyResult, StreamProxyResult, GetProxyResult };
|
|
19
|
+
export type { ProxyResult, StreamProxyResult } from "./transport.js";
|
|
20
|
+
export type { GetTransportResult as GetProxyResult } from "./transport.js";
|
|
21
|
+
export type ErrorKind = "modelNotFound" | "modelNotAllowed" | "providerUnavailable" | "providerTypeMismatch" | "upstreamConnectionFailed" | "concurrencyQueueFull" | "concurrencyTimeout";
|
|
22
|
+
/**
|
|
23
|
+
* 工厂函数,消除 openai/anthropic 错误格式化的重复代码。
|
|
24
|
+
* statusCode 和 message 两个 provider 完全一致,仅 body 格式不同,
|
|
25
|
+
* 由 formatBody 回调根据 kind 参数映射各自的 type/code 并组装 body。
|
|
26
|
+
*/
|
|
27
|
+
export declare function createErrorFormatter(formatBody: (kind: ErrorKind, message: string) => Record<string, unknown>): ProxyErrorFormatter;
|
|
32
28
|
export declare const SKIP_UPSTREAM: Set<string>;
|
|
33
29
|
export declare function selectHeaders(raw: RawHeaders, skip: Set<string>): Record<string, string>;
|
|
34
30
|
export declare function buildUpstreamHeaders(clientHeaders: RawHeaders, apiKey: string, payloadBytes?: number): Record<string, string>;
|
|
35
|
-
export declare function proxyGetRequest(backend: Provider, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string): Promise<
|
|
36
|
-
/**
|
|
37
|
-
* 共享 POST handler,参数化 apiType/errorFormat/upstreamPath 等差异。
|
|
38
|
-
* 当分组策略为 failover 时,在 while 循环中依次尝试不同 target,
|
|
39
|
-
* 直到成功(或 headers 已发送)才返回。
|
|
40
|
-
*/
|
|
41
|
-
export declare function handleProxyPost(request: FastifyRequest, reply: FastifyReply, apiType: "openai" | "anthropic", upstreamPath: string, errors: ProxyErrorFormatter, deps: ProxyHandlerDeps, options?: {
|
|
42
|
-
beforeSendProxy?: (body: Record<string, unknown>, isStream: boolean) => void;
|
|
43
|
-
}): Promise<FastifyReply>;
|
|
31
|
+
export declare function proxyGetRequest(backend: Provider, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string): Promise<GetTransportResult>;
|