llm-simple-router 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/recommended-providers.json +76 -0
- package/config/recommended-retry-rules.json +10 -0
- package/dist/admin/api-response.d.ts +27 -0
- package/dist/admin/api-response.js +40 -0
- package/dist/admin/constants.d.ts +0 -2
- package/dist/admin/constants.js +0 -3
- package/dist/admin/groups.js +9 -5
- package/dist/admin/logs.js +3 -2
- package/dist/admin/mappings.js +7 -6
- package/dist/admin/metrics.js +23 -5
- package/dist/admin/monitor.js +2 -1
- package/dist/admin/providers.js +13 -4
- package/dist/admin/proxy-enhancement.js +11 -6
- package/dist/admin/recommended.js +1 -9
- package/dist/admin/retry-rules.js +8 -4
- package/dist/admin/router-keys.js +5 -1
- package/dist/admin/routes.js +2 -0
- package/dist/admin/settings-import-export.js +3 -2
- package/dist/admin/settings.js +7 -5
- package/dist/admin/setup.js +3 -2
- package/dist/admin/stats.js +20 -3
- package/dist/admin/upgrade.d.ts +13 -0
- package/dist/admin/upgrade.js +114 -0
- package/dist/admin/usage.js +12 -24
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/constants.d.ts +3 -0
- package/dist/constants.js +11 -0
- package/dist/db/index.d.ts +3 -3
- package/dist/db/index.js +2 -2
- package/dist/db/mappings.js +5 -8
- package/dist/db/metrics.js +3 -4
- package/dist/db/providers.d.ts +8 -0
- package/dist/db/providers.js +6 -0
- package/dist/db/retry-rules.d.ts +1 -0
- package/dist/db/retry-rules.js +3 -0
- package/dist/db/settings.d.ts +2 -0
- package/dist/db/settings.js +7 -0
- package/dist/db/stats.d.ts +1 -2
- package/dist/db/stats.js +7 -11
- package/dist/index.d.ts +2 -0
- package/dist/index.js +55 -34
- package/dist/metrics/metrics-extractor.js +1 -1
- package/dist/metrics/sse-parser.js +2 -0
- package/dist/middleware/admin-auth.js +6 -5
- package/dist/middleware/auth.js +1 -10
- package/dist/monitor/request-tracker.d.ts +1 -0
- package/dist/monitor/request-tracker.js +9 -45
- package/dist/monitor/runtime-collector.js +1 -1
- package/dist/monitor/stream-content-accumulator.d.ts +14 -0
- package/dist/monitor/stream-content-accumulator.js +58 -0
- package/dist/proxy/anthropic.d.ts +2 -1
- package/dist/proxy/anthropic.js +3 -3
- package/dist/proxy/enhancement/directive-parser.d.ts +18 -0
- package/dist/proxy/{directive-parser.js → enhancement/directive-parser.js} +44 -0
- package/dist/proxy/{enhancement-handler.js → enhancement/enhancement-handler.js} +152 -32
- package/dist/proxy/enhancement/index.d.ts +3 -0
- package/dist/proxy/enhancement/index.js +3 -0
- package/dist/proxy/{response-cleaner.js → enhancement/response-cleaner.js} +14 -0
- package/dist/proxy/log-helpers.d.ts +1 -1
- package/dist/proxy/mapping-resolver.js +4 -4
- package/dist/proxy/openai.d.ts +2 -1
- package/dist/proxy/openai.js +4 -4
- package/dist/proxy/orchestrator.d.ts +0 -1
- package/dist/proxy/orchestrator.js +1 -3
- package/dist/proxy/proxy-core.d.ts +0 -4
- package/dist/proxy/proxy-core.js +0 -2
- package/dist/proxy/proxy-handler.d.ts +1 -1
- package/dist/proxy/proxy-handler.js +52 -132
- package/dist/proxy/proxy-logging.d.ts +0 -2
- package/dist/proxy/proxy-logging.js +1 -3
- package/dist/proxy/resilience.d.ts +5 -2
- package/dist/proxy/resilience.js +16 -7
- package/dist/proxy/strategy/failover.js +2 -7
- package/dist/proxy/strategy/random.js +2 -2
- package/dist/proxy/strategy/round-robin.js +2 -2
- package/dist/proxy/strategy/scheduled.js +1 -8
- package/dist/proxy/strategy/targets-rule.d.ts +1 -0
- package/dist/proxy/strategy/targets-rule.js +5 -0
- package/dist/proxy/transport-fn.d.ts +25 -0
- package/dist/proxy/transport-fn.js +55 -0
- package/dist/proxy/transport.d.ts +0 -25
- package/dist/proxy/transport.js +0 -38
- package/dist/upgrade/checker.d.ts +25 -0
- package/dist/upgrade/checker.js +120 -0
- package/dist/upgrade/deployment.d.ts +2 -0
- package/dist/upgrade/deployment.js +20 -0
- package/dist/upgrade/version.d.ts +1 -0
- package/dist/upgrade/version.js +13 -0
- package/dist/utils/password.js +4 -2
- package/dist/utils/time-range.d.ts +9 -0
- package/dist/utils/time-range.js +40 -0
- package/frontend-dist/assets/CardContent-WrBnGhTg.js +1 -0
- package/frontend-dist/assets/CardTitle-BcDYk7cq.js +1 -0
- package/frontend-dist/assets/Checkbox-MZf0YsDG.js +1 -0
- package/frontend-dist/assets/CollapsibleTrigger-CrOH9HlW.js +1 -0
- package/frontend-dist/assets/Collection-DcTx_Y54.js +1 -0
- package/frontend-dist/assets/Dashboard-D0oDrSLr.js +3 -0
- package/frontend-dist/assets/DialogTitle-Cl5Cd7QH.js +1 -0
- package/frontend-dist/assets/{Input-l5ZurXX5.js → Input-O0ebU-Va.js} +1 -1
- package/frontend-dist/assets/Label-C_S0y7Um.js +1 -0
- package/frontend-dist/assets/Login-DGY7uF8P.js +1 -0
- package/frontend-dist/assets/Logs-ls8pv89b.js +1 -0
- package/frontend-dist/assets/ModelMappings-DGlf0S4s.js +1 -0
- package/frontend-dist/assets/Monitor-BSI87grz.js +1 -0
- package/frontend-dist/assets/PopperContent-C6Q7hDmf.js +1 -0
- package/frontend-dist/assets/Providers-ZkRpj8_m.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-DFPI1W6Z.js +5 -0
- package/frontend-dist/assets/RetryRules-DtM31qsl.js +1 -0
- package/frontend-dist/assets/RouterKeys-D63tRFKm.js +1 -0
- package/frontend-dist/assets/RovingFocusItem-BJoylAKU.js +1 -0
- package/frontend-dist/assets/SelectValue-CLp5z6_I.js +1 -0
- package/frontend-dist/assets/Settings-DSgRKbTQ.js +6 -0
- package/frontend-dist/assets/Setup-BDmj6CRk.js +1 -0
- package/frontend-dist/assets/Switch-Wz-t_zkv.js +1 -0
- package/frontend-dist/assets/TableHeader-DGtcqGkw.js +1 -0
- package/frontend-dist/assets/TabsTrigger-CPCi2HIa.js +1 -0
- package/frontend-dist/assets/Teleport-DdjYHlNK.js +3 -0
- package/frontend-dist/assets/TooltipTrigger-H_QoPY1n.js +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-BAAfMJJl.js +3 -0
- package/frontend-dist/assets/{VisuallyHidden-BwwTtzb9.js → VisuallyHidden-Cyk-jWwh.js} +1 -1
- package/frontend-dist/assets/VisuallyHiddenInput-CYjNe_H8.js +1 -0
- package/frontend-dist/assets/alert-dialog-Bi3dliLl.js +1 -0
- package/frontend-dist/assets/badge-Kkta3e9W.js +1 -0
- package/frontend-dist/assets/button-BQ3s7yNh.js +12 -0
- package/frontend-dist/assets/{createLucideIcon-Biq59l_W.js → createLucideIcon-D1tkPDOQ.js} +1 -1
- package/frontend-dist/assets/dialog-DoIATUYw.js +1 -0
- package/frontend-dist/assets/{file-text-DoRW0hQW.js → file-text-Dt6QP1bZ.js} +1 -1
- package/frontend-dist/assets/index-BY0E7CHR.js +1 -0
- package/frontend-dist/assets/index-Bnrh1mFY.css +1 -0
- package/frontend-dist/assets/lib-CxwxnlwW.js +1 -0
- package/frontend-dist/assets/{ohash.D__AXeF1-BGxYMs6k.js → ohash.D__AXeF1-b0PiKZB_.js} +1 -1
- package/frontend-dist/assets/{useClipboard-vaHkvJHw.js → useClipboard-Cnnz6AAN.js} +1 -1
- package/frontend-dist/assets/useLogRetention-DYP5LOAc.js +1 -0
- package/frontend-dist/assets/useNonce-DKbOCfgM.js +1 -0
- package/frontend-dist/assets/x-CAoitXRt.js +1 -0
- package/frontend-dist/index.html +18 -9
- package/package.json +2 -1
- package/dist/proxy/directive-parser.d.ts +0 -7
- package/frontend-dist/assets/CardContent-CIO85eT6.js +0 -1
- package/frontend-dist/assets/CardTitle-DiqIReMT.js +0 -1
- package/frontend-dist/assets/Checkbox-C2u5pIp4.js +0 -1
- package/frontend-dist/assets/CollapsibleTrigger-RKFL41om.js +0 -1
- package/frontend-dist/assets/Collection-iiNnuTQj.js +0 -1
- package/frontend-dist/assets/Dashboard-DOEqP6gF.js +0 -3
- package/frontend-dist/assets/DialogTitle-CEqndrf6.js +0 -1
- package/frontend-dist/assets/Label-PgGtS8v2.js +0 -1
- package/frontend-dist/assets/Login-DaN6ZcCx.js +0 -1
- package/frontend-dist/assets/Logs-CleRQ7Xk.js +0 -1
- package/frontend-dist/assets/ModelMappings-CacA_ua_.js +0 -1
- package/frontend-dist/assets/Monitor-LSMFOBN2.js +0 -1
- package/frontend-dist/assets/PopperContent-zLFHqQP0.js +0 -1
- package/frontend-dist/assets/Providers-NT5MUDU0.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-DhOy8nNy.js +0 -5
- package/frontend-dist/assets/RetryRules-7arWa3jB.js +0 -1
- package/frontend-dist/assets/RouterKeys-CdaZunRg.js +0 -1
- package/frontend-dist/assets/SelectValue-CSg-MKW_.js +0 -1
- package/frontend-dist/assets/Settings-1ntV9XE3.js +0 -6
- package/frontend-dist/assets/Setup-CXLTDhYJ.js +0 -1
- package/frontend-dist/assets/Switch-DivrIFE3.js +0 -1
- package/frontend-dist/assets/TableHeader-Bn0bodWx.js +0 -1
- package/frontend-dist/assets/TabsContent-MWvOH_LJ.js +0 -1
- package/frontend-dist/assets/TabsTrigger-WKkUfO2M.js +0 -1
- package/frontend-dist/assets/Teleport-B0PNXZbP.js +0 -3
- package/frontend-dist/assets/UnifiedRequestDialog-Ba2e7YuJ.js +0 -3
- package/frontend-dist/assets/VisuallyHiddenInput-EGZSP7s8.js +0 -1
- package/frontend-dist/assets/alert-dialog-CS1yFhdV.js +0 -1
- package/frontend-dist/assets/badge-C-QcC5n2.js +0 -1
- package/frontend-dist/assets/button-Dbz2Be22.js +0 -12
- package/frontend-dist/assets/dialog-Cr0YQlLW.js +0 -1
- package/frontend-dist/assets/index-0H2uCGbx.js +0 -1
- package/frontend-dist/assets/index-D-cdVNCb.css +0 -1
- package/frontend-dist/assets/lib-B0lieqgg.js +0 -1
- package/frontend-dist/assets/useForwardExpose-C2_ks3sW.js +0 -1
- package/frontend-dist/assets/useLogRetention-Cs_fiKql.js +0 -1
- package/frontend-dist/assets/useNonce-C9do0jOI.js +0 -1
- package/frontend-dist/assets/x-BlTnH_0_.js +0 -1
- /package/dist/proxy/{enhancement-handler.d.ts → enhancement/enhancement-handler.d.ts} +0 -0
- /package/dist/proxy/{response-cleaner.d.ts → enhancement/response-cleaner.d.ts} +0 -0
|
@@ -3,19 +3,27 @@ import { getMappingGroup, getProviderById, insertRequestLog } from "../db/index.
|
|
|
3
3
|
import { decrypt } from "../utils/crypto.js";
|
|
4
4
|
import { getSetting } from "../db/settings.js";
|
|
5
5
|
import { resolveMapping } from "./mapping-resolver.js";
|
|
6
|
-
import { applyEnhancement
|
|
6
|
+
import { applyEnhancement } from "./enhancement/enhancement-handler.js";
|
|
7
7
|
import { SemaphoreQueueFullError, SemaphoreTimeoutError } from "./semaphore.js";
|
|
8
8
|
import { logResilienceResult, collectTransportMetrics, handleIntercept, sanitizeHeadersForLog, } from "./proxy-logging.js";
|
|
9
9
|
import { buildUpstreamHeaders } from "./proxy-core.js";
|
|
10
|
-
import {
|
|
11
|
-
import { SSEMetricsTransform } from "../metrics/sse-metrics-transform.js";
|
|
12
|
-
import { MetricsExtractor } from "../metrics/metrics-extractor.js";
|
|
10
|
+
import { ProviderSwitchNeeded } from "./types.js";
|
|
13
11
|
import { updateLogStreamContent } from "../db/index.js";
|
|
14
|
-
import { callNonStream, callStream } from "./transport.js";
|
|
15
12
|
import { insertRejectedLog } from "./log-helpers.js";
|
|
13
|
+
import { buildTransportFn } from "./transport-fn.js";
|
|
16
14
|
const HTTP_ERROR_THRESHOLD = 400;
|
|
17
|
-
|
|
18
|
-
|
|
15
|
+
function rejectAndReply(reply, params, error, errorMessage, providerId) {
|
|
16
|
+
insertRejectedLog({
|
|
17
|
+
db: params.db, logId: params.logId, apiType: params.apiType, model: params.model,
|
|
18
|
+
statusCode: error.statusCode, errorMessage, startTime: params.startTime,
|
|
19
|
+
isStream: params.isStream, routerKeyId: params.routerKeyId,
|
|
20
|
+
originalBody: params.originalBody, clientHeaders: params.clientHeaders,
|
|
21
|
+
providerId, originalModel: params.originalModel,
|
|
22
|
+
isFailover: params.isFailover, originalRequestId: params.originalRequestId,
|
|
23
|
+
sessionId: params.sessionId,
|
|
24
|
+
});
|
|
25
|
+
return reply.code(error.statusCode).send(error.body);
|
|
26
|
+
}
|
|
19
27
|
/** 将 tracker blocks 序列化为前端 tryDirectParse 可解析的 JSON */
|
|
20
28
|
function serializeBlocksForStorage(blocks, apiType) {
|
|
21
29
|
if (!blocks || blocks.length === 0)
|
|
@@ -39,17 +47,7 @@ function serializeBlocksForStorage(blocks, apiType) {
|
|
|
39
47
|
const text = blocks.filter(b => b.type === "text").map(b => b.content).join("");
|
|
40
48
|
return JSON.stringify({ choices: [{ message: { content: text } }] });
|
|
41
49
|
}
|
|
42
|
-
|
|
43
|
-
return {
|
|
44
|
-
inputTokens: m.input_tokens,
|
|
45
|
-
outputTokens: m.output_tokens,
|
|
46
|
-
cacheReadTokens: m.cache_read_tokens,
|
|
47
|
-
ttftMs: m.ttft_ms,
|
|
48
|
-
tokensPerSecond: m.tokens_per_second,
|
|
49
|
-
stopReason: m.stop_reason,
|
|
50
|
-
isComplete: m.is_complete === 1,
|
|
51
|
-
};
|
|
52
|
-
}
|
|
50
|
+
// ---------- Main entry ----------
|
|
53
51
|
export async function handleProxyRequest(request, reply, apiType, upstreamPath, errors, deps, options) {
|
|
54
52
|
request.raw.socket.on("error", (err) => request.log.debug({ err }, "client socket error"));
|
|
55
53
|
const clientModel = request.body.model || "unknown";
|
|
@@ -58,9 +56,18 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
58
56
|
if (interceptResponse)
|
|
59
57
|
return handleIntercept(deps.db, apiType, request, reply, interceptResponse, clientModel, sessionId);
|
|
60
58
|
const group = getMappingGroup(deps.db, effectiveModel);
|
|
61
|
-
|
|
59
|
+
return executeFailoverLoop({
|
|
60
|
+
request, reply, apiType, upstreamPath, errors, deps, options,
|
|
61
|
+
effectiveModel, originalModel,
|
|
62
|
+
isFailover: group?.strategy === "failover",
|
|
63
|
+
originalBody: JSON.parse(JSON.stringify(request.body)),
|
|
64
|
+
sessionId,
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
// ---------- Failover loop ----------
|
|
68
|
+
async function executeFailoverLoop(ctx) {
|
|
69
|
+
const { request, reply, apiType, upstreamPath, errors, deps, options, effectiveModel, originalModel, isFailover, originalBody, sessionId } = ctx;
|
|
62
70
|
const excludeTargets = [];
|
|
63
|
-
const originalBody = JSON.parse(JSON.stringify(request.body));
|
|
64
71
|
let rootLogId = null;
|
|
65
72
|
while (true) {
|
|
66
73
|
const startTime = Date.now();
|
|
@@ -72,27 +79,18 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
72
79
|
const body = request.body;
|
|
73
80
|
const isStream = body.stream === true;
|
|
74
81
|
const cliHdrs = request.headers;
|
|
82
|
+
const rCtx = {
|
|
83
|
+
db: deps.db, logId, apiType, model: effectiveModel,
|
|
84
|
+
startTime, isStream, routerKeyId, originalBody, clientHeaders: cliHdrs, originalModel,
|
|
85
|
+
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
86
|
+
};
|
|
75
87
|
const resolved = resolveMapping(deps.db, effectiveModel, { now: new Date(), excludeTargets });
|
|
76
88
|
request.log.debug({ logId, model: effectiveModel, apiType, isStream, action: "resolve_mapping", resolved: !!resolved });
|
|
77
89
|
if (!resolved) {
|
|
78
90
|
if (isFailover && excludeTargets.length > 0) {
|
|
79
|
-
|
|
80
|
-
insertRejectedLog({
|
|
81
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: e.statusCode,
|
|
82
|
-
errorMessage: `All failover targets exhausted (${excludeTargets.length} attempted)`,
|
|
83
|
-
startTime, isStream, routerKeyId, originalBody, clientHeaders: cliHdrs, originalModel,
|
|
84
|
-
isFailover: true, originalRequestId: rootLogId, sessionId,
|
|
85
|
-
});
|
|
86
|
-
return reply.status(e.statusCode).send(e.body);
|
|
91
|
+
return rejectAndReply(reply, { ...rCtx, isFailover: true, originalRequestId: rootLogId }, errors.upstreamConnectionFailed(), `All failover targets exhausted (${excludeTargets.length} attempted)`);
|
|
87
92
|
}
|
|
88
|
-
|
|
89
|
-
insertRejectedLog({
|
|
90
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: e.statusCode,
|
|
91
|
-
errorMessage: `No mapping found for model '${effectiveModel}'`, startTime, isStream,
|
|
92
|
-
routerKeyId, originalBody, clientHeaders: cliHdrs, originalModel,
|
|
93
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
94
|
-
});
|
|
95
|
-
return reply.status(e.statusCode).send(e.body);
|
|
93
|
+
return rejectAndReply(reply, rCtx, errors.modelNotFound(effectiveModel), `No mapping found for model '${effectiveModel}'`);
|
|
96
94
|
}
|
|
97
95
|
if (excludeTargets.length === 0) {
|
|
98
96
|
const allowedModels = request.routerKey?.allowed_models;
|
|
@@ -100,14 +98,7 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
100
98
|
try {
|
|
101
99
|
const models = JSON.parse(allowedModels).filter((m) => m.trim() !== "");
|
|
102
100
|
if (models.length > 0 && !models.includes(resolved.backend_model)) {
|
|
103
|
-
|
|
104
|
-
insertRejectedLog({
|
|
105
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: e.statusCode,
|
|
106
|
-
errorMessage: `Model '${resolved.backend_model}' not allowed`, startTime, isStream, routerKeyId,
|
|
107
|
-
originalBody, clientHeaders: cliHdrs, providerId: resolved.provider_id, originalModel,
|
|
108
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
109
|
-
});
|
|
110
|
-
return reply.status(e.statusCode).send(e.body);
|
|
101
|
+
return rejectAndReply(reply, rCtx, errors.modelNotAllowed(resolved.backend_model), `Model '${resolved.backend_model}' not allowed`, resolved.provider_id);
|
|
111
102
|
}
|
|
112
103
|
}
|
|
113
104
|
catch {
|
|
@@ -117,24 +108,10 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
117
108
|
}
|
|
118
109
|
const provider = getProviderById(deps.db, resolved.provider_id);
|
|
119
110
|
if (!provider || !provider.is_active) {
|
|
120
|
-
|
|
121
|
-
insertRejectedLog({
|
|
122
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: e.statusCode,
|
|
123
|
-
errorMessage: `Provider '${resolved.provider_id}' unavailable`, startTime, isStream, routerKeyId,
|
|
124
|
-
originalBody, clientHeaders: cliHdrs, providerId: resolved.provider_id, originalModel,
|
|
125
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
126
|
-
});
|
|
127
|
-
return reply.status(e.statusCode).send(e.body);
|
|
111
|
+
return rejectAndReply(reply, rCtx, errors.providerUnavailable(), `Provider '${resolved.provider_id}' unavailable`, resolved.provider_id);
|
|
128
112
|
}
|
|
129
113
|
if (provider.api_type !== apiType) {
|
|
130
|
-
|
|
131
|
-
insertRejectedLog({
|
|
132
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: e.statusCode,
|
|
133
|
-
errorMessage: `API type mismatch: expected '${apiType}'`, startTime, isStream, routerKeyId,
|
|
134
|
-
originalBody, clientHeaders: cliHdrs, providerId: resolved.provider_id, originalModel,
|
|
135
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
136
|
-
});
|
|
137
|
-
return reply.status(e.statusCode).send(e.body);
|
|
114
|
+
return rejectAndReply(reply, rCtx, errors.providerTypeMismatch(), `API type mismatch: expected '${apiType}'`, resolved.provider_id);
|
|
138
115
|
}
|
|
139
116
|
body.model = resolved.backend_model;
|
|
140
117
|
const apiKey = decrypt(provider.api_key, getSetting(deps.db, "encryption_key"));
|
|
@@ -146,63 +123,23 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
146
123
|
headers: sanitizeHeadersForLog(buildUpstreamHeaders(cliHdrs, apiKey, Buffer.byteLength(reqBodyStr))),
|
|
147
124
|
body: reqBodyStr,
|
|
148
125
|
});
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
const metricsTransform = new SSEMetricsTransform(apiType, startTime, {
|
|
155
|
-
onMetrics: (m) => {
|
|
156
|
-
deps.tracker?.update(logId, { streamMetrics: toStreamMetrics(m) });
|
|
157
|
-
},
|
|
158
|
-
onChunk: (rawLine) => {
|
|
159
|
-
deps.tracker?.appendStreamChunk(logId, rawLine, apiType, STREAM_CONTENT_MAX_RAW, STREAM_CONTENT_MAX_TEXT);
|
|
160
|
-
},
|
|
161
|
-
});
|
|
162
|
-
const checkEarlyError = deps.matcher
|
|
163
|
-
? (data) => deps.matcher.test(UPSTREAM_SUCCESS, data)
|
|
164
|
-
: undefined;
|
|
165
|
-
const streamResult = await callStream(provider, apiKey, body, cliHdrs, reply, deps.streamTimeoutMs, upstreamPath, buildUpstreamHeaders, metricsTransform, checkEarlyError);
|
|
166
|
-
const m = (streamResult.kind === "stream_success" || streamResult.kind === "stream_abort")
|
|
167
|
-
? streamResult.metrics : undefined;
|
|
168
|
-
if (m)
|
|
169
|
-
deps.tracker?.update(logId, { streamMetrics: toStreamMetrics(m) });
|
|
170
|
-
return streamResult;
|
|
171
|
-
}
|
|
172
|
-
const result = await callNonStream(provider, apiKey, body, cliHdrs, upstreamPath, buildUpstreamHeaders);
|
|
173
|
-
// 非流式请求:从响应体提取指标并更新 tracker
|
|
174
|
-
if (result.kind === "success") {
|
|
175
|
-
const mr = MetricsExtractor.fromNonStreamResponse(apiType, result.body);
|
|
176
|
-
if (mr) {
|
|
177
|
-
deps.tracker?.update(logId, { streamMetrics: toStreamMetrics(mr) });
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
// 非流式响应注入模型信息标签(模型映射场景)
|
|
181
|
-
if (originalModel && result.kind === "success" && result.statusCode === UPSTREAM_SUCCESS) {
|
|
182
|
-
try {
|
|
183
|
-
const bodyObj = JSON.parse(result.body);
|
|
184
|
-
if (bodyObj.content?.[0]?.text) {
|
|
185
|
-
bodyObj.content[0].text += `\n\n${buildModelInfoTag(effectiveModel)}`;
|
|
186
|
-
return { ...result, body: JSON.stringify(bodyObj) };
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
catch {
|
|
190
|
-
request.log.debug("Failed to inject model-info tag into non-JSON response");
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
return result;
|
|
194
|
-
};
|
|
126
|
+
const transportFn = buildTransportFn({
|
|
127
|
+
provider, apiKey, body, cliHdrs, reply, upstreamPath, apiType,
|
|
128
|
+
isStream, startTime, logId, effectiveModel, originalModel,
|
|
129
|
+
streamTimeoutMs: deps.streamTimeoutMs, tracker: deps.tracker, matcher: deps.matcher, request,
|
|
130
|
+
});
|
|
195
131
|
try {
|
|
196
|
-
const resilienceResult = await deps.orchestrator.handle(request, reply, apiType, { resolved, provider, clientModel: effectiveModel, isStream, trackerId: logId, sessionId, clientRequest: clientReq }, {
|
|
132
|
+
const resilienceResult = await deps.orchestrator.handle(request, reply, apiType, { resolved, provider, clientModel: effectiveModel, isStream, trackerId: logId, sessionId, clientRequest: clientReq }, { retryBaseDelayMs: deps.retryBaseDelayMs, isFailover, ruleMatcher: deps.matcher, transportFn });
|
|
197
133
|
const lastLogId = logResilienceResult(deps.db, {
|
|
198
134
|
apiType, model: effectiveModel, providerId: provider.id, isStream,
|
|
199
135
|
clientReq, upstreamReqBase, logId, routerKeyId, originalModel, sessionId,
|
|
200
136
|
failover: { isFailoverIteration, rootLogId: rootLogId },
|
|
201
137
|
}, resilienceResult.attempts, resilienceResult.result, startTime);
|
|
202
138
|
collectTransportMetrics(deps.db, apiType, resilienceResult.result, isStream, lastLogId, provider.id, resolved.backend_model, request);
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
139
|
+
const tr = resilienceResult.result;
|
|
140
|
+
const succeeded = tr.kind === "success" || tr.kind === "stream_success" || tr.kind === "stream_abort";
|
|
141
|
+
if (succeeded)
|
|
142
|
+
deps.usageWindowTracker?.recordRequest(routerKeyId ?? undefined);
|
|
206
143
|
if (isStream && deps.tracker) {
|
|
207
144
|
const sc = deps.tracker.get(logId)?.streamContent;
|
|
208
145
|
const blocks = sc?.blocks;
|
|
@@ -213,7 +150,6 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
213
150
|
if (content)
|
|
214
151
|
updateLogStreamContent(deps.db, lastLogId, content);
|
|
215
152
|
}
|
|
216
|
-
// Failover: 单 provider 内重试已耗尽但仍失败,尝试下一个 target
|
|
217
153
|
if (isFailover && !reply.raw.headersSent) {
|
|
218
154
|
const tr = resilienceResult.result;
|
|
219
155
|
const failed = tr.kind === "throw"
|
|
@@ -224,12 +160,12 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
224
160
|
}
|
|
225
161
|
}
|
|
226
162
|
// orchestrator.sendResponse 对 throw/stream_success/stream_abort 不发送,
|
|
227
|
-
//
|
|
163
|
+
// 对 failover 场景的错误也不发送——这些情况需要外层 proxy-handler 处理
|
|
228
164
|
if (!reply.raw.headersSent) {
|
|
229
165
|
const tr = resilienceResult.result;
|
|
230
166
|
if (tr.kind === "throw" || (tr.kind === "error" && tr.statusCode >= HTTP_ERROR_THRESHOLD)) {
|
|
231
167
|
const err = errors.upstreamConnectionFailed();
|
|
232
|
-
return reply.
|
|
168
|
+
return reply.code(err.statusCode).send(err.body);
|
|
233
169
|
}
|
|
234
170
|
}
|
|
235
171
|
return reply;
|
|
@@ -241,26 +177,10 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
241
177
|
continue;
|
|
242
178
|
}
|
|
243
179
|
if (e instanceof SemaphoreQueueFullError) {
|
|
244
|
-
|
|
245
|
-
insertRejectedLog({
|
|
246
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: err.statusCode,
|
|
247
|
-
errorMessage: `Concurrency queue full for provider '${provider.id}'`,
|
|
248
|
-
startTime, isStream, routerKeyId, originalBody, clientHeaders: cliHdrs,
|
|
249
|
-
providerId: provider.id, originalModel,
|
|
250
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
251
|
-
});
|
|
252
|
-
return reply.status(err.statusCode).send(err.body);
|
|
180
|
+
return rejectAndReply(reply, rCtx, errors.concurrencyQueueFull(provider.id), `Concurrency queue full for provider '${provider.id}'`, provider.id);
|
|
253
181
|
}
|
|
254
182
|
if (e instanceof SemaphoreTimeoutError) {
|
|
255
|
-
|
|
256
|
-
insertRejectedLog({
|
|
257
|
-
db: deps.db, logId, apiType, model: effectiveModel, statusCode: err.statusCode,
|
|
258
|
-
errorMessage: `Concurrency wait timeout for provider '${provider.id}' (${e.timeoutMs}ms)`,
|
|
259
|
-
startTime, isStream, routerKeyId, originalBody, clientHeaders: cliHdrs,
|
|
260
|
-
providerId: provider.id, originalModel,
|
|
261
|
-
isFailover: isFailoverIteration, originalRequestId: isFailoverIteration ? rootLogId : null, sessionId,
|
|
262
|
-
});
|
|
263
|
-
return reply.status(err.statusCode).send(err.body);
|
|
183
|
+
return rejectAndReply(reply, rCtx, errors.concurrencyTimeout(provider.id, e.timeoutMs), `Concurrency wait timeout for provider '${provider.id}' (${e.timeoutMs}ms)`, provider.id);
|
|
264
184
|
}
|
|
265
185
|
const errMsg = e instanceof Error ? e.message : String(e);
|
|
266
186
|
request.log.debug({ logId, error: errMsg, action: "upstream_error" });
|
|
@@ -274,7 +194,7 @@ export async function handleProxyRequest(request, reply, apiType, upstreamPath,
|
|
|
274
194
|
session_id: sessionId,
|
|
275
195
|
});
|
|
276
196
|
const err = errors.upstreamConnectionFailed();
|
|
277
|
-
return reply.
|
|
197
|
+
return reply.code(err.statusCode).send(err.body);
|
|
278
198
|
}
|
|
279
199
|
}
|
|
280
200
|
}
|
|
@@ -3,8 +3,6 @@ import { type FailoverContext } from "./log-helpers.js";
|
|
|
3
3
|
import type { FastifyRequest } from "fastify";
|
|
4
4
|
import type { ResilienceAttempt } from "./resilience.js";
|
|
5
5
|
import type { TransportResult } from "./types.js";
|
|
6
|
-
export { UPSTREAM_SUCCESS } from "./types.js";
|
|
7
|
-
export type { RawHeaders } from "./types.js";
|
|
8
6
|
/** 日志存储前脱敏 Authorization header,避免 API Key 被持久化 */
|
|
9
7
|
export declare function sanitizeHeadersForLog(headers: Record<string, string>): Record<string, string>;
|
|
10
8
|
export declare function handleIntercept(db: Database.Database, apiType: "openai" | "anthropic", request: FastifyRequest, reply: import("fastify").FastifyReply, interceptResponse: {
|
|
@@ -2,8 +2,6 @@ import { randomUUID } from "crypto";
|
|
|
2
2
|
import { insertRequestLog, insertMetrics, updateLogMetrics } from "../db/index.js";
|
|
3
3
|
import { insertSuccessLog } from "./log-helpers.js";
|
|
4
4
|
import { MetricsExtractor } from "../metrics/metrics-extractor.js";
|
|
5
|
-
// Re-export shared constants & types from types.ts (canonical home)
|
|
6
|
-
export { UPSTREAM_SUCCESS } from "./types.js";
|
|
7
5
|
// Internal imports from types.ts
|
|
8
6
|
import { UPSTREAM_SUCCESS } from "./types.js";
|
|
9
7
|
import { HTTP_BAD_GATEWAY } from "../constants.js";
|
|
@@ -34,7 +32,7 @@ export function handleIntercept(db, apiType, request, reply, interceptResponse,
|
|
|
34
32
|
router_key_id: request.routerKey?.id ?? null, original_model: null,
|
|
35
33
|
session_id: sessionId,
|
|
36
34
|
});
|
|
37
|
-
return reply.
|
|
35
|
+
return reply.code(interceptResponse.statusCode).send(interceptResponse.body);
|
|
38
36
|
}
|
|
39
37
|
// ---------- New-architecture logging ----------
|
|
40
38
|
export function logResilienceResult(db, params, attempts, result, startTime) {
|
|
@@ -7,7 +7,7 @@ export interface RetryStrategy {
|
|
|
7
7
|
export declare class FixedIntervalStrategy implements RetryStrategy {
|
|
8
8
|
private delayMs;
|
|
9
9
|
constructor(delayMs: number);
|
|
10
|
-
getDelay(): number;
|
|
10
|
+
getDelay(_attempt: number): number;
|
|
11
11
|
}
|
|
12
12
|
export declare class ExponentialBackoffStrategy implements RetryStrategy {
|
|
13
13
|
private baseMs;
|
|
@@ -21,11 +21,14 @@ export declare function createStrategy(rule: {
|
|
|
21
21
|
max_delay_ms: number;
|
|
22
22
|
}): RetryStrategy;
|
|
23
23
|
export interface ResilienceConfig {
|
|
24
|
-
maxRetries: number;
|
|
25
24
|
baseDelayMs: number;
|
|
26
25
|
failoverThreshold: number;
|
|
27
26
|
ruleMatcher?: RetryRuleMatcher;
|
|
28
27
|
isFailover: boolean;
|
|
28
|
+
/** DB 规则 max_retries 的全局安全阀,防止单规则配置导致过多重试 */
|
|
29
|
+
globalRetryCap?: number;
|
|
30
|
+
/** 全局迭代上限,防止极端配置导致 while(true) 循环过多 */
|
|
31
|
+
iterationCap?: number;
|
|
29
32
|
}
|
|
30
33
|
export interface ResilienceAttempt {
|
|
31
34
|
target: Target;
|
package/dist/proxy/resilience.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
import { MS_PER_SECOND } from "../constants.js";
|
|
1
2
|
import { ProviderSwitchNeeded } from "./types.js";
|
|
2
3
|
export class FixedIntervalStrategy {
|
|
3
4
|
delayMs;
|
|
4
5
|
constructor(delayMs) {
|
|
5
6
|
this.delayMs = delayMs;
|
|
6
7
|
}
|
|
7
|
-
|
|
8
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
9
|
+
getDelay(_attempt) { return this.delayMs; }
|
|
8
10
|
}
|
|
9
11
|
const EXPONENTIAL_BASE = 2;
|
|
10
12
|
export class ExponentialBackoffStrategy {
|
|
@@ -26,7 +28,9 @@ export function createStrategy(rule) {
|
|
|
26
28
|
// ---------- Constants ----------
|
|
27
29
|
const RETRYABLE_THROW_CODES = new Set(["ETIMEDOUT", "ECONNRESET", "ECONNREFUSED"]);
|
|
28
30
|
const HTTP_TOO_MANY_REQUESTS = 429;
|
|
29
|
-
const
|
|
31
|
+
const DEFAULT_THROW_MAX_RETRIES = 3;
|
|
32
|
+
const DEFAULT_RETRY_CAP = 3;
|
|
33
|
+
const DEFAULT_ITERATION_CAP = 50;
|
|
30
34
|
// ---------- Internal helpers ----------
|
|
31
35
|
function isRetryableThrow(err) {
|
|
32
36
|
if (err && typeof err === "object" && "code" in err) {
|
|
@@ -73,7 +77,7 @@ export class ResilienceLayer {
|
|
|
73
77
|
if (!isRetryableThrow(result.error)) {
|
|
74
78
|
return { action: "abort", reason: result.error.message };
|
|
75
79
|
}
|
|
76
|
-
if (state.attemptCount <
|
|
80
|
+
if (state.attemptCount < DEFAULT_THROW_MAX_RETRIES) {
|
|
77
81
|
return { action: "retry", delayMs: config.baseDelayMs };
|
|
78
82
|
}
|
|
79
83
|
return config.isFailover
|
|
@@ -86,8 +90,7 @@ export class ResilienceLayer {
|
|
|
86
90
|
const matchedRule = body && config.ruleMatcher
|
|
87
91
|
? config.ruleMatcher.match(result.statusCode, body)
|
|
88
92
|
: null;
|
|
89
|
-
|
|
90
|
-
if (matchedRule && state.attemptCount < effectiveMaxRetries) {
|
|
93
|
+
if (matchedRule && state.attemptCount < Math.min(matchedRule.max_retries, config.globalRetryCap ?? DEFAULT_RETRY_CAP)) {
|
|
91
94
|
const strategy = createStrategy(matchedRule);
|
|
92
95
|
const headers = extractHeaders(result);
|
|
93
96
|
const retryAfterMs = result.statusCode === HTTP_TOO_MANY_REQUESTS
|
|
@@ -103,8 +106,7 @@ export class ResilienceLayer {
|
|
|
103
106
|
const body = extractBody(result);
|
|
104
107
|
if (body && config.ruleMatcher) {
|
|
105
108
|
const matchedRule = config.ruleMatcher.match(result.statusCode, body);
|
|
106
|
-
|
|
107
|
-
if (matchedRule && state.attemptCount < effectiveMaxRetries) {
|
|
109
|
+
if (matchedRule && state.attemptCount < Math.min(matchedRule.max_retries, config.globalRetryCap ?? DEFAULT_RETRY_CAP)) {
|
|
108
110
|
const strategy = createStrategy(matchedRule);
|
|
109
111
|
return { action: "retry", delayMs: strategy.getDelay(state.attemptCount) };
|
|
110
112
|
}
|
|
@@ -123,6 +125,13 @@ export class ResilienceLayer {
|
|
|
123
125
|
perTargetCounts.set(targetKey(t), (perTargetCounts.get(targetKey(t)) ?? 0) + 1);
|
|
124
126
|
};
|
|
125
127
|
while (true) {
|
|
128
|
+
if (globalAttemptIndex >= (config.iterationCap ?? DEFAULT_ITERATION_CAP)) {
|
|
129
|
+
return {
|
|
130
|
+
result: lastResult ?? { kind: "error", statusCode: 502, body: "Iteration cap exceeded", headers: {}, sentHeaders: {}, sentBody: "" },
|
|
131
|
+
attempts: allAttempts,
|
|
132
|
+
excludedTargets,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
126
135
|
const available = targets().filter(t => !excludedTargets.some(e => e.backend_model === t.backend_model && e.provider_id === t.provider_id));
|
|
127
136
|
if (available.length === 0) {
|
|
128
137
|
return {
|
|
@@ -1,13 +1,8 @@
|
|
|
1
|
-
import { isTargetsRule } from "./targets-rule.js";
|
|
1
|
+
import { isTargetsRule, filterExcluded } from "./targets-rule.js";
|
|
2
2
|
/**
 * Target selection strategy that always prefers the earliest listed target
 * which has not been excluded.
 */
export class FailoverStrategy {
    /**
     * Picks the first target of the rule that is not in context.excludeTargets.
     *
     * @param rule candidate rule; anything that fails isTargetsRule yields undefined
     * @param context selection context whose optional excludeTargets list is honoured
     * @returns the first remaining target, or undefined when none is left
     */
    select(rule, context) {
        if (isTargetsRule(rule)) {
            const remaining = filterExcluded(rule.targets, context.excludeTargets);
            return remaining[0];
        }
        return undefined;
    }
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { isTargetsRule } from "./targets-rule.js";
|
|
1
|
+
import { isTargetsRule, filterExcluded } from "./targets-rule.js";
|
|
2
2
|
export class RandomStrategy {
|
|
3
3
|
select(rule, context) {
|
|
4
4
|
if (!isTargetsRule(rule))
|
|
5
5
|
return undefined;
|
|
6
|
-
const filtered = rule.targets
|
|
6
|
+
const filtered = filterExcluded(rule.targets, context.excludeTargets);
|
|
7
7
|
if (filtered.length === 0)
|
|
8
8
|
return undefined;
|
|
9
9
|
return filtered[Math.floor(Math.random() * filtered.length)];
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { isTargetsRule } from "./targets-rule.js";
|
|
1
|
+
import { isTargetsRule, filterExcluded } from "./targets-rule.js";
|
|
2
2
|
export class RoundRobinStrategy {
|
|
3
3
|
indexMap = new Map();
|
|
4
4
|
select(rule, context, clientModel) {
|
|
5
5
|
if (!isTargetsRule(rule))
|
|
6
6
|
return undefined;
|
|
7
7
|
const key = clientModel ?? JSON.stringify(rule);
|
|
8
|
-
const filtered = rule.targets
|
|
8
|
+
const filtered = filterExcluded(rule.targets, context.excludeTargets);
|
|
9
9
|
if (filtered.length === 0)
|
|
10
10
|
return undefined;
|
|
11
11
|
const lastIndex = this.indexMap.get(key) ?? -1;
|
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
return (typeof value === "object" &&
|
|
3
|
-
value !== null &&
|
|
4
|
-
"backend_model" in value &&
|
|
5
|
-
typeof value.backend_model === "string" &&
|
|
6
|
-
"provider_id" in value &&
|
|
7
|
-
typeof value.provider_id === "string");
|
|
8
|
-
}
|
|
1
|
+
import { isTarget } from "./targets-rule.js";
|
|
9
2
|
function isTimeWindow(value) {
|
|
10
3
|
return (typeof value === "object" &&
|
|
11
4
|
value !== null &&
|
|
@@ -4,4 +4,5 @@ interface TargetsRule {
|
|
|
4
4
|
}
|
|
5
5
|
/**
 * Type guard for Target.
 *
 * NOTE(review): the implementation is not part of this hunk; a local copy
 * removed elsewhere in this diff required a non-null object with string
 * `backend_model` and `provider_id` properties — confirm it is the same check.
 */
export declare function isTarget(value: unknown): value is Target;
|
|
6
6
|
/**
 * Type guard for TargetsRule.
 *
 * The visible end of the implementation requires `targets` to be an array
 * whose every element passes isTarget.
 */
export declare function isTargetsRule(value: unknown): value is TargetsRule;
|
|
7
|
+
/**
 * Returns the targets that do not match any entry of `excludeTargets`.
 *
 * Two targets match when both `backend_model` and `provider_id` are equal.
 * When `excludeTargets` is missing or empty the `targets` array itself is
 * returned, not a copy.
 */
export declare function filterExcluded(targets: Target[], excludeTargets?: Target[]): Target[];
|
|
7
8
|
export {};
|
|
@@ -12,3 +12,8 @@ export function isTargetsRule(value) {
|
|
|
12
12
|
const r = value;
|
|
13
13
|
return Array.isArray(r.targets) && r.targets.every(isTarget);
|
|
14
14
|
}
|
|
15
|
+
/**
 * Drops every target that appears in the exclusion list.
 *
 * A target counts as excluded when some exclusion entry has the same
 * backend_model and the same provider_id.
 *
 * @param targets candidate targets, in priority order
 * @param excludeTargets optional targets to remove
 * @returns the input array itself when there is nothing to exclude,
 *          otherwise a new array with the excluded targets removed
 */
export function filterExcluded(targets, excludeTargets) {
    const nothingToExclude = !excludeTargets || excludeTargets.length === 0;
    if (nothingToExclude) {
        // No copy is made here: callers receive the very array they passed in.
        return targets;
    }
    const isExcluded = (candidate) => excludeTargets.some((banned) => banned.backend_model === candidate.backend_model &&
        banned.provider_id === candidate.provider_id);
    return targets.filter((candidate) => !isExcluded(candidate));
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { FastifyReply, FastifyRequest } from "fastify";
|
|
2
|
+
import { getProviderById } from "../db/index.js";
|
|
3
|
+
import type { RawHeaders, TransportResult } from "./types.js";
|
|
4
|
+
import type { Target } from "./strategy/types.js";
|
|
5
|
+
import type { RequestTracker } from "../monitor/request-tracker.js";
|
|
6
|
+
import type { RetryRuleMatcher } from "./retry-rules.js";
|
|
7
|
+
/**
 * Everything buildTransportFn needs to perform one upstream call for a
 * proxied request and to report its metrics.
 */
export interface TransportFnParams {
|
|
8
|
+
/** Provider record (non-null result of getProviderById) passed to callStream / callNonStream as the backend. */
provider: NonNullable<ReturnType<typeof getProviderById>>;
|
|
9
|
+
/** Key forwarded to callStream / callNonStream together with the provider. */
apiKey: string;
|
|
10
|
+
/** Request body forwarded to the upstream call. */
body: Record<string, unknown>;
|
|
11
|
+
/** Raw headers forwarded to the upstream call (presumably the client's request headers — confirm against the caller). */
cliHdrs: RawHeaders;
|
|
12
|
+
/** Fastify reply handed to callStream; not used on the non-stream path. */
reply: FastifyReply;
|
|
13
|
+
/** Upstream path passed to callStream / callNonStream. */
upstreamPath: string;
|
|
14
|
+
/** API dialect; passed to the metrics extraction and to the tracker's stream-chunk accumulation. */
apiType: "openai" | "anthropic";
|
|
15
|
+
/** Selects the streaming path (callStream) when true, otherwise callNonStream. */
isStream: boolean;
|
|
16
|
+
/** Start timestamp handed to SSEMetricsTransform. */
startTime: number;
|
|
17
|
+
/** Identifier under which tracker updates and stream chunks are recorded. */
logId: string;
|
|
18
|
+
/** Model name passed to buildModelInfoTag when a model-info tag is appended. */
effectiveModel: string;
|
|
19
|
+
/** When truthy, successful non-stream responses get a model-info tag appended to their first text block. */
originalModel: string | null;
|
|
20
|
+
/** Timeout value passed to callStream. */
streamTimeoutMs: number;
|
|
21
|
+
/** Optional tracker that receives stream metrics and raw stream chunks. */
tracker?: RequestTracker;
|
|
22
|
+
/** Optional retry-rule matcher; when present it is used to test buffered stream data for an early error. */
matcher?: RetryRuleMatcher;
|
|
23
|
+
/** Originating Fastify request; its logger receives the debug message when tag injection fails. */
request: FastifyRequest;
|
|
24
|
+
}
|
|
25
|
+
/**
 * Creates the transport callback for one request.
 *
 * The returned function performs a streaming or non-streaming upstream call
 * depending on `p.isStream` and resolves with the transport result.
 * NOTE(review): the implementation ignores the `target` argument and always
 * calls `p.provider` — confirm callers build a new function per target.
 */
export declare function buildTransportFn(p: TransportFnParams): (target: Target) => Promise<TransportResult>;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { callNonStream, callStream } from "./transport.js";
|
|
2
|
+
import { SSEMetricsTransform } from "../metrics/sse-metrics-transform.js";
|
|
3
|
+
import { MetricsExtractor } from "../metrics/metrics-extractor.js";
|
|
4
|
+
import { buildUpstreamHeaders } from "./proxy-core.js";
|
|
5
|
+
import { UPSTREAM_SUCCESS } from "./types.js";
|
|
6
|
+
import { buildModelInfoTag } from "./enhancement/enhancement-handler.js";
|
|
7
|
+
import { DEFAULT_MAX_RAW as STREAM_CONTENT_MAX_RAW, DEFAULT_MAX_TEXT as STREAM_CONTENT_MAX_TEXT } from "../monitor/stream-content-accumulator.js";
|
|
8
|
+
/**
 * Converts a snake_case metrics record into the camelCase shape stored under
 * `streamMetrics` on the request tracker.
 *
 * `is_complete` is turned into a boolean: only the number 1 maps to true.
 *
 * @param m metrics record with snake_case fields
 * @returns a new object with the same values under camelCase keys
 */
function toStreamMetrics(m) {
    const { input_tokens, output_tokens, cache_read_tokens, ttft_ms, tokens_per_second, stop_reason, is_complete, } = m;
    const isComplete = is_complete === 1;
    return {
        inputTokens: input_tokens,
        outputTokens: output_tokens,
        cacheReadTokens: cache_read_tokens,
        ttftMs: ttft_ms,
        tokensPerSecond: tokens_per_second,
        stopReason: stop_reason,
        isComplete,
    };
}
|
|
19
|
+
/**
 * Creates the transport callback for one proxied request.
 *
 * The returned async function performs exactly one upstream call:
 *  - streaming (`p.isStream`): wires an SSEMetricsTransform that reports
 *    metrics and raw chunks to the tracker, then delegates to callStream;
 *  - non-streaming: delegates to callNonStream, reports extracted metrics and,
 *    when `p.originalModel` is set and the upstream answered with
 *    UPSTREAM_SUCCESS, appends a model-info tag to the first text block of
 *    the response body.
 *
 * @param p request-scoped parameters (provider, key, body, tracker, ...)
 * @returns a function that resolves with the transport result of one call
 */
export function buildTransportFn(p) {
    // The target argument is accepted but never read: every call goes to
    // p.provider. NOTE(review): confirm callers build one function per target.
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    return async (_target) => {
        if (p.isStream) {
            const metricsTransform = new SSEMetricsTransform(p.apiType, p.startTime, {
                onMetrics: (m) => { p.tracker?.update(p.logId, { streamMetrics: toStreamMetrics(m) }); },
                onChunk: (rawLine) => { p.tracker?.appendStreamChunk(p.logId, rawLine, p.apiType, STREAM_CONTENT_MAX_RAW, STREAM_CONTENT_MAX_TEXT); },
            });
            // Only install the early-error probe when a rule matcher exists.
            const checkEarlyError = p.matcher ? (data) => p.matcher.test(UPSTREAM_SUCCESS, data) : undefined;
            const streamResult = await callStream(p.provider, p.apiKey, p.body, p.cliHdrs, p.reply, p.streamTimeoutMs, p.upstreamPath, buildUpstreamHeaders, metricsTransform, checkEarlyError);
            // Final metrics are read only from the two result kinds handled here.
            const m = (streamResult.kind === "stream_success" || streamResult.kind === "stream_abort")
                ? streamResult.metrics : undefined;
            if (m)
                p.tracker?.update(p.logId, { streamMetrics: toStreamMetrics(m) });
            return streamResult;
        }
        const result = await callNonStream(p.provider, p.apiKey, p.body, p.cliHdrs, p.upstreamPath, buildUpstreamHeaders);
        // Everything below applies to successful results only.
        if (result.kind !== "success")
            return result;
        const mr = MetricsExtractor.fromNonStreamResponse(p.apiType, result.body);
        if (mr)
            p.tracker?.update(p.logId, { streamMetrics: toStreamMetrics(mr) });
        if (!p.originalModel || result.statusCode !== UPSTREAM_SUCCESS)
            return result;
        try {
            const bodyObj = JSON.parse(result.body);
            // Null-safe lookup: a body that is valid JSON but not an object
            // (e.g. `null`) is skipped instead of being reported as non-JSON.
            const firstBlock = bodyObj?.content?.[0];
            // Append only to a non-empty string so `+=` never coerces another type.
            if (typeof firstBlock?.text === "string" && firstBlock.text) {
                firstBlock.text += `\n\n${buildModelInfoTag(p.effectiveModel)}`;
                return { ...result, body: JSON.stringify(bodyObj) };
            }
        }
        catch {
            p.request.log.debug("Failed to inject model-info tag into non-JSON response");
        }
        return result;
    };
}
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
import type { FastifyReply } from "fastify";
|
|
2
1
|
import type { RawHeaders, TransportResult } from "./types.js";
|
|
3
|
-
import type { MetricsResult } from "../metrics/metrics-extractor.js";
|
|
4
|
-
import type { SSEMetricsTransform } from "../metrics/sse-metrics-transform.js";
|
|
5
2
|
export { callStream } from "./stream-proxy.js";
|
|
6
3
|
export interface UpstreamRequestOptions {
|
|
7
4
|
hostname: string;
|
|
@@ -13,7 +10,6 @@ export interface UpstreamRequestOptions {
|
|
|
13
10
|
export declare const _transportInternals: {
|
|
14
11
|
createUpstreamRequest(url: URL, options: UpstreamRequestOptions): import("http").ClientRequest;
|
|
15
12
|
};
|
|
16
|
-
export declare function createUpstreamRequest(url: URL, options: UpstreamRequestOptions): import("http").ClientRequest;
|
|
17
13
|
export declare function buildRequestOptions(url: URL, headers: Record<string, string>, method?: string): UpstreamRequestOptions;
|
|
18
14
|
export type BuildHeadersFn = (cliHdrs: RawHeaders, key: string, bytes?: number) => Record<string, string>;
|
|
19
15
|
export declare function callNonStream(backend: {
|
|
@@ -27,24 +23,3 @@ export interface GetTransportResult {
|
|
|
27
23
|
export declare function callGet(backend: {
|
|
28
24
|
base_url: string;
|
|
29
25
|
}, apiKey: string, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: (cliHdrs: RawHeaders, key: string) => Record<string, string>): Promise<GetTransportResult>;
|
|
30
|
-
export interface ProxyResult {
|
|
31
|
-
statusCode: number;
|
|
32
|
-
body: string;
|
|
33
|
-
headers: Record<string, string>;
|
|
34
|
-
sentHeaders: Record<string, string>;
|
|
35
|
-
sentBody: string;
|
|
36
|
-
}
|
|
37
|
-
export interface StreamProxyResult {
|
|
38
|
-
statusCode: number;
|
|
39
|
-
responseBody?: string;
|
|
40
|
-
upstreamResponseHeaders?: Record<string, string>;
|
|
41
|
-
sentHeaders?: Record<string, string>;
|
|
42
|
-
metricsResult?: MetricsResult;
|
|
43
|
-
abnormalClose?: boolean;
|
|
44
|
-
}
|
|
45
|
-
export declare function proxyNonStreamCompat(backend: {
|
|
46
|
-
base_url: string;
|
|
47
|
-
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, upstreamPath: string, buildHeaders: BuildHeadersFn): Promise<ProxyResult>;
|
|
48
|
-
export declare function proxyStreamCompat(backend: {
|
|
49
|
-
base_url: string;
|
|
50
|
-
}, apiKey: string, body: Record<string, unknown>, clientHeaders: RawHeaders, reply: FastifyReply, timeoutMs: number, upstreamPath: string, buildHeaders: BuildHeadersFn, metricsTransform?: SSEMetricsTransform, checkEarlyError?: (bufferedData: string) => boolean): Promise<StreamProxyResult>;
|