llm-simple-router 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin/metrics.js +4 -4
- package/dist/admin/stats.js +5 -3
- package/dist/admin/usage.js +40 -19
- package/dist/db/metrics.d.ts +2 -0
- package/dist/db/metrics.js +4 -7
- package/dist/db/migrations/026_metrics_independent.sql +54 -0
- package/dist/db/stats.d.ts +3 -2
- package/dist/db/stats.js +15 -6
- package/dist/db/usage-windows.d.ts +5 -4
- package/dist/db/usage-windows.js +48 -20
- package/dist/middleware/auth.js +12 -2
- package/dist/monitor/request-tracker.d.ts +2 -1
- package/dist/monitor/request-tracker.js +3 -0
- package/dist/monitor/stats-aggregator.js +8 -7
- package/dist/proxy/orchestrator.js +6 -1
- package/dist/proxy/proxy-handler.js +11 -2
- package/dist/proxy/proxy-logging.d.ts +1 -1
- package/dist/proxy/proxy-logging.js +5 -2
- package/dist/proxy/resilience.d.ts +0 -2
- package/dist/proxy/resilience.js +2 -3
- package/dist/proxy/scope.d.ts +2 -2
- package/dist/proxy/scope.js +4 -2
- package/dist/proxy/usage-window-tracker.d.ts +5 -3
- package/dist/proxy/usage-window-tracker.js +21 -22
- package/dist/utils/time-range.d.ts +1 -1
- package/dist/utils/time-range.js +13 -7
- package/frontend-dist/assets/{CardContent-GNY_j_L3.js → CardContent-ByybpNZM.js} +1 -1
- package/frontend-dist/assets/{CardTitle-BhXJbSoh.js → CardTitle-Cv39_iQu.js} +1 -1
- package/frontend-dist/assets/{Checkbox-n_sh6Lvx.js → Checkbox-F8_Gy_s5.js} +1 -1
- package/frontend-dist/assets/{CollapsibleTrigger-DDCUOXDR.js → CollapsibleTrigger-BPzLViBo.js} +1 -1
- package/frontend-dist/assets/{Collection-DbtqQ1jF.js → Collection-Dafpcl-w.js} +1 -1
- package/frontend-dist/assets/Dashboard-BTwf4ZtI.js +3 -0
- package/frontend-dist/assets/{DialogTitle-BEWUnuJQ.js → DialogTitle-BMNhmnin.js} +1 -1
- package/frontend-dist/assets/{Input-CmibY9Fx.js → Input-BkzqSK7i.js} +1 -1
- package/frontend-dist/assets/{Label-Cs__wFH0.js → Label-DwqBcp6d.js} +1 -1
- package/frontend-dist/assets/{Login-BciEc1TW.js → Login-B7sSST00.js} +1 -1
- package/frontend-dist/assets/Logs-DJc0hZ8C.js +1 -0
- package/frontend-dist/assets/ModelMappings-BavaEbnL.js +1 -0
- package/frontend-dist/assets/Monitor-B4hCGdS-.js +1 -0
- package/frontend-dist/assets/{PopoverTrigger-DaKOMSVs.js → PopoverTrigger-vgVugQwU.js} +1 -1
- package/frontend-dist/assets/{PopperContent-DZ6plcjf.js → PopperContent-tf2A4fsa.js} +1 -1
- package/frontend-dist/assets/{Providers-u8utX74M.js → Providers-BmrsbthR.js} +1 -1
- package/frontend-dist/assets/{ProxyEnhancement-8_xhndGt.js → ProxyEnhancement-BQ02PeEF.js} +1 -1
- package/frontend-dist/assets/{RetryRules-D1psYDEP.js → RetryRules-H460Dyek.js} +1 -1
- package/frontend-dist/assets/{RouterKeys-ovPFGhjy.js → RouterKeys-D8rXsmpq.js} +1 -1
- package/frontend-dist/assets/{RovingFocusItem-Dsv9AkP7.js → RovingFocusItem-Bo0dNNmj.js} +1 -1
- package/frontend-dist/assets/{SelectValue-BoUWfZAg.js → SelectValue-BU16UnrX.js} +1 -1
- package/frontend-dist/assets/{Settings-DXF-6A8C.js → Settings-YiR7zqua.js} +1 -1
- package/frontend-dist/assets/{Setup-rVLqiz0d.js → Setup-D7EXZ1Nv.js} +1 -1
- package/frontend-dist/assets/{Switch-po5ZVBE3.js → Switch-CA_wdlEs.js} +1 -1
- package/frontend-dist/assets/{TableHeader-Zyvq_0p2.js → TableHeader-BuObvzlS.js} +1 -1
- package/frontend-dist/assets/{TabsTrigger-CgDhZGkT.js → TabsTrigger-DZIFRVA_.js} +1 -1
- package/frontend-dist/assets/{Teleport-CgTHarey.js → Teleport-FxAUQAZT.js} +1 -1
- package/frontend-dist/assets/{TooltipTrigger-C2qO21dQ.js → TooltipTrigger-D9nCGsBG.js} +1 -1
- package/frontend-dist/assets/{UnifiedRequestDialog-Dksad8eN.js → UnifiedRequestDialog-BPv5B17F.js} +1 -1
- package/frontend-dist/assets/{VisuallyHidden-fbPmoMwi.js → VisuallyHidden-clBSgYdG.js} +1 -1
- package/frontend-dist/assets/{VisuallyHiddenInput-7j8wkPrW.js → VisuallyHiddenInput-CmDbYWUO.js} +1 -1
- package/frontend-dist/assets/{alert-dialog-DbT3PzoF.js → alert-dialog-BvbdNhnK.js} +1 -1
- package/frontend-dist/assets/{badge-BVxnlnsH.js → badge-CcCt1-ig.js} +1 -1
- package/frontend-dist/assets/{button-BCrIpNwA.js → button-DeOsxcjG.js} +2 -2
- package/frontend-dist/assets/chevron-down-D_DCDFPY.js +1 -0
- package/frontend-dist/assets/{dialog-BNlCZpHK.js → dialog-CPO2KcC1.js} +1 -1
- package/frontend-dist/assets/{file-text-BavS6SrF.js → file-text-C-6LFEhP.js} +1 -1
- package/frontend-dist/assets/index-DHONWydQ.css +1 -0
- package/frontend-dist/assets/{index-DrBJPq6d.js → index-DW58MMV6.js} +1 -1
- package/frontend-dist/assets/{lib-CGpNhf06.js → lib-DkM_rWnj.js} +1 -1
- package/frontend-dist/assets/loader-circle-BS4uI1Z4.js +1 -0
- package/frontend-dist/assets/{ohash.D__AXeF1-DkJnWU8a.js → ohash.D__AXeF1-CBYQgVou.js} +1 -1
- package/frontend-dist/assets/{useClipboard-Bq8yZunx.js → useClipboard-NBCgpr6Z.js} +1 -1
- package/frontend-dist/assets/{useLogRetention-BWPm3G_A.js → useLogRetention-B_u8u74J.js} +1 -1
- package/frontend-dist/assets/useNonce-D1dqoOZO.js +1 -0
- package/frontend-dist/assets/x-DVLhwc3Q.js +1 -0
- package/frontend-dist/index.html +19 -19
- package/package.json +2 -2
- package/frontend-dist/assets/Dashboard-Dy9frcgO.js +0 -3
- package/frontend-dist/assets/Logs-BkqwWW0-.js +0 -1
- package/frontend-dist/assets/ModelMappings-DrCJ_TCf.js +0 -1
- package/frontend-dist/assets/Monitor-C-b4qyuI.js +0 -1
- package/frontend-dist/assets/chevron-down-CWBwGxSp.js +0 -1
- package/frontend-dist/assets/circle-question-mark-DRkkqjgG.js +0 -1
- package/frontend-dist/assets/index-BP4imfye.css +0 -1
- package/frontend-dist/assets/loader-circle-Cpd89XQ7.js +0 -1
- package/frontend-dist/assets/useNonce-D5lpSPNk.js +0 -1
- package/frontend-dist/assets/x-BFIp7DLt.js +0 -1
package/dist/admin/metrics.js
CHANGED
|
@@ -33,13 +33,13 @@ const TimeseriesQuerySchema = Type.Object({
|
|
|
33
33
|
end_time: Type.Optional(Type.String()),
|
|
34
34
|
});
|
|
35
35
|
const DASHBOARD_PERIODS = new Set(["window", "weekly", "monthly"]);
|
|
36
|
-
function resolveMetricsTime(query, db, routerKeyId) {
|
|
36
|
+
function resolveMetricsTime(query, db, routerKeyId, providerId) {
|
|
37
37
|
if (query.start_time && query.end_time) {
|
|
38
38
|
return { startTime: query.start_time, endTime: query.end_time, legacyPeriod: "30d" };
|
|
39
39
|
}
|
|
40
40
|
const period = query.period ?? "weekly";
|
|
41
41
|
if (DASHBOARD_PERIODS.has(period)) {
|
|
42
|
-
const range = resolveTimeRange(period, db, routerKeyId);
|
|
42
|
+
const range = resolveTimeRange(period, db, routerKeyId, providerId);
|
|
43
43
|
return { startTime: range.startTime, endTime: range.endTime, legacyPeriod: "5h" };
|
|
44
44
|
}
|
|
45
45
|
return { legacyPeriod: period };
|
|
@@ -48,14 +48,14 @@ export const adminMetricsRoutes = (app, options, done) => {
|
|
|
48
48
|
const { db } = options;
|
|
49
49
|
app.get("/admin/api/metrics/summary", { schema: { querystring: SummaryQuerySchema } }, async (request, reply) => {
|
|
50
50
|
const query = request.query;
|
|
51
|
-
const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id);
|
|
51
|
+
const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id, query.provider_id);
|
|
52
52
|
const summary = getMetricsSummary(db, legacyPeriod, query.provider_id, query.backend_model, query.router_key_id, startTime, endTime);
|
|
53
53
|
return reply.send(summary);
|
|
54
54
|
});
|
|
55
55
|
app.get("/admin/api/metrics/timeseries", { schema: { querystring: TimeseriesQuerySchema } }, async (request, reply) => {
|
|
56
56
|
const query = request.query;
|
|
57
57
|
const metric = query.metric;
|
|
58
|
-
const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id);
|
|
58
|
+
const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id, query.provider_id);
|
|
59
59
|
const timeseries = getMetricsTimeseries(db, legacyPeriod, metric, query.provider_id, query.backend_model, query.router_key_id, startTime, endTime);
|
|
60
60
|
return reply.send(timeseries);
|
|
61
61
|
});
|
package/dist/admin/stats.js
CHANGED
|
@@ -10,6 +10,8 @@ const StatsQuerySchema = Type.Object({
|
|
|
10
10
|
start_time: Type.Optional(Type.String()),
|
|
11
11
|
end_time: Type.Optional(Type.String()),
|
|
12
12
|
router_key_id: Type.Optional(Type.String()),
|
|
13
|
+
provider_id: Type.Optional(Type.String()),
|
|
14
|
+
backend_model: Type.Optional(Type.String()),
|
|
13
15
|
});
|
|
14
16
|
export const adminStatsRoutes = (app, options, done) => {
|
|
15
17
|
app.get("/admin/api/stats", { schema: { querystring: StatsQuerySchema } }, async (request, reply) => {
|
|
@@ -21,12 +23,12 @@ export const adminStatsRoutes = (app, options, done) => {
|
|
|
21
23
|
endTime = query.end_time;
|
|
22
24
|
}
|
|
23
25
|
else {
|
|
24
|
-
const range = resolveTimeRange((query.period ?? "weekly"), options.db, query.router_key_id);
|
|
26
|
+
const range = resolveTimeRange((query.period ?? "weekly"), options.db, query.router_key_id, query.provider_id);
|
|
25
27
|
startTime = range.startTime;
|
|
26
28
|
endTime = range.endTime;
|
|
27
29
|
}
|
|
28
|
-
const stats = getStats(options.db, startTime, endTime, query.router_key_id);
|
|
29
|
-
return reply.send(stats);
|
|
30
|
+
const stats = getStats(options.db, startTime, endTime, query.router_key_id, query.provider_id, query.backend_model);
|
|
31
|
+
return reply.send({ ...stats, startTime, endTime });
|
|
30
32
|
});
|
|
31
33
|
done();
|
|
32
34
|
};
|
package/dist/admin/usage.js
CHANGED
|
@@ -1,16 +1,26 @@
|
|
|
1
1
|
import { Type } from "@sinclair/typebox";
|
|
2
2
|
import { getWindowsInRange, getWindowUsage } from "../db/usage-windows.js";
|
|
3
|
+
import { getProviderById } from "../db/providers.js";
|
|
3
4
|
import { resolveTimeRange } from "../utils/time-range.js";
|
|
4
5
|
const UsageQuerySchema = Type.Object({
|
|
5
6
|
router_key_id: Type.Optional(Type.String()),
|
|
7
|
+
provider_id: Type.Optional(Type.String()),
|
|
6
8
|
});
|
|
7
|
-
function getDailyUsage(db, startTime, endTime, routerKeyId) {
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
function getDailyUsage(db, startTime, endTime, routerKeyId, providerId) {
|
|
10
|
+
const conditions = [
|
|
11
|
+
"rm.is_complete = 1",
|
|
12
|
+
"rm.created_at >= datetime(?)",
|
|
13
|
+
"rm.created_at < datetime(?)",
|
|
14
|
+
];
|
|
15
|
+
const params = [startTime, endTime];
|
|
16
|
+
if (routerKeyId) {
|
|
17
|
+
conditions.push("rm.router_key_id = ?");
|
|
18
|
+
params.push(routerKeyId);
|
|
19
|
+
}
|
|
20
|
+
if (providerId) {
|
|
21
|
+
conditions.push("rm.provider_id = ?");
|
|
22
|
+
params.push(providerId);
|
|
23
|
+
}
|
|
14
24
|
return db.prepare(`
|
|
15
25
|
SELECT
|
|
16
26
|
date(rm.created_at) AS date,
|
|
@@ -18,37 +28,48 @@ function getDailyUsage(db, startTime, endTime, routerKeyId) {
|
|
|
18
28
|
COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
|
|
19
29
|
COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
|
|
20
30
|
FROM request_metrics rm
|
|
21
|
-
|
|
22
|
-
WHERE rm.is_complete = 1
|
|
23
|
-
AND rm.created_at >= datetime(?)
|
|
24
|
-
AND rm.created_at < datetime(?)
|
|
25
|
-
${routerKeyFilter}
|
|
31
|
+
WHERE ${conditions.join(" AND ")}
|
|
26
32
|
GROUP BY date(rm.created_at)
|
|
27
33
|
ORDER BY date ASC
|
|
28
34
|
`).all(...params);
|
|
29
35
|
}
|
|
36
|
+
function resolveProviderName(db, providerId) {
|
|
37
|
+
if (!providerId)
|
|
38
|
+
return null;
|
|
39
|
+
return getProviderById(db, providerId)?.name ?? null;
|
|
40
|
+
}
|
|
30
41
|
export const adminUsageRoutes = (app, options, done) => {
|
|
31
42
|
const { db } = options;
|
|
32
43
|
app.get("/admin/api/usage/windows", { schema: { querystring: UsageQuerySchema } }, async (request) => {
|
|
33
44
|
const query = request.query;
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
45
|
+
if (query.provider_id) {
|
|
46
|
+
const range = resolveTimeRange("window", db, query.router_key_id, query.provider_id);
|
|
47
|
+
const windows = getWindowsInRange(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
|
|
48
|
+
if (windows.length === 0)
|
|
49
|
+
return [];
|
|
50
|
+
return windows.map(w => ({
|
|
51
|
+
window: { ...w, provider_name: resolveProviderName(db, w.provider_id) },
|
|
52
|
+
usage: getWindowUsage(db, w.start_time, w.end_time, query.router_key_id, query.provider_id),
|
|
53
|
+
}));
|
|
54
|
+
}
|
|
55
|
+
const allWindows = getWindowsInRange(db, "1970-01-01", "2099-12-31", query.router_key_id)
|
|
56
|
+
.filter((w) => w.provider_id !== null);
|
|
57
|
+
if (allWindows.length === 0)
|
|
37
58
|
return [];
|
|
38
|
-
return
|
|
39
|
-
window: w,
|
|
59
|
+
return allWindows.map(w => ({
|
|
60
|
+
window: { ...w, provider_name: resolveProviderName(db, w.provider_id) },
|
|
40
61
|
usage: getWindowUsage(db, w.start_time, w.end_time, query.router_key_id),
|
|
41
62
|
}));
|
|
42
63
|
});
|
|
43
64
|
app.get("/admin/api/usage/weekly", { schema: { querystring: UsageQuerySchema } }, async (request) => {
|
|
44
65
|
const query = request.query;
|
|
45
66
|
const range = resolveTimeRange("weekly", db, query.router_key_id);
|
|
46
|
-
return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id);
|
|
67
|
+
return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
|
|
47
68
|
});
|
|
48
69
|
app.get("/admin/api/usage/monthly", { schema: { querystring: UsageQuerySchema } }, async (request) => {
|
|
49
70
|
const query = request.query;
|
|
50
71
|
const range = resolveTimeRange("monthly", db, query.router_key_id);
|
|
51
|
-
return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id);
|
|
72
|
+
return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
|
|
52
73
|
});
|
|
53
74
|
done();
|
|
54
75
|
};
|
package/dist/db/metrics.d.ts
CHANGED
|
@@ -23,6 +23,8 @@ export type MetricsInsert = {
|
|
|
23
23
|
provider_id: string;
|
|
24
24
|
backend_model: string;
|
|
25
25
|
api_type: string;
|
|
26
|
+
router_key_id?: string | null;
|
|
27
|
+
status_code?: number | null;
|
|
26
28
|
input_tokens?: number | null;
|
|
27
29
|
output_tokens?: number | null;
|
|
28
30
|
cache_creation_tokens?: number | null;
|
package/dist/db/metrics.js
CHANGED
|
@@ -2,8 +2,8 @@ import { randomUUID } from "crypto";
|
|
|
2
2
|
import { MS_PER_SECOND } from "../constants.js";
|
|
3
3
|
export function insertMetrics(db, m) {
|
|
4
4
|
const id = randomUUID();
|
|
5
|
-
db.prepare(`INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete)
|
|
6
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, m.request_log_id, m.provider_id, m.backend_model, m.api_type, m.input_tokens ?? null, m.output_tokens ?? null, m.cache_creation_tokens ?? null, m.cache_read_tokens ?? null, m.ttft_ms ?? null, m.total_duration_ms ?? null, m.tokens_per_second ?? null, m.stop_reason ?? null, m.is_complete ?? 1);
|
|
5
|
+
db.prepare(`INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, router_key_id, status_code, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete)
|
|
6
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, m.request_log_id, m.provider_id, m.backend_model, m.api_type, m.router_key_id ?? null, m.status_code ?? null, m.input_tokens ?? null, m.output_tokens ?? null, m.cache_creation_tokens ?? null, m.cache_read_tokens ?? null, m.ttft_ms ?? null, m.total_duration_ms ?? null, m.tokens_per_second ?? null, m.stop_reason ?? null, m.is_complete ?? 1);
|
|
7
7
|
return id;
|
|
8
8
|
}
|
|
9
9
|
const PERIOD_OFFSET = {
|
|
@@ -64,8 +64,7 @@ export function getMetricsSummary(db, period, providerId, backendModel, routerKe
|
|
|
64
64
|
params.push(backendModel);
|
|
65
65
|
}
|
|
66
66
|
if (routerKeyId) {
|
|
67
|
-
|
|
68
|
-
conditions.push("rl.router_key_id = ?");
|
|
67
|
+
conditions.push("rm.router_key_id = ?");
|
|
69
68
|
params.push(routerKeyId);
|
|
70
69
|
}
|
|
71
70
|
return db.prepare(`
|
|
@@ -108,19 +107,17 @@ export function getMetricsTimeseries(db, period, metric, providerId, backendMode
|
|
|
108
107
|
params.push(backendModel);
|
|
109
108
|
}
|
|
110
109
|
if (routerKeyId) {
|
|
111
|
-
conditions.push("rl.router_key_id = ?");
|
|
110
|
+
conditions.push("rm.router_key_id = ?");
|
|
112
111
|
params.push(routerKeyId);
|
|
113
112
|
}
|
|
114
113
|
const where = conditions.join(" AND ");
|
|
115
114
|
const expr = METRIC_EXPR[metric];
|
|
116
|
-
const joinClause = routerKeyId ? "LEFT JOIN request_logs rl ON rl.id = rm.request_log_id" : "";
|
|
117
115
|
const rows = db.prepare(`
|
|
118
116
|
SELECT
|
|
119
117
|
(unixepoch(rm.created_at) / ?) * ? AS bucket_key,
|
|
120
118
|
${expr} AS avg_value,
|
|
121
119
|
COUNT(*) AS count
|
|
122
120
|
FROM request_metrics rm
|
|
123
|
-
${joinClause}
|
|
124
121
|
WHERE ${where}
|
|
125
122
|
GROUP BY bucket_key
|
|
126
123
|
ORDER BY bucket_key ASC
|
package/dist/db/migrations/026_metrics_independent.sql
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
-- Metrics 独立化:request_metrics 增加路由维度列,解除级联删除依赖
|
|
2
|
+
-- usage_windows 增加 provider_id 支持按 provider 维度追踪使用量
|
|
3
|
+
|
|
4
|
+
-- 1. 重建 request_metrics:CASCADE -> SET NULL,同时新增 router_key_id / status_code
|
|
5
|
+
CREATE TABLE request_metrics_new (
|
|
6
|
+
id TEXT PRIMARY KEY,
|
|
7
|
+
request_log_id TEXT UNIQUE REFERENCES request_logs(id) ON DELETE SET NULL,
|
|
8
|
+
provider_id TEXT NOT NULL,
|
|
9
|
+
backend_model TEXT NOT NULL,
|
|
10
|
+
api_type TEXT NOT NULL,
|
|
11
|
+
input_tokens INTEGER,
|
|
12
|
+
output_tokens INTEGER,
|
|
13
|
+
cache_creation_tokens INTEGER,
|
|
14
|
+
cache_read_tokens INTEGER,
|
|
15
|
+
ttft_ms INTEGER,
|
|
16
|
+
total_duration_ms INTEGER,
|
|
17
|
+
tokens_per_second REAL,
|
|
18
|
+
stop_reason TEXT,
|
|
19
|
+
is_complete INTEGER NOT NULL DEFAULT 1,
|
|
20
|
+
router_key_id TEXT,
|
|
21
|
+
status_code INTEGER,
|
|
22
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
23
|
+
);
|
|
24
|
+
|
|
25
|
+
INSERT INTO request_metrics_new
|
|
26
|
+
(id, request_log_id, provider_id, backend_model, api_type,
|
|
27
|
+
input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
|
|
28
|
+
ttft_ms, total_duration_ms, tokens_per_second, stop_reason,
|
|
29
|
+
is_complete, created_at)
|
|
30
|
+
SELECT
|
|
31
|
+
id, request_log_id, provider_id, backend_model, api_type,
|
|
32
|
+
input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
|
|
33
|
+
ttft_ms, total_duration_ms, tokens_per_second, stop_reason,
|
|
34
|
+
is_complete, created_at
|
|
35
|
+
FROM request_metrics;
|
|
36
|
+
|
|
37
|
+
-- 回填 router_key_id 和 status_code 从 request_logs
|
|
38
|
+
UPDATE request_metrics_new
|
|
39
|
+
SET
|
|
40
|
+
router_key_id = rl.router_key_id,
|
|
41
|
+
status_code = rl.status_code
|
|
42
|
+
FROM request_logs rl
|
|
43
|
+
WHERE rl.id = request_metrics_new.request_log_id;
|
|
44
|
+
|
|
45
|
+
DROP TABLE request_metrics;
|
|
46
|
+
ALTER TABLE request_metrics_new RENAME TO request_metrics;
|
|
47
|
+
|
|
48
|
+
-- 重建原有索引
|
|
49
|
+
CREATE INDEX idx_metrics_time_provider_model ON request_metrics(created_at, provider_id, backend_model);
|
|
50
|
+
CREATE INDEX idx_metrics_api_type_created_at ON request_metrics(api_type, created_at);
|
|
51
|
+
|
|
52
|
+
-- 2. usage_windows 增加 provider_id 列
|
|
53
|
+
ALTER TABLE usage_windows ADD COLUMN provider_id TEXT;
|
|
54
|
+
CREATE INDEX IF NOT EXISTS idx_usage_windows_provider_id ON usage_windows(provider_id);
|
package/dist/db/stats.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ export interface Stats {
|
|
|
3
3
|
totalRequests: number;
|
|
4
4
|
successRate: number;
|
|
5
5
|
avgTps: number;
|
|
6
|
-
|
|
6
|
+
totalInputTokens: number;
|
|
7
|
+
totalOutputTokens: number;
|
|
7
8
|
}
|
|
8
|
-
export declare function getStats(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string): Stats;
|
|
9
|
+
export declare function getStats(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string, providerId?: string, backendModel?: string): Stats;
|
package/dist/db/stats.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export function getStats(db, startTime, endTime, routerKeyId) {
|
|
1
|
+
export function getStats(db, startTime, endTime, routerKeyId, providerId, backendModel) {
|
|
2
2
|
const conditions = [
|
|
3
3
|
"rm.is_complete = 1",
|
|
4
4
|
"rm.created_at >= datetime(?)",
|
|
@@ -6,18 +6,26 @@ export function getStats(db, startTime, endTime, routerKeyId) {
|
|
|
6
6
|
];
|
|
7
7
|
const params = [startTime, endTime];
|
|
8
8
|
if (routerKeyId) {
|
|
9
|
-
conditions.push("rl.router_key_id = ?");
|
|
9
|
+
conditions.push("rm.router_key_id = ?");
|
|
10
10
|
params.push(routerKeyId);
|
|
11
11
|
}
|
|
12
|
+
if (providerId) {
|
|
13
|
+
conditions.push("rm.provider_id = ?");
|
|
14
|
+
params.push(providerId);
|
|
15
|
+
}
|
|
16
|
+
if (backendModel) {
|
|
17
|
+
conditions.push("rm.backend_model = ?");
|
|
18
|
+
params.push(backendModel);
|
|
19
|
+
}
|
|
12
20
|
const where = conditions.join(" AND ");
|
|
13
21
|
const row = db.prepare(`
|
|
14
22
|
SELECT
|
|
15
23
|
COUNT(*) AS total_requests,
|
|
16
|
-
SUM(CASE WHEN
|
|
24
|
+
SUM(CASE WHEN rm.status_code >= 200 AND rm.status_code < 300 THEN 1 ELSE 0 END) AS success_count,
|
|
17
25
|
AVG(rm.tokens_per_second) AS avg_tps,
|
|
18
|
-
COALESCE(SUM(rm.input_tokens), 0)
|
|
26
|
+
COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
|
|
27
|
+
COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
|
|
19
28
|
FROM request_metrics rm
|
|
20
|
-
JOIN request_logs rl ON rl.id = rm.request_log_id
|
|
21
29
|
WHERE ${where}
|
|
22
30
|
`).get(...params);
|
|
23
31
|
const total = row?.total_requests ?? 0;
|
|
@@ -25,6 +33,7 @@ export function getStats(db, startTime, endTime, routerKeyId) {
|
|
|
25
33
|
totalRequests: total,
|
|
26
34
|
successRate: total > 0 ? (row?.success_count ?? 0) / total : 0,
|
|
27
35
|
avgTps: row?.avg_tps ?? 0,
|
|
28
|
-
|
|
36
|
+
totalInputTokens: row?.total_input_tokens ?? 0,
|
|
37
|
+
totalOutputTokens: row?.total_output_tokens ?? 0,
|
|
29
38
|
};
|
|
30
39
|
}
|
package/dist/db/usage-windows.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import Database from "better-sqlite3";
|
|
|
2
2
|
export interface UsageWindow {
|
|
3
3
|
id: string;
|
|
4
4
|
router_key_id: string | null;
|
|
5
|
+
provider_id: string | null;
|
|
5
6
|
start_time: string;
|
|
6
7
|
end_time: string;
|
|
7
8
|
created_at: string;
|
|
@@ -12,8 +13,8 @@ export interface WindowUsage {
|
|
|
12
13
|
total_output_tokens: number;
|
|
13
14
|
}
|
|
14
15
|
export declare function insertWindow(db: Database.Database, w: Omit<UsageWindow, "created_at">): string;
|
|
15
|
-
export declare function getLatestWindow(db: Database.Database, routerKeyId?: string): UsageWindow | null;
|
|
16
|
-
/** 返回与 [start, end)
|
|
17
|
-
export declare function getWindowsInRange(db: Database.Database, start: string, end: string, routerKeyId?: string): UsageWindow[];
|
|
16
|
+
export declare function getLatestWindow(db: Database.Database, routerKeyId?: string, providerId?: string): UsageWindow | null;
|
|
17
|
+
/** 返回与 [start, end) 区间有重叠的窗口。可选参数不传表示不过滤该维度(与 getLatestWindow 的 IS NULL 语义不同) */
|
|
18
|
+
export declare function getWindowsInRange(db: Database.Database, start: string, end: string, routerKeyId?: string, providerId?: string): UsageWindow[];
|
|
18
19
|
/** 聚合指定时间窗口内的请求计数和 token 用量 */
|
|
19
|
-
export declare function getWindowUsage(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string): WindowUsage;
|
|
20
|
+
export declare function getWindowUsage(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string, providerId?: string): WindowUsage;
|
package/dist/db/usage-windows.js
CHANGED
|
@@ -1,37 +1,65 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
export function insertWindow(db, w) {
|
|
3
3
|
const id = w.id || randomUUID();
|
|
4
|
-
db.prepare("INSERT INTO usage_windows (id, router_key_id, start_time, end_time) VALUES (?, ?, ?, ?)").run(id, w.router_key_id ?? null, w.start_time, w.end_time);
|
|
4
|
+
db.prepare("INSERT INTO usage_windows (id, router_key_id, provider_id, start_time, end_time) VALUES (?, ?, ?, ?, ?)").run(id, w.router_key_id ?? null, w.provider_id ?? null, w.start_time, w.end_time);
|
|
5
5
|
return id;
|
|
6
6
|
}
|
|
7
|
-
export function getLatestWindow(db, routerKeyId) {
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
export function getLatestWindow(db, routerKeyId, providerId) {
|
|
8
|
+
const conditions = [];
|
|
9
|
+
const params = [];
|
|
10
|
+
if (routerKeyId) {
|
|
11
|
+
conditions.push("router_key_id = ?");
|
|
12
|
+
params.push(routerKeyId);
|
|
13
|
+
}
|
|
14
|
+
else {
|
|
15
|
+
conditions.push("router_key_id IS NULL");
|
|
16
|
+
}
|
|
17
|
+
if (providerId) {
|
|
18
|
+
conditions.push("provider_id = ?");
|
|
19
|
+
params.push(providerId);
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
conditions.push("provider_id IS NULL");
|
|
23
|
+
}
|
|
24
|
+
const sql = `SELECT * FROM usage_windows WHERE ${conditions.join(" AND ")} ORDER BY start_time DESC LIMIT 1`;
|
|
12
25
|
return db.prepare(sql).get(...params) ?? null;
|
|
13
26
|
}
|
|
14
|
-
/** 返回与 [start, end)
|
|
15
|
-
export function getWindowsInRange(db, start, end, routerKeyId) {
|
|
27
|
+
/** 返回与 [start, end) 区间有重叠的窗口。可选参数不传表示不过滤该维度(与 getLatestWindow 的 IS NULL 语义不同) */
|
|
28
|
+
export function getWindowsInRange(db, start, end, routerKeyId, providerId) {
|
|
29
|
+
const conditions = ["start_time < ?", "end_time > ?"];
|
|
30
|
+
const params = [end, start];
|
|
16
31
|
if (routerKeyId) {
|
|
17
|
-
|
|
32
|
+
conditions.push("router_key_id = ?");
|
|
33
|
+
params.push(routerKeyId);
|
|
34
|
+
}
|
|
35
|
+
if (providerId) {
|
|
36
|
+
conditions.push("provider_id = ?");
|
|
37
|
+
params.push(providerId);
|
|
18
38
|
}
|
|
19
|
-
return db.prepare(
|
|
39
|
+
return db.prepare(`SELECT * FROM usage_windows WHERE ${conditions.join(" AND ")} ORDER BY start_time ASC`).all(...params);
|
|
20
40
|
}
|
|
21
41
|
/** 聚合指定时间窗口内的请求计数和 token 用量 */
|
|
22
|
-
export function getWindowUsage(db, startTime, endTime, routerKeyId) {
|
|
23
|
-
const
|
|
42
|
+
export function getWindowUsage(db, startTime, endTime, routerKeyId, providerId) {
|
|
43
|
+
const conditions = [
|
|
44
|
+
"rm.is_complete = 1",
|
|
45
|
+
"rm.created_at >= datetime(?)",
|
|
46
|
+
"rm.created_at < datetime(?)",
|
|
47
|
+
];
|
|
48
|
+
const params = [startTime, endTime];
|
|
49
|
+
if (routerKeyId) {
|
|
50
|
+
conditions.push("rm.router_key_id = ?");
|
|
51
|
+
params.push(routerKeyId);
|
|
52
|
+
}
|
|
53
|
+
if (providerId) {
|
|
54
|
+
conditions.push("rm.provider_id = ?");
|
|
55
|
+
params.push(providerId);
|
|
56
|
+
}
|
|
57
|
+
return db.prepare(`
|
|
24
58
|
SELECT
|
|
25
59
|
COUNT(*) AS request_count,
|
|
26
60
|
COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
|
|
27
61
|
COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
|
|
28
62
|
FROM request_metrics rm
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
AND rm.created_at >= datetime(?)
|
|
32
|
-
AND rm.created_at < datetime(?)`;
|
|
33
|
-
if (routerKeyId) {
|
|
34
|
-
return db.prepare(`${baseSql} AND rl.router_key_id = ?`).get(startTime, endTime, routerKeyId);
|
|
35
|
-
}
|
|
36
|
-
return db.prepare(baseSql).get(startTime, endTime);
|
|
63
|
+
WHERE ${conditions.join(" AND ")}
|
|
64
|
+
`).get(...params);
|
|
37
65
|
}
|
package/dist/middleware/auth.js
CHANGED
|
@@ -53,15 +53,25 @@ const authMiddlewareRaw = (app, options, done) => {
|
|
|
53
53
|
reply.code(HTTP_SERVICE_UNAVAILABLE).send({ error: { message: "Service not initialized" } });
|
|
54
54
|
return reply;
|
|
55
55
|
}
|
|
56
|
+
let token;
|
|
56
57
|
const authHeader = request.headers.authorization;
|
|
57
|
-
if (
|
|
58
|
+
if (authHeader && authHeader.startsWith("Bearer ")) {
|
|
59
|
+
token = authHeader.slice(BEARER_PREFIX_LENGTH);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
// Fallback: Anthropic SDK sends API key via x-api-key header
|
|
63
|
+
const apiKeyHeader = request.headers["x-api-key"];
|
|
64
|
+
if (apiKeyHeader) {
|
|
65
|
+
token = apiKeyHeader;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
if (!token) {
|
|
58
69
|
if (proxyApiType) {
|
|
59
70
|
logRejectedAuth(options.db, proxyApiType, HTTP_UNAUTHORIZED, "Invalid API key", request);
|
|
60
71
|
}
|
|
61
72
|
unauthorizedReply(reply);
|
|
62
73
|
return reply;
|
|
63
74
|
}
|
|
64
|
-
const token = authHeader.slice(BEARER_PREFIX_LENGTH);
|
|
65
75
|
const hash = createHash("sha256").update(token).digest("hex");
|
|
66
76
|
const row = stmt.get(hash);
|
|
67
77
|
if (!row) {
|
package/dist/monitor/request-tracker.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { ServerResponse } from "node:http";
|
|
|
2
2
|
import { StatsAggregator } from "./stats-aggregator.js";
|
|
3
3
|
import { RuntimeCollector } from "./runtime-collector.js";
|
|
4
4
|
import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
|
|
5
|
-
import type { ActiveRequest, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
|
|
5
|
+
import type { ActiveRequest, AttemptSnapshot, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
|
|
6
6
|
export interface TrackerLogger {
|
|
7
7
|
debug(obj: Record<string, unknown>, msg: string): void;
|
|
8
8
|
warn(obj: Record<string, unknown>, msg: string): void;
|
|
@@ -31,6 +31,7 @@ export declare class RequestTracker {
|
|
|
31
31
|
complete(id: string, result: {
|
|
32
32
|
status: "completed" | "failed";
|
|
33
33
|
statusCode?: number;
|
|
34
|
+
attempts?: AttemptSnapshot[];
|
|
34
35
|
}): void;
|
|
35
36
|
getActive(): ActiveRequest[];
|
|
36
37
|
getRecent(limit?: number): ActiveRequest[];
|
package/dist/monitor/request-tracker.js
CHANGED
|
@@ -73,6 +73,7 @@ export class RequestTracker {
|
|
|
73
73
|
...req,
|
|
74
74
|
status: result.status,
|
|
75
75
|
completedAt: now,
|
|
76
|
+
attempts: result.attempts ?? req.attempts,
|
|
76
77
|
};
|
|
77
78
|
this.activeMap.delete(id);
|
|
78
79
|
this.streamAccumulators.delete(id);
|
|
@@ -161,6 +162,7 @@ export class RequestTracker {
|
|
|
161
162
|
if (this.pushTimer)
|
|
162
163
|
return;
|
|
163
164
|
this.tickCount = 0;
|
|
165
|
+
this.runtimeCollector.start();
|
|
164
166
|
this.pushTimer = setInterval(() => {
|
|
165
167
|
this.tickCount++;
|
|
166
168
|
this.cleanupRecent();
|
|
@@ -179,6 +181,7 @@ export class RequestTracker {
|
|
|
179
181
|
clearInterval(this.pushTimer);
|
|
180
182
|
this.pushTimer = null;
|
|
181
183
|
}
|
|
184
|
+
this.runtimeCollector.stop();
|
|
182
185
|
}
|
|
183
186
|
broadcast(event, data) {
|
|
184
187
|
// Strip clientRequest from broadcasts to reduce bandwidth;
|
package/dist/monitor/stats-aggregator.js
CHANGED
|
@@ -34,13 +34,13 @@ function emptyAccumulator() {
|
|
|
34
34
|
successCount: 0,
|
|
35
35
|
errorCount: 0,
|
|
36
36
|
retryCount: 0,
|
|
37
|
-
|
|
38
|
-
latencyCount: 0,
|
|
37
|
+
latencyBuffer: new RingBuffer(PROVIDER_LATENCY_CAPACITY),
|
|
39
38
|
errorsByCode: new Map(),
|
|
40
39
|
};
|
|
41
40
|
}
|
|
42
41
|
const TOP_ERRORS_LIMIT = 5;
|
|
43
42
|
const DEFAULT_CAPACITY = 1000;
|
|
43
|
+
const PROVIDER_LATENCY_CAPACITY = 200;
|
|
44
44
|
const HTTP_SUCCESS_RANGE_MIN = 200;
|
|
45
45
|
const HTTP_SUCCESS_RANGE_MAX = 400;
|
|
46
46
|
const PERCENTILE_P50 = 0.5;
|
|
@@ -103,8 +103,7 @@ export class StatsAggregator {
|
|
|
103
103
|
acc = emptyAccumulator();
|
|
104
104
|
this.providers.set(providerId, acc);
|
|
105
105
|
}
|
|
106
|
-
acc.latencySum += ms;
|
|
107
|
-
acc.latencyCount++;
|
|
106
|
+
acc.latencyBuffer.push(ms);
|
|
108
107
|
}
|
|
109
108
|
getStats() {
|
|
110
109
|
const sorted = this.latencyBuffer.sorted();
|
|
@@ -123,9 +122,7 @@ export class StatsAggregator {
|
|
|
123
122
|
totalRequests: acc.totalRequests,
|
|
124
123
|
successCount: acc.successCount,
|
|
125
124
|
errorCount: acc.errorCount,
|
|
126
|
-
avgLatencyMs: acc.latencyCount > 0
|
|
127
|
-
? acc.latencySum / acc.latencyCount
|
|
128
|
-
: 0,
|
|
125
|
+
avgLatencyMs: avgFromBuffer(acc.latencyBuffer),
|
|
129
126
|
retryCount: acc.retryCount,
|
|
130
127
|
topErrors,
|
|
131
128
|
};
|
|
@@ -164,3 +161,7 @@ function percentile(sorted, p) {
|
|
|
164
161
|
const idx = Math.ceil(p * sorted.length) - 1;
|
|
165
162
|
return sorted[Math.max(0, Math.min(idx, sorted.length - 1))];
|
|
166
163
|
}
|
|
164
|
+
function avgFromBuffer(buf) {
|
|
165
|
+
const sorted = buf.sorted();
|
|
166
|
+
return sorted.length > 0 ? sorted.reduce((s, v) => s + v, 0) / sorted.length : 0;
|
|
167
|
+
}
|
|
@@ -30,7 +30,12 @@ export class ProxyOrchestrator {
|
|
|
30
30
|
this.deps.trackerScope.markQueued(trackerReq.id, false);
|
|
31
31
|
}
|
|
32
32
|
return this.executeResilience(config, ctx);
|
|
33
|
-
}), (result) => this.extractTrackStatus(result))
|
|
33
|
+
}), (result) => this.extractTrackStatus(result), (result) => result.attempts.map(a => ({
|
|
34
|
+
statusCode: a.statusCode,
|
|
35
|
+
error: a.error,
|
|
36
|
+
latencyMs: a.latencyMs,
|
|
37
|
+
providerId: a.target.provider_id,
|
|
38
|
+
})));
|
|
34
39
|
this.sendResponse(reply, result.result, ctx);
|
|
35
40
|
return result;
|
|
36
41
|
}
|
|
@@ -16,6 +16,15 @@ import { applyProviderPatches } from "./patch/index.js";
|
|
|
16
16
|
const HTTP_ERROR_THRESHOLD = 400;
|
|
17
17
|
const MAX_LOG_FIELD_LENGTH = 80;
|
|
18
18
|
const UPSTREAM_ERROR_STATUS = 502;
|
|
19
|
+
/** 从 TransportResult 中提取最终 HTTP status code */
|
|
20
|
+
function getTransportStatusCode(result) {
|
|
21
|
+
if (result.kind === "success" || result.kind === "error" || result.kind === "stream_error")
|
|
22
|
+
return result.statusCode;
|
|
23
|
+
if (result.kind === "stream_success" || result.kind === "stream_abort")
|
|
24
|
+
return result.statusCode;
|
|
25
|
+
// kind === "throw":无 HTTP 状态码
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
19
28
|
function rejectAndReply(reply, params, error, errorMessage, providerId) {
|
|
20
29
|
insertRejectedLog({
|
|
21
30
|
db: params.db, logId: params.logId, apiType: params.apiType, model: params.model,
|
|
@@ -150,11 +159,11 @@ async function executeFailoverLoop(ctx) {
|
|
|
150
159
|
clientReq, upstreamReqBase, logId, routerKeyId, originalModel, sessionId,
|
|
151
160
|
failover: { isFailoverIteration, rootLogId: rootLogId },
|
|
152
161
|
}, resilienceResult.attempts, resilienceResult.result, startTime);
|
|
153
|
-
collectTransportMetrics(deps.db, apiType, resilienceResult.result, isStream, lastLogId, provider.id, resolved.backend_model, request);
|
|
162
|
+
collectTransportMetrics(deps.db, apiType, resilienceResult.result, isStream, lastLogId, provider.id, resolved.backend_model, request, routerKeyId, getTransportStatusCode(resilienceResult.result));
|
|
154
163
|
const tr = resilienceResult.result;
|
|
155
164
|
const succeeded = tr.kind === "success" || tr.kind === "stream_success" || tr.kind === "stream_abort";
|
|
156
165
|
if (succeeded)
|
|
157
|
-
deps.usageWindowTracker?.recordRequest(routerKeyId ?? undefined);
|
|
166
|
+
deps.usageWindowTracker?.recordRequest(provider.id, routerKeyId ?? undefined);
|
|
158
167
|
if (isStream && deps.tracker) {
|
|
159
168
|
const sc = deps.tracker.get(logId)?.streamContent;
|
|
160
169
|
const blocks = sc?.blocks;
|
|
@@ -23,4 +23,4 @@ export declare function logResilienceResult(db: Database.Database, params: {
|
|
|
23
23
|
sessionId?: string | null;
|
|
24
24
|
failover?: FailoverContext;
|
|
25
25
|
}, attempts: ResilienceAttempt[], result: TransportResult, startTime: number): string;
|
|
26
|
-
export declare function collectTransportMetrics(db: Database.Database, apiType: "openai" | "anthropic", result: TransportResult, isStream: boolean, lastSuccessLogId: string, providerId: string, backendModel: string, request: FastifyRequest): void;
|
|
26
|
+
export declare function collectTransportMetrics(db: Database.Database, apiType: "openai" | "anthropic", result: TransportResult, isStream: boolean, lastSuccessLogId: string, providerId: string, backendModel: string, request: FastifyRequest, routerKeyId?: string | null, statusCode?: number | null): void;
|
|
@@ -116,8 +116,11 @@ export function logResilienceResult(db, params, attempts, result, startTime) {
|
|
|
116
116
|
}
|
|
117
117
|
return lastSuccessLogId;
|
|
118
118
|
}
|
|
119
|
-
export function collectTransportMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request) {
|
|
120
|
-
const base = {
|
|
119
|
+
export function collectTransportMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request, routerKeyId, statusCode) {
|
|
120
|
+
const base = {
|
|
121
|
+
request_log_id: lastSuccessLogId, provider_id: providerId, backend_model: backendModel, api_type: apiType,
|
|
122
|
+
router_key_id: routerKeyId ?? null, status_code: statusCode ?? null,
|
|
123
|
+
};
|
|
121
124
|
try {
|
|
122
125
|
if (isStream && (result.kind === "stream_success" || result.kind === "stream_abort")) {
|
|
123
126
|
if (result.metrics) {
|
|
@@ -25,8 +25,6 @@ export interface ResilienceConfig {
|
|
|
25
25
|
failoverThreshold: number;
|
|
26
26
|
ruleMatcher?: RetryRuleMatcher;
|
|
27
27
|
isFailover: boolean;
|
|
28
|
-
/** DB 规则 max_retries 的全局安全阀,防止单规则配置导致过多重试 */
|
|
29
|
-
globalRetryCap?: number;
|
|
30
28
|
/** 全局迭代上限,防止极端配置导致 while(true) 循环过多 */
|
|
31
29
|
iterationCap?: number;
|
|
32
30
|
}
|
package/dist/proxy/resilience.js
CHANGED
|
@@ -29,7 +29,6 @@ export function createStrategy(rule) {
|
|
|
29
29
|
const RETRYABLE_THROW_CODES = new Set(["ETIMEDOUT", "ECONNRESET", "ECONNREFUSED"]);
|
|
30
30
|
const HTTP_TOO_MANY_REQUESTS = 429;
|
|
31
31
|
const DEFAULT_THROW_MAX_RETRIES = 3;
|
|
32
|
-
const DEFAULT_RETRY_CAP = 3;
|
|
33
32
|
const DEFAULT_ITERATION_CAP = 50;
|
|
34
33
|
// ---------- Internal helpers ----------
|
|
35
34
|
function isRetryableThrow(err) {
|
|
@@ -94,7 +93,7 @@ export class ResilienceLayer {
|
|
|
94
93
|
const matchedRule = body && config.ruleMatcher
|
|
95
94
|
? config.ruleMatcher.match(result.statusCode, body)
|
|
96
95
|
: null;
|
|
97
|
-
if (matchedRule && state.attemptCount <
|
|
96
|
+
if (matchedRule && state.attemptCount < matchedRule.max_retries) {
|
|
98
97
|
const strategy = createStrategy(matchedRule);
|
|
99
98
|
const headers = extractHeaders(result);
|
|
100
99
|
const retryAfterMs = result.statusCode === HTTP_TOO_MANY_REQUESTS
|
|
@@ -110,7 +109,7 @@ export class ResilienceLayer {
|
|
|
110
109
|
const body = extractBody(result);
|
|
111
110
|
if (body && config.ruleMatcher) {
|
|
112
111
|
const matchedRule = config.ruleMatcher.match(result.statusCode, body);
|
|
113
|
-
if (matchedRule && state.attemptCount <
|
|
112
|
+
if (matchedRule && state.attemptCount < matchedRule.max_retries) {
|
|
114
113
|
const strategy = createStrategy(matchedRule);
|
|
115
114
|
return { action: "retry", delayMs: strategy.getDelay(state.attemptCount) };
|
|
116
115
|
}
|