llm-simple-router 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/dist/admin/metrics.js +4 -4
  2. package/dist/admin/stats.js +5 -3
  3. package/dist/admin/usage.js +40 -19
  4. package/dist/db/metrics.d.ts +2 -0
  5. package/dist/db/metrics.js +4 -7
  6. package/dist/db/migrations/026_metrics_independent.sql +54 -0
  7. package/dist/db/stats.d.ts +3 -2
  8. package/dist/db/stats.js +15 -6
  9. package/dist/db/usage-windows.d.ts +5 -4
  10. package/dist/db/usage-windows.js +48 -20
  11. package/dist/middleware/auth.js +12 -2
  12. package/dist/monitor/request-tracker.d.ts +2 -1
  13. package/dist/monitor/request-tracker.js +3 -0
  14. package/dist/monitor/stats-aggregator.js +8 -7
  15. package/dist/proxy/orchestrator.js +6 -1
  16. package/dist/proxy/proxy-handler.js +11 -2
  17. package/dist/proxy/proxy-logging.d.ts +1 -1
  18. package/dist/proxy/proxy-logging.js +5 -2
  19. package/dist/proxy/resilience.d.ts +0 -2
  20. package/dist/proxy/resilience.js +2 -3
  21. package/dist/proxy/scope.d.ts +2 -2
  22. package/dist/proxy/scope.js +4 -2
  23. package/dist/proxy/usage-window-tracker.d.ts +5 -3
  24. package/dist/proxy/usage-window-tracker.js +21 -22
  25. package/dist/utils/time-range.d.ts +1 -1
  26. package/dist/utils/time-range.js +13 -7
  27. package/frontend-dist/assets/{CardContent-GNY_j_L3.js → CardContent-ByybpNZM.js} +1 -1
  28. package/frontend-dist/assets/{CardTitle-BhXJbSoh.js → CardTitle-Cv39_iQu.js} +1 -1
  29. package/frontend-dist/assets/{Checkbox-n_sh6Lvx.js → Checkbox-F8_Gy_s5.js} +1 -1
  30. package/frontend-dist/assets/{CollapsibleTrigger-DDCUOXDR.js → CollapsibleTrigger-BPzLViBo.js} +1 -1
  31. package/frontend-dist/assets/{Collection-DbtqQ1jF.js → Collection-Dafpcl-w.js} +1 -1
  32. package/frontend-dist/assets/Dashboard-BTwf4ZtI.js +3 -0
  33. package/frontend-dist/assets/{DialogTitle-BEWUnuJQ.js → DialogTitle-BMNhmnin.js} +1 -1
  34. package/frontend-dist/assets/{Input-CmibY9Fx.js → Input-BkzqSK7i.js} +1 -1
  35. package/frontend-dist/assets/{Label-Cs__wFH0.js → Label-DwqBcp6d.js} +1 -1
  36. package/frontend-dist/assets/{Login-BciEc1TW.js → Login-B7sSST00.js} +1 -1
  37. package/frontend-dist/assets/Logs-DJc0hZ8C.js +1 -0
  38. package/frontend-dist/assets/ModelMappings-BavaEbnL.js +1 -0
  39. package/frontend-dist/assets/Monitor-B4hCGdS-.js +1 -0
  40. package/frontend-dist/assets/{PopoverTrigger-DaKOMSVs.js → PopoverTrigger-vgVugQwU.js} +1 -1
  41. package/frontend-dist/assets/{PopperContent-DZ6plcjf.js → PopperContent-tf2A4fsa.js} +1 -1
  42. package/frontend-dist/assets/{Providers-u8utX74M.js → Providers-BmrsbthR.js} +1 -1
  43. package/frontend-dist/assets/{ProxyEnhancement-8_xhndGt.js → ProxyEnhancement-BQ02PeEF.js} +1 -1
  44. package/frontend-dist/assets/{RetryRules-D1psYDEP.js → RetryRules-H460Dyek.js} +1 -1
  45. package/frontend-dist/assets/{RouterKeys-ovPFGhjy.js → RouterKeys-D8rXsmpq.js} +1 -1
  46. package/frontend-dist/assets/{RovingFocusItem-Dsv9AkP7.js → RovingFocusItem-Bo0dNNmj.js} +1 -1
  47. package/frontend-dist/assets/{SelectValue-BoUWfZAg.js → SelectValue-BU16UnrX.js} +1 -1
  48. package/frontend-dist/assets/{Settings-DXF-6A8C.js → Settings-YiR7zqua.js} +1 -1
  49. package/frontend-dist/assets/{Setup-rVLqiz0d.js → Setup-D7EXZ1Nv.js} +1 -1
  50. package/frontend-dist/assets/{Switch-po5ZVBE3.js → Switch-CA_wdlEs.js} +1 -1
  51. package/frontend-dist/assets/{TableHeader-Zyvq_0p2.js → TableHeader-BuObvzlS.js} +1 -1
  52. package/frontend-dist/assets/{TabsTrigger-CgDhZGkT.js → TabsTrigger-DZIFRVA_.js} +1 -1
  53. package/frontend-dist/assets/{Teleport-CgTHarey.js → Teleport-FxAUQAZT.js} +1 -1
  54. package/frontend-dist/assets/{TooltipTrigger-C2qO21dQ.js → TooltipTrigger-D9nCGsBG.js} +1 -1
  55. package/frontend-dist/assets/{UnifiedRequestDialog-Dksad8eN.js → UnifiedRequestDialog-BPv5B17F.js} +1 -1
  56. package/frontend-dist/assets/{VisuallyHidden-fbPmoMwi.js → VisuallyHidden-clBSgYdG.js} +1 -1
  57. package/frontend-dist/assets/{VisuallyHiddenInput-7j8wkPrW.js → VisuallyHiddenInput-CmDbYWUO.js} +1 -1
  58. package/frontend-dist/assets/{alert-dialog-DbT3PzoF.js → alert-dialog-BvbdNhnK.js} +1 -1
  59. package/frontend-dist/assets/{badge-BVxnlnsH.js → badge-CcCt1-ig.js} +1 -1
  60. package/frontend-dist/assets/{button-BCrIpNwA.js → button-DeOsxcjG.js} +2 -2
  61. package/frontend-dist/assets/chevron-down-D_DCDFPY.js +1 -0
  62. package/frontend-dist/assets/{dialog-BNlCZpHK.js → dialog-CPO2KcC1.js} +1 -1
  63. package/frontend-dist/assets/{file-text-BavS6SrF.js → file-text-C-6LFEhP.js} +1 -1
  64. package/frontend-dist/assets/index-DHONWydQ.css +1 -0
  65. package/frontend-dist/assets/{index-DrBJPq6d.js → index-DW58MMV6.js} +1 -1
  66. package/frontend-dist/assets/{lib-CGpNhf06.js → lib-DkM_rWnj.js} +1 -1
  67. package/frontend-dist/assets/loader-circle-BS4uI1Z4.js +1 -0
  68. package/frontend-dist/assets/{ohash.D__AXeF1-DkJnWU8a.js → ohash.D__AXeF1-CBYQgVou.js} +1 -1
  69. package/frontend-dist/assets/{useClipboard-Bq8yZunx.js → useClipboard-NBCgpr6Z.js} +1 -1
  70. package/frontend-dist/assets/{useLogRetention-BWPm3G_A.js → useLogRetention-B_u8u74J.js} +1 -1
  71. package/frontend-dist/assets/useNonce-D1dqoOZO.js +1 -0
  72. package/frontend-dist/assets/x-DVLhwc3Q.js +1 -0
  73. package/frontend-dist/index.html +19 -19
  74. package/package.json +2 -2
  75. package/frontend-dist/assets/Dashboard-Dy9frcgO.js +0 -3
  76. package/frontend-dist/assets/Logs-BkqwWW0-.js +0 -1
  77. package/frontend-dist/assets/ModelMappings-DrCJ_TCf.js +0 -1
  78. package/frontend-dist/assets/Monitor-C-b4qyuI.js +0 -1
  79. package/frontend-dist/assets/chevron-down-CWBwGxSp.js +0 -1
  80. package/frontend-dist/assets/circle-question-mark-DRkkqjgG.js +0 -1
  81. package/frontend-dist/assets/index-BP4imfye.css +0 -1
  82. package/frontend-dist/assets/loader-circle-Cpd89XQ7.js +0 -1
  83. package/frontend-dist/assets/useNonce-D5lpSPNk.js +0 -1
  84. package/frontend-dist/assets/x-BFIp7DLt.js +0 -1
@@ -33,13 +33,13 @@ const TimeseriesQuerySchema = Type.Object({
33
33
  end_time: Type.Optional(Type.String()),
34
34
  });
35
35
  const DASHBOARD_PERIODS = new Set(["window", "weekly", "monthly"]);
36
- function resolveMetricsTime(query, db, routerKeyId) {
36
+ function resolveMetricsTime(query, db, routerKeyId, providerId) {
37
37
  if (query.start_time && query.end_time) {
38
38
  return { startTime: query.start_time, endTime: query.end_time, legacyPeriod: "30d" };
39
39
  }
40
40
  const period = query.period ?? "weekly";
41
41
  if (DASHBOARD_PERIODS.has(period)) {
42
- const range = resolveTimeRange(period, db, routerKeyId);
42
+ const range = resolveTimeRange(period, db, routerKeyId, providerId);
43
43
  return { startTime: range.startTime, endTime: range.endTime, legacyPeriod: "5h" };
44
44
  }
45
45
  return { legacyPeriod: period };
@@ -48,14 +48,14 @@ export const adminMetricsRoutes = (app, options, done) => {
48
48
  const { db } = options;
49
49
  app.get("/admin/api/metrics/summary", { schema: { querystring: SummaryQuerySchema } }, async (request, reply) => {
50
50
  const query = request.query;
51
- const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id);
51
+ const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id, query.provider_id);
52
52
  const summary = getMetricsSummary(db, legacyPeriod, query.provider_id, query.backend_model, query.router_key_id, startTime, endTime);
53
53
  return reply.send(summary);
54
54
  });
55
55
  app.get("/admin/api/metrics/timeseries", { schema: { querystring: TimeseriesQuerySchema } }, async (request, reply) => {
56
56
  const query = request.query;
57
57
  const metric = query.metric;
58
- const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id);
58
+ const { startTime, endTime, legacyPeriod } = resolveMetricsTime(query, db, query.router_key_id, query.provider_id);
59
59
  const timeseries = getMetricsTimeseries(db, legacyPeriod, metric, query.provider_id, query.backend_model, query.router_key_id, startTime, endTime);
60
60
  return reply.send(timeseries);
61
61
  });
@@ -10,6 +10,8 @@ const StatsQuerySchema = Type.Object({
10
10
  start_time: Type.Optional(Type.String()),
11
11
  end_time: Type.Optional(Type.String()),
12
12
  router_key_id: Type.Optional(Type.String()),
13
+ provider_id: Type.Optional(Type.String()),
14
+ backend_model: Type.Optional(Type.String()),
13
15
  });
14
16
  export const adminStatsRoutes = (app, options, done) => {
15
17
  app.get("/admin/api/stats", { schema: { querystring: StatsQuerySchema } }, async (request, reply) => {
@@ -21,12 +23,12 @@ export const adminStatsRoutes = (app, options, done) => {
21
23
  endTime = query.end_time;
22
24
  }
23
25
  else {
24
- const range = resolveTimeRange((query.period ?? "weekly"), options.db, query.router_key_id);
26
+ const range = resolveTimeRange((query.period ?? "weekly"), options.db, query.router_key_id, query.provider_id);
25
27
  startTime = range.startTime;
26
28
  endTime = range.endTime;
27
29
  }
28
- const stats = getStats(options.db, startTime, endTime, query.router_key_id);
29
- return reply.send(stats);
30
+ const stats = getStats(options.db, startTime, endTime, query.router_key_id, query.provider_id, query.backend_model);
31
+ return reply.send({ ...stats, startTime, endTime });
30
32
  });
31
33
  done();
32
34
  };
@@ -1,16 +1,26 @@
1
1
  import { Type } from "@sinclair/typebox";
2
2
  import { getWindowsInRange, getWindowUsage } from "../db/usage-windows.js";
3
+ import { getProviderById } from "../db/providers.js";
3
4
  import { resolveTimeRange } from "../utils/time-range.js";
4
5
  const UsageQuerySchema = Type.Object({
5
6
  router_key_id: Type.Optional(Type.String()),
7
+ provider_id: Type.Optional(Type.String()),
6
8
  });
7
- function getDailyUsage(db, startTime, endTime, routerKeyId) {
8
- const routerKeyFilter = routerKeyId
9
- ? " AND rl.router_key_id = ?"
10
- : "";
11
- const params = routerKeyId
12
- ? [startTime, endTime, routerKeyId]
13
- : [startTime, endTime];
9
+ function getDailyUsage(db, startTime, endTime, routerKeyId, providerId) {
10
+ const conditions = [
11
+ "rm.is_complete = 1",
12
+ "rm.created_at >= datetime(?)",
13
+ "rm.created_at < datetime(?)",
14
+ ];
15
+ const params = [startTime, endTime];
16
+ if (routerKeyId) {
17
+ conditions.push("rm.router_key_id = ?");
18
+ params.push(routerKeyId);
19
+ }
20
+ if (providerId) {
21
+ conditions.push("rm.provider_id = ?");
22
+ params.push(providerId);
23
+ }
14
24
  return db.prepare(`
15
25
  SELECT
16
26
  date(rm.created_at) AS date,
@@ -18,37 +28,48 @@ function getDailyUsage(db, startTime, endTime, routerKeyId) {
18
28
  COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
19
29
  COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
20
30
  FROM request_metrics rm
21
- JOIN request_logs rl ON rl.id = rm.request_log_id
22
- WHERE rm.is_complete = 1
23
- AND rm.created_at >= datetime(?)
24
- AND rm.created_at < datetime(?)
25
- ${routerKeyFilter}
31
+ WHERE ${conditions.join(" AND ")}
26
32
  GROUP BY date(rm.created_at)
27
33
  ORDER BY date ASC
28
34
  `).all(...params);
29
35
  }
36
+ function resolveProviderName(db, providerId) {
37
+ if (!providerId)
38
+ return null;
39
+ return getProviderById(db, providerId)?.name ?? null;
40
+ }
30
41
  export const adminUsageRoutes = (app, options, done) => {
31
42
  const { db } = options;
32
43
  app.get("/admin/api/usage/windows", { schema: { querystring: UsageQuerySchema } }, async (request) => {
33
44
  const query = request.query;
34
- const range = resolveTimeRange("window", db, query.router_key_id);
35
- const windows = getWindowsInRange(db, range.startTime, range.endTime, query.router_key_id);
36
- if (windows.length === 0)
45
+ if (query.provider_id) {
46
+ const range = resolveTimeRange("window", db, query.router_key_id, query.provider_id);
47
+ const windows = getWindowsInRange(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
48
+ if (windows.length === 0)
49
+ return [];
50
+ return windows.map(w => ({
51
+ window: { ...w, provider_name: resolveProviderName(db, w.provider_id) },
52
+ usage: getWindowUsage(db, w.start_time, w.end_time, query.router_key_id, query.provider_id),
53
+ }));
54
+ }
55
+ const allWindows = getWindowsInRange(db, "1970-01-01", "2099-12-31", query.router_key_id)
56
+ .filter((w) => w.provider_id !== null);
57
+ if (allWindows.length === 0)
37
58
  return [];
38
- return windows.map(w => ({
39
- window: w,
59
+ return allWindows.map(w => ({
60
+ window: { ...w, provider_name: resolveProviderName(db, w.provider_id) },
40
61
  usage: getWindowUsage(db, w.start_time, w.end_time, query.router_key_id),
41
62
  }));
42
63
  });
43
64
  app.get("/admin/api/usage/weekly", { schema: { querystring: UsageQuerySchema } }, async (request) => {
44
65
  const query = request.query;
45
66
  const range = resolveTimeRange("weekly", db, query.router_key_id);
46
- return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id);
67
+ return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
47
68
  });
48
69
  app.get("/admin/api/usage/monthly", { schema: { querystring: UsageQuerySchema } }, async (request) => {
49
70
  const query = request.query;
50
71
  const range = resolveTimeRange("monthly", db, query.router_key_id);
51
- return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id);
72
+ return getDailyUsage(db, range.startTime, range.endTime, query.router_key_id, query.provider_id);
52
73
  });
53
74
  done();
54
75
  };
@@ -23,6 +23,8 @@ export type MetricsInsert = {
23
23
  provider_id: string;
24
24
  backend_model: string;
25
25
  api_type: string;
26
+ router_key_id?: string | null;
27
+ status_code?: number | null;
26
28
  input_tokens?: number | null;
27
29
  output_tokens?: number | null;
28
30
  cache_creation_tokens?: number | null;
@@ -2,8 +2,8 @@ import { randomUUID } from "crypto";
2
2
  import { MS_PER_SECOND } from "../constants.js";
3
3
  export function insertMetrics(db, m) {
4
4
  const id = randomUUID();
5
- db.prepare(`INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete)
6
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, m.request_log_id, m.provider_id, m.backend_model, m.api_type, m.input_tokens ?? null, m.output_tokens ?? null, m.cache_creation_tokens ?? null, m.cache_read_tokens ?? null, m.ttft_ms ?? null, m.total_duration_ms ?? null, m.tokens_per_second ?? null, m.stop_reason ?? null, m.is_complete ?? 1);
5
+ db.prepare(`INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, router_key_id, status_code, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete)
6
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, m.request_log_id, m.provider_id, m.backend_model, m.api_type, m.router_key_id ?? null, m.status_code ?? null, m.input_tokens ?? null, m.output_tokens ?? null, m.cache_creation_tokens ?? null, m.cache_read_tokens ?? null, m.ttft_ms ?? null, m.total_duration_ms ?? null, m.tokens_per_second ?? null, m.stop_reason ?? null, m.is_complete ?? 1);
7
7
  return id;
8
8
  }
9
9
  const PERIOD_OFFSET = {
@@ -64,8 +64,7 @@ export function getMetricsSummary(db, period, providerId, backendModel, routerKe
64
64
  params.push(backendModel);
65
65
  }
66
66
  if (routerKeyId) {
67
- joins.push("LEFT JOIN request_logs rl ON rl.id = rm.request_log_id");
68
- conditions.push("rl.router_key_id = ?");
67
+ conditions.push("rm.router_key_id = ?");
69
68
  params.push(routerKeyId);
70
69
  }
71
70
  return db.prepare(`
@@ -108,19 +107,17 @@ export function getMetricsTimeseries(db, period, metric, providerId, backendMode
108
107
  params.push(backendModel);
109
108
  }
110
109
  if (routerKeyId) {
111
- conditions.push("rl.router_key_id = ?");
110
+ conditions.push("rm.router_key_id = ?");
112
111
  params.push(routerKeyId);
113
112
  }
114
113
  const where = conditions.join(" AND ");
115
114
  const expr = METRIC_EXPR[metric];
116
- const joinClause = routerKeyId ? "LEFT JOIN request_logs rl ON rl.id = rm.request_log_id" : "";
117
115
  const rows = db.prepare(`
118
116
  SELECT
119
117
  (unixepoch(rm.created_at) / ?) * ? AS bucket_key,
120
118
  ${expr} AS avg_value,
121
119
  COUNT(*) AS count
122
120
  FROM request_metrics rm
123
- ${joinClause}
124
121
  WHERE ${where}
125
122
  GROUP BY bucket_key
126
123
  ORDER BY bucket_key ASC
@@ -0,0 +1,54 @@
1
+ -- Metrics 独立化:request_metrics 增加路由维度列,解除级联删除依赖
2
+ -- usage_windows 增加 provider_id 支持按 provider 维度追踪使用量
3
+
4
+ -- 1. 重建 request_metrics:CASCADE -> SET NULL,同时新增 router_key_id / status_code
5
+ CREATE TABLE request_metrics_new (
6
+ id TEXT PRIMARY KEY,
7
+ request_log_id TEXT UNIQUE REFERENCES request_logs(id) ON DELETE SET NULL,
8
+ provider_id TEXT NOT NULL,
9
+ backend_model TEXT NOT NULL,
10
+ api_type TEXT NOT NULL,
11
+ input_tokens INTEGER,
12
+ output_tokens INTEGER,
13
+ cache_creation_tokens INTEGER,
14
+ cache_read_tokens INTEGER,
15
+ ttft_ms INTEGER,
16
+ total_duration_ms INTEGER,
17
+ tokens_per_second REAL,
18
+ stop_reason TEXT,
19
+ is_complete INTEGER NOT NULL DEFAULT 1,
20
+ router_key_id TEXT,
21
+ status_code INTEGER,
22
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
23
+ );
24
+
25
+ INSERT INTO request_metrics_new
26
+ (id, request_log_id, provider_id, backend_model, api_type,
27
+ input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
28
+ ttft_ms, total_duration_ms, tokens_per_second, stop_reason,
29
+ is_complete, created_at)
30
+ SELECT
31
+ id, request_log_id, provider_id, backend_model, api_type,
32
+ input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
33
+ ttft_ms, total_duration_ms, tokens_per_second, stop_reason,
34
+ is_complete, created_at
35
+ FROM request_metrics;
36
+
37
+ -- 回填 router_key_id 和 status_code 从 request_logs
38
+ UPDATE request_metrics_new
39
+ SET
40
+ router_key_id = rl.router_key_id,
41
+ status_code = rl.status_code
42
+ FROM request_logs rl
43
+ WHERE rl.id = request_metrics_new.request_log_id;
44
+
45
+ DROP TABLE request_metrics;
46
+ ALTER TABLE request_metrics_new RENAME TO request_metrics;
47
+
48
+ -- 重建原有索引
49
+ CREATE INDEX idx_metrics_time_provider_model ON request_metrics(created_at, provider_id, backend_model);
50
+ CREATE INDEX idx_metrics_api_type_created_at ON request_metrics(api_type, created_at);
51
+
52
+ -- 2. usage_windows 增加 provider_id 列
53
+ ALTER TABLE usage_windows ADD COLUMN provider_id TEXT;
54
+ CREATE INDEX IF NOT EXISTS idx_usage_windows_provider_id ON usage_windows(provider_id);
@@ -3,6 +3,7 @@ export interface Stats {
3
3
  totalRequests: number;
4
4
  successRate: number;
5
5
  avgTps: number;
6
- totalTokens: number;
6
+ totalInputTokens: number;
7
+ totalOutputTokens: number;
7
8
  }
8
- export declare function getStats(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string): Stats;
9
+ export declare function getStats(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string, providerId?: string, backendModel?: string): Stats;
package/dist/db/stats.js CHANGED
@@ -1,4 +1,4 @@
1
- export function getStats(db, startTime, endTime, routerKeyId) {
1
+ export function getStats(db, startTime, endTime, routerKeyId, providerId, backendModel) {
2
2
  const conditions = [
3
3
  "rm.is_complete = 1",
4
4
  "rm.created_at >= datetime(?)",
@@ -6,18 +6,26 @@ export function getStats(db, startTime, endTime, routerKeyId) {
6
6
  ];
7
7
  const params = [startTime, endTime];
8
8
  if (routerKeyId) {
9
- conditions.push("rl.router_key_id = ?");
9
+ conditions.push("rm.router_key_id = ?");
10
10
  params.push(routerKeyId);
11
11
  }
12
+ if (providerId) {
13
+ conditions.push("rm.provider_id = ?");
14
+ params.push(providerId);
15
+ }
16
+ if (backendModel) {
17
+ conditions.push("rm.backend_model = ?");
18
+ params.push(backendModel);
19
+ }
12
20
  const where = conditions.join(" AND ");
13
21
  const row = db.prepare(`
14
22
  SELECT
15
23
  COUNT(*) AS total_requests,
16
- SUM(CASE WHEN rl.status_code >= 200 AND rl.status_code < 300 THEN 1 ELSE 0 END) AS success_count,
24
+ SUM(CASE WHEN rm.status_code >= 200 AND rm.status_code < 300 THEN 1 ELSE 0 END) AS success_count,
17
25
  AVG(rm.tokens_per_second) AS avg_tps,
18
- COALESCE(SUM(rm.input_tokens), 0) + COALESCE(SUM(rm.output_tokens), 0) AS total_tokens
26
+ COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
27
+ COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
19
28
  FROM request_metrics rm
20
- JOIN request_logs rl ON rl.id = rm.request_log_id
21
29
  WHERE ${where}
22
30
  `).get(...params);
23
31
  const total = row?.total_requests ?? 0;
@@ -25,6 +33,7 @@ export function getStats(db, startTime, endTime, routerKeyId) {
25
33
  totalRequests: total,
26
34
  successRate: total > 0 ? (row?.success_count ?? 0) / total : 0,
27
35
  avgTps: row?.avg_tps ?? 0,
28
- totalTokens: row?.total_tokens ?? 0,
36
+ totalInputTokens: row?.total_input_tokens ?? 0,
37
+ totalOutputTokens: row?.total_output_tokens ?? 0,
29
38
  };
30
39
  }
@@ -2,6 +2,7 @@ import Database from "better-sqlite3";
2
2
  export interface UsageWindow {
3
3
  id: string;
4
4
  router_key_id: string | null;
5
+ provider_id: string | null;
5
6
  start_time: string;
6
7
  end_time: string;
7
8
  created_at: string;
@@ -12,8 +13,8 @@ export interface WindowUsage {
12
13
  total_output_tokens: number;
13
14
  }
14
15
  export declare function insertWindow(db: Database.Database, w: Omit<UsageWindow, "created_at">): string;
15
- export declare function getLatestWindow(db: Database.Database, routerKeyId?: string): UsageWindow | null;
16
- /** 返回与 [start, end) 区间有重叠的窗口 */
17
- export declare function getWindowsInRange(db: Database.Database, start: string, end: string, routerKeyId?: string): UsageWindow[];
16
+ export declare function getLatestWindow(db: Database.Database, routerKeyId?: string, providerId?: string): UsageWindow | null;
17
+ /** 返回与 [start, end) 区间有重叠的窗口。可选参数不传表示不过滤该维度(与 getLatestWindow 的 IS NULL 语义不同) */
18
+ export declare function getWindowsInRange(db: Database.Database, start: string, end: string, routerKeyId?: string, providerId?: string): UsageWindow[];
18
19
  /** 聚合指定时间窗口内的请求计数和 token 用量 */
19
- export declare function getWindowUsage(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string): WindowUsage;
20
+ export declare function getWindowUsage(db: Database.Database, startTime: string, endTime: string, routerKeyId?: string, providerId?: string): WindowUsage;
@@ -1,37 +1,65 @@
1
1
  import { randomUUID } from "crypto";
2
2
  export function insertWindow(db, w) {
3
3
  const id = w.id || randomUUID();
4
- db.prepare("INSERT INTO usage_windows (id, router_key_id, start_time, end_time) VALUES (?, ?, ?, ?)").run(id, w.router_key_id ?? null, w.start_time, w.end_time);
4
+ db.prepare("INSERT INTO usage_windows (id, router_key_id, provider_id, start_time, end_time) VALUES (?, ?, ?, ?, ?)").run(id, w.router_key_id ?? null, w.provider_id ?? null, w.start_time, w.end_time);
5
5
  return id;
6
6
  }
7
- export function getLatestWindow(db, routerKeyId) {
8
- const sql = routerKeyId
9
- ? "SELECT * FROM usage_windows WHERE router_key_id = ? ORDER BY start_time DESC LIMIT 1"
10
- : "SELECT * FROM usage_windows ORDER BY start_time DESC LIMIT 1";
11
- const params = routerKeyId ? [routerKeyId] : [];
7
+ export function getLatestWindow(db, routerKeyId, providerId) {
8
+ const conditions = [];
9
+ const params = [];
10
+ if (routerKeyId) {
11
+ conditions.push("router_key_id = ?");
12
+ params.push(routerKeyId);
13
+ }
14
+ else {
15
+ conditions.push("router_key_id IS NULL");
16
+ }
17
+ if (providerId) {
18
+ conditions.push("provider_id = ?");
19
+ params.push(providerId);
20
+ }
21
+ else {
22
+ conditions.push("provider_id IS NULL");
23
+ }
24
+ const sql = `SELECT * FROM usage_windows WHERE ${conditions.join(" AND ")} ORDER BY start_time DESC LIMIT 1`;
12
25
  return db.prepare(sql).get(...params) ?? null;
13
26
  }
14
- /** 返回与 [start, end) 区间有重叠的窗口 */
15
- export function getWindowsInRange(db, start, end, routerKeyId) {
27
+ /** 返回与 [start, end) 区间有重叠的窗口。可选参数不传表示不过滤该维度(与 getLatestWindow 的 IS NULL 语义不同) */
28
+ export function getWindowsInRange(db, start, end, routerKeyId, providerId) {
29
+ const conditions = ["start_time < ?", "end_time > ?"];
30
+ const params = [end, start];
16
31
  if (routerKeyId) {
17
- return db.prepare("SELECT * FROM usage_windows WHERE start_time < ? AND end_time > ? AND router_key_id = ? ORDER BY start_time ASC").all(end, start, routerKeyId);
32
+ conditions.push("router_key_id = ?");
33
+ params.push(routerKeyId);
34
+ }
35
+ if (providerId) {
36
+ conditions.push("provider_id = ?");
37
+ params.push(providerId);
18
38
  }
19
- return db.prepare("SELECT * FROM usage_windows WHERE start_time < ? AND end_time > ? ORDER BY start_time ASC").all(end, start);
39
+ return db.prepare(`SELECT * FROM usage_windows WHERE ${conditions.join(" AND ")} ORDER BY start_time ASC`).all(...params);
20
40
  }
21
41
  /** 聚合指定时间窗口内的请求计数和 token 用量 */
22
- export function getWindowUsage(db, startTime, endTime, routerKeyId) {
23
- const baseSql = `
42
+ export function getWindowUsage(db, startTime, endTime, routerKeyId, providerId) {
43
+ const conditions = [
44
+ "rm.is_complete = 1",
45
+ "rm.created_at >= datetime(?)",
46
+ "rm.created_at < datetime(?)",
47
+ ];
48
+ const params = [startTime, endTime];
49
+ if (routerKeyId) {
50
+ conditions.push("rm.router_key_id = ?");
51
+ params.push(routerKeyId);
52
+ }
53
+ if (providerId) {
54
+ conditions.push("rm.provider_id = ?");
55
+ params.push(providerId);
56
+ }
57
+ return db.prepare(`
24
58
  SELECT
25
59
  COUNT(*) AS request_count,
26
60
  COALESCE(SUM(rm.input_tokens), 0) AS total_input_tokens,
27
61
  COALESCE(SUM(rm.output_tokens), 0) AS total_output_tokens
28
62
  FROM request_metrics rm
29
- JOIN request_logs rl ON rl.id = rm.request_log_id
30
- WHERE rm.is_complete = 1
31
- AND rm.created_at >= datetime(?)
32
- AND rm.created_at < datetime(?)`;
33
- if (routerKeyId) {
34
- return db.prepare(`${baseSql} AND rl.router_key_id = ?`).get(startTime, endTime, routerKeyId);
35
- }
36
- return db.prepare(baseSql).get(startTime, endTime);
63
+ WHERE ${conditions.join(" AND ")}
64
+ `).get(...params);
37
65
  }
@@ -53,15 +53,25 @@ const authMiddlewareRaw = (app, options, done) => {
53
53
  reply.code(HTTP_SERVICE_UNAVAILABLE).send({ error: { message: "Service not initialized" } });
54
54
  return reply;
55
55
  }
56
+ let token;
56
57
  const authHeader = request.headers.authorization;
57
- if (!authHeader || !authHeader.startsWith("Bearer ")) {
58
+ if (authHeader && authHeader.startsWith("Bearer ")) {
59
+ token = authHeader.slice(BEARER_PREFIX_LENGTH);
60
+ }
61
+ else {
62
+ // Fallback: Anthropic SDK sends API key via x-api-key header
63
+ const apiKeyHeader = request.headers["x-api-key"];
64
+ if (apiKeyHeader) {
65
+ token = apiKeyHeader;
66
+ }
67
+ }
68
+ if (!token) {
58
69
  if (proxyApiType) {
59
70
  logRejectedAuth(options.db, proxyApiType, HTTP_UNAUTHORIZED, "Invalid API key", request);
60
71
  }
61
72
  unauthorizedReply(reply);
62
73
  return reply;
63
74
  }
64
- const token = authHeader.slice(BEARER_PREFIX_LENGTH);
65
75
  const hash = createHash("sha256").update(token).digest("hex");
66
76
  const row = stmt.get(hash);
67
77
  if (!row) {
@@ -2,7 +2,7 @@ import type { ServerResponse } from "node:http";
2
2
  import { StatsAggregator } from "./stats-aggregator.js";
3
3
  import { RuntimeCollector } from "./runtime-collector.js";
4
4
  import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
5
- import type { ActiveRequest, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
5
+ import type { ActiveRequest, AttemptSnapshot, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
6
6
  export interface TrackerLogger {
7
7
  debug(obj: Record<string, unknown>, msg: string): void;
8
8
  warn(obj: Record<string, unknown>, msg: string): void;
@@ -31,6 +31,7 @@ export declare class RequestTracker {
31
31
  complete(id: string, result: {
32
32
  status: "completed" | "failed";
33
33
  statusCode?: number;
34
+ attempts?: AttemptSnapshot[];
34
35
  }): void;
35
36
  getActive(): ActiveRequest[];
36
37
  getRecent(limit?: number): ActiveRequest[];
@@ -73,6 +73,7 @@ export class RequestTracker {
73
73
  ...req,
74
74
  status: result.status,
75
75
  completedAt: now,
76
+ attempts: result.attempts ?? req.attempts,
76
77
  };
77
78
  this.activeMap.delete(id);
78
79
  this.streamAccumulators.delete(id);
@@ -161,6 +162,7 @@ export class RequestTracker {
161
162
  if (this.pushTimer)
162
163
  return;
163
164
  this.tickCount = 0;
165
+ this.runtimeCollector.start();
164
166
  this.pushTimer = setInterval(() => {
165
167
  this.tickCount++;
166
168
  this.cleanupRecent();
@@ -179,6 +181,7 @@ export class RequestTracker {
179
181
  clearInterval(this.pushTimer);
180
182
  this.pushTimer = null;
181
183
  }
184
+ this.runtimeCollector.stop();
182
185
  }
183
186
  broadcast(event, data) {
184
187
  // Strip clientRequest from broadcasts to reduce bandwidth;
@@ -34,13 +34,13 @@ function emptyAccumulator() {
34
34
  successCount: 0,
35
35
  errorCount: 0,
36
36
  retryCount: 0,
37
- latencySum: 0,
38
- latencyCount: 0,
37
+ latencyBuffer: new RingBuffer(PROVIDER_LATENCY_CAPACITY),
39
38
  errorsByCode: new Map(),
40
39
  };
41
40
  }
42
41
  const TOP_ERRORS_LIMIT = 5;
43
42
  const DEFAULT_CAPACITY = 1000;
43
+ const PROVIDER_LATENCY_CAPACITY = 200;
44
44
  const HTTP_SUCCESS_RANGE_MIN = 200;
45
45
  const HTTP_SUCCESS_RANGE_MAX = 400;
46
46
  const PERCENTILE_P50 = 0.5;
@@ -103,8 +103,7 @@ export class StatsAggregator {
103
103
  acc = emptyAccumulator();
104
104
  this.providers.set(providerId, acc);
105
105
  }
106
- acc.latencySum += ms;
107
- acc.latencyCount++;
106
+ acc.latencyBuffer.push(ms);
108
107
  }
109
108
  getStats() {
110
109
  const sorted = this.latencyBuffer.sorted();
@@ -123,9 +122,7 @@ export class StatsAggregator {
123
122
  totalRequests: acc.totalRequests,
124
123
  successCount: acc.successCount,
125
124
  errorCount: acc.errorCount,
126
- avgLatencyMs: acc.latencyCount > 0
127
- ? acc.latencySum / acc.latencyCount
128
- : 0,
125
+ avgLatencyMs: avgFromBuffer(acc.latencyBuffer),
129
126
  retryCount: acc.retryCount,
130
127
  topErrors,
131
128
  };
@@ -164,3 +161,7 @@ function percentile(sorted, p) {
164
161
  const idx = Math.ceil(p * sorted.length) - 1;
165
162
  return sorted[Math.max(0, Math.min(idx, sorted.length - 1))];
166
163
  }
164
+ function avgFromBuffer(buf) {
165
+ const sorted = buf.sorted();
166
+ return sorted.length > 0 ? sorted.reduce((s, v) => s + v, 0) / sorted.length : 0;
167
+ }
@@ -30,7 +30,12 @@ export class ProxyOrchestrator {
30
30
  this.deps.trackerScope.markQueued(trackerReq.id, false);
31
31
  }
32
32
  return this.executeResilience(config, ctx);
33
- }), (result) => this.extractTrackStatus(result));
33
+ }), (result) => this.extractTrackStatus(result), (result) => result.attempts.map(a => ({
34
+ statusCode: a.statusCode,
35
+ error: a.error,
36
+ latencyMs: a.latencyMs,
37
+ providerId: a.target.provider_id,
38
+ })));
34
39
  this.sendResponse(reply, result.result, ctx);
35
40
  return result;
36
41
  }
@@ -16,6 +16,15 @@ import { applyProviderPatches } from "./patch/index.js";
16
16
  const HTTP_ERROR_THRESHOLD = 400;
17
17
  const MAX_LOG_FIELD_LENGTH = 80;
18
18
  const UPSTREAM_ERROR_STATUS = 502;
19
/**
 * Extract the final HTTP status code from a transport result.
 *
 * @param result Transport result object discriminated by its `kind` field.
 * @returns `result.statusCode` for the kinds that carry an HTTP status
 *   ("success", "error", "stream_error", "stream_success", "stream_abort"),
 *   or `null` when there is none (kind "throw", any other kind, or a
 *   missing `statusCode`).
 */
function getTransportStatusCode(result) {
    switch (result.kind) {
        case "success":
        case "error":
        case "stream_error":
        case "stream_success":
        case "stream_abort":
            // Normalise a missing status to null so callers only ever see a number or null.
            return result.statusCode ?? null;
        default:
            // kind === "throw": there is no HTTP status code.
            return null;
    }
}
19
28
  function rejectAndReply(reply, params, error, errorMessage, providerId) {
20
29
  insertRejectedLog({
21
30
  db: params.db, logId: params.logId, apiType: params.apiType, model: params.model,
@@ -150,11 +159,11 @@ async function executeFailoverLoop(ctx) {
150
159
  clientReq, upstreamReqBase, logId, routerKeyId, originalModel, sessionId,
151
160
  failover: { isFailoverIteration, rootLogId: rootLogId },
152
161
  }, resilienceResult.attempts, resilienceResult.result, startTime);
153
- collectTransportMetrics(deps.db, apiType, resilienceResult.result, isStream, lastLogId, provider.id, resolved.backend_model, request);
162
+ collectTransportMetrics(deps.db, apiType, resilienceResult.result, isStream, lastLogId, provider.id, resolved.backend_model, request, routerKeyId, getTransportStatusCode(resilienceResult.result));
154
163
  const tr = resilienceResult.result;
155
164
  const succeeded = tr.kind === "success" || tr.kind === "stream_success" || tr.kind === "stream_abort";
156
165
  if (succeeded)
157
- deps.usageWindowTracker?.recordRequest(routerKeyId ?? undefined);
166
+ deps.usageWindowTracker?.recordRequest(provider.id, routerKeyId ?? undefined);
158
167
  if (isStream && deps.tracker) {
159
168
  const sc = deps.tracker.get(logId)?.streamContent;
160
169
  const blocks = sc?.blocks;
@@ -23,4 +23,4 @@ export declare function logResilienceResult(db: Database.Database, params: {
23
23
  sessionId?: string | null;
24
24
  failover?: FailoverContext;
25
25
  }, attempts: ResilienceAttempt[], result: TransportResult, startTime: number): string;
26
- export declare function collectTransportMetrics(db: Database.Database, apiType: "openai" | "anthropic", result: TransportResult, isStream: boolean, lastSuccessLogId: string, providerId: string, backendModel: string, request: FastifyRequest): void;
26
+ export declare function collectTransportMetrics(db: Database.Database, apiType: "openai" | "anthropic", result: TransportResult, isStream: boolean, lastSuccessLogId: string, providerId: string, backendModel: string, request: FastifyRequest, routerKeyId?: string | null, statusCode?: number | null): void;
@@ -116,8 +116,11 @@ export function logResilienceResult(db, params, attempts, result, startTime) {
116
116
  }
117
117
  return lastSuccessLogId;
118
118
  }
119
- export function collectTransportMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request) {
120
- const base = { request_log_id: lastSuccessLogId, provider_id: providerId, backend_model: backendModel, api_type: apiType };
119
+ export function collectTransportMetrics(db, apiType, result, isStream, lastSuccessLogId, providerId, backendModel, request, routerKeyId, statusCode) {
120
+ const base = {
121
+ request_log_id: lastSuccessLogId, provider_id: providerId, backend_model: backendModel, api_type: apiType,
122
+ router_key_id: routerKeyId ?? null, status_code: statusCode ?? null,
123
+ };
121
124
  try {
122
125
  if (isStream && (result.kind === "stream_success" || result.kind === "stream_abort")) {
123
126
  if (result.metrics) {
@@ -25,8 +25,6 @@ export interface ResilienceConfig {
25
25
  failoverThreshold: number;
26
26
  ruleMatcher?: RetryRuleMatcher;
27
27
  isFailover: boolean;
28
- /** DB 规则 max_retries 的全局安全阀,防止单规则配置导致过多重试 */
29
- globalRetryCap?: number;
30
28
  /** 全局迭代上限,防止极端配置导致 while(true) 循环过多 */
31
29
  iterationCap?: number;
32
30
  }
@@ -29,7 +29,6 @@ export function createStrategy(rule) {
29
29
  const RETRYABLE_THROW_CODES = new Set(["ETIMEDOUT", "ECONNRESET", "ECONNREFUSED"]);
30
30
  const HTTP_TOO_MANY_REQUESTS = 429;
31
31
  const DEFAULT_THROW_MAX_RETRIES = 3;
32
- const DEFAULT_RETRY_CAP = 3;
33
32
  const DEFAULT_ITERATION_CAP = 50;
34
33
  // ---------- Internal helpers ----------
35
34
  function isRetryableThrow(err) {
@@ -94,7 +93,7 @@ export class ResilienceLayer {
94
93
  const matchedRule = body && config.ruleMatcher
95
94
  ? config.ruleMatcher.match(result.statusCode, body)
96
95
  : null;
97
- if (matchedRule && state.attemptCount < Math.min(matchedRule.max_retries, config.globalRetryCap ?? DEFAULT_RETRY_CAP)) {
96
+ if (matchedRule && state.attemptCount < matchedRule.max_retries) {
98
97
  const strategy = createStrategy(matchedRule);
99
98
  const headers = extractHeaders(result);
100
99
  const retryAfterMs = result.statusCode === HTTP_TOO_MANY_REQUESTS
@@ -110,7 +109,7 @@ export class ResilienceLayer {
110
109
  const body = extractBody(result);
111
110
  if (body && config.ruleMatcher) {
112
111
  const matchedRule = config.ruleMatcher.match(result.statusCode, body);
113
- if (matchedRule && state.attemptCount < Math.min(matchedRule.max_retries, config.globalRetryCap ?? DEFAULT_RETRY_CAP)) {
112
+ if (matchedRule && state.attemptCount < matchedRule.max_retries) {
114
113
  const strategy = createStrategy(matchedRule);
115
114
  return { action: "retry", delayMs: strategy.getDelay(state.attemptCount) };
116
115
  }