llm-simple-router 0.2.0 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/README.md +11 -0
  2. package/dist/admin/monitor.d.ts +7 -0
  3. package/dist/admin/monitor.js +25 -0
  4. package/dist/admin/providers.d.ts +4 -0
  5. package/dist/admin/providers.js +57 -9
  6. package/dist/admin/retry-rules.js +6 -3
  7. package/dist/admin/routes.d.ts +4 -0
  8. package/dist/admin/routes.js +3 -1
  9. package/dist/admin/setup.js +8 -5
  10. package/dist/db/index.d.ts +1 -1
  11. package/dist/db/index.js +1 -1
  12. package/dist/db/mappings.js +6 -2
  13. package/dist/db/migrations/017_add_provider_concurrency.sql +3 -0
  14. package/dist/db/providers.d.ts +12 -1
  15. package/dist/db/providers.js +8 -3
  16. package/dist/db/retry-rules.js +4 -1
  17. package/dist/db/router-keys.js +3 -1
  18. package/dist/index.js +36 -5
  19. package/dist/metrics/sse-metrics-transform.d.ts +17 -1
  20. package/dist/metrics/sse-metrics-transform.js +33 -2
  21. package/dist/middleware/auth.js +5 -4
  22. package/dist/monitor/request-tracker.d.ts +49 -0
  23. package/dist/monitor/request-tracker.js +279 -0
  24. package/dist/monitor/runtime-collector.d.ts +11 -0
  25. package/dist/monitor/runtime-collector.js +41 -0
  26. package/dist/monitor/stats-aggregator.d.ts +22 -0
  27. package/dist/monitor/stats-aggregator.js +166 -0
  28. package/dist/monitor/types.d.ts +84 -0
  29. package/dist/monitor/types.js +1 -0
  30. package/dist/proxy/anthropic.d.ts +4 -0
  31. package/dist/proxy/anthropic.js +10 -2
  32. package/dist/proxy/enhancement-handler.js +3 -1
  33. package/dist/proxy/mapping-resolver.js +6 -2
  34. package/dist/proxy/openai.d.ts +4 -0
  35. package/dist/proxy/openai.js +10 -2
  36. package/dist/proxy/proxy-core.d.ts +6 -0
  37. package/dist/proxy/proxy-core.js +176 -85
  38. package/dist/proxy/retry.d.ts +1 -1
  39. package/dist/proxy/retry.js +3 -2
  40. package/dist/proxy/semaphore.d.ts +27 -0
  41. package/dist/proxy/semaphore.js +125 -0
  42. package/dist/utils/password.js +2 -1
  43. package/frontend-dist/assets/{CardContent-BE9fukPi.js → CardContent-DKzAH8lX.js} +1 -1
  44. package/frontend-dist/assets/{CardHeader-D5lVaeAA.js → CardHeader-BBKKDXEh.js} +1 -1
  45. package/frontend-dist/assets/{CardTitle-H-zwhi3Z.js → CardTitle-BQtpsfYd.js} +1 -1
  46. package/frontend-dist/assets/Checkbox-DhBbPKjw.js +1 -0
  47. package/frontend-dist/assets/CollapsibleTrigger-BC4bE5yr.js +1 -0
  48. package/frontend-dist/assets/Dashboard-3z77m9VQ.js +3 -0
  49. package/frontend-dist/assets/DialogTitle-Bh2A7j2j.js +1 -0
  50. package/frontend-dist/assets/Input-C5_w9X6Y.js +1 -0
  51. package/frontend-dist/assets/Label-9sUKofNb.js +1 -0
  52. package/frontend-dist/assets/Login-Dxo1j9ZV.js +1 -0
  53. package/frontend-dist/assets/Logs-KthJmRch.js +3 -0
  54. package/frontend-dist/assets/ModelMappings-Dci1SkBO.js +1 -0
  55. package/frontend-dist/assets/PopperContent-DGr-wo47.js +1 -0
  56. package/frontend-dist/assets/Providers-wZxNIXXh.js +1 -0
  57. package/frontend-dist/assets/ProxyEnhancement-CcvyXQNb.js +1 -0
  58. package/frontend-dist/assets/RetryRules-B9Cw-Ycd.js +1 -0
  59. package/frontend-dist/assets/RouterKeys-yZrj5YNm.js +1 -0
  60. package/frontend-dist/assets/{RovingFocusItem-DnIa_lwH.js → RovingFocusItem-CvMnUs02.js} +1 -1
  61. package/frontend-dist/assets/SelectValue-DhqRtJKk.js +1 -0
  62. package/frontend-dist/assets/Setup-DPk7lIZy.js +1 -0
  63. package/frontend-dist/assets/{TableHeader-D2GkiqRx.js → TableHeader-D9I1uQTp.js} +1 -1
  64. package/frontend-dist/assets/TabsTrigger-D4xjbMaQ.js +1 -0
  65. package/frontend-dist/assets/VisuallyHiddenInput-Dr8wp-H0.js +1 -0
  66. package/frontend-dist/assets/alert-dialog-ChPy9vB2.js +1 -0
  67. package/frontend-dist/assets/badge-HGT44FNA.js +3 -0
  68. package/frontend-dist/assets/{button-C4_mChkc.js → button-B3kgf-D2.js} +1 -1
  69. package/frontend-dist/assets/client-BBW9-06a.js +12 -0
  70. package/frontend-dist/assets/createLucideIcon-CCI4wMy0.js +1 -0
  71. package/frontend-dist/assets/dialog-BFhbK4vw.js +1 -0
  72. package/frontend-dist/assets/index-DSrFEJ7Y.css +1 -0
  73. package/frontend-dist/assets/index-SUCErp6B.js +1 -0
  74. package/frontend-dist/assets/lib-D-4ywYag.js +1 -0
  75. package/frontend-dist/assets/ohash.D__AXeF1-Bj3Sy1wQ.js +1 -0
  76. package/frontend-dist/assets/useClipboard-XyA4kDfF.js +1 -0
  77. package/frontend-dist/assets/useForwardExpose-CIZH3-CG.js +1 -0
  78. package/frontend-dist/assets/x-CTNEl6Fz.js +1 -0
  79. package/frontend-dist/index.html +7 -6
  80. package/package.json +1 -1
  81. package/frontend-dist/assets/Checkbox--1gw0dYW.js +0 -1
  82. package/frontend-dist/assets/CollapsibleTrigger-D_ptA35Y.js +0 -1
  83. package/frontend-dist/assets/Dashboard-D4AwkULO.js +0 -3
  84. package/frontend-dist/assets/Label-GiPfoz7u.js +0 -1
  85. package/frontend-dist/assets/Login-BUet1sbM.js +0 -1
  86. package/frontend-dist/assets/Logs-yztb_F9t.js +0 -3
  87. package/frontend-dist/assets/ModelMappings-MbZhdPNv.js +0 -1
  88. package/frontend-dist/assets/Providers-BjsqH6A2.js +0 -1
  89. package/frontend-dist/assets/RetryRules-C2vvJvLr.js +0 -1
  90. package/frontend-dist/assets/RouterKeys-DavrgpAQ.js +0 -1
  91. package/frontend-dist/assets/SelectValue-BB0Ckbjh.js +0 -1
  92. package/frontend-dist/assets/alert-dialog-CWjBke-O.js +0 -1
  93. package/frontend-dist/assets/badge-_ZHrMEpC.js +0 -3
  94. package/frontend-dist/assets/client-BWw0R36V.js +0 -12
  95. package/frontend-dist/assets/dialog-CUHMcTqp.js +0 -1
  96. package/frontend-dist/assets/index-DEl48bm9.css +0 -1
  97. package/frontend-dist/assets/index-UZK1BnPG.js +0 -1
  98. package/frontend-dist/assets/lib-Qs8xoTas.js +0 -1
  99. package/frontend-dist/assets/useForwardExpose-B-xauF1X.js +0 -1
  100. package/frontend-dist/assets/x-JBJB26JV.js +0 -1
@@ -4,6 +4,7 @@ import { isInitialized } from "../db/settings.js";
4
4
  import { insertRequestLog } from "../db/logs.js";
5
5
  const SKIP_PATHS = ["/health", "/admin"];
6
6
  const HTTP_UNAUTHORIZED = 401;
7
+ const HTTP_SERVICE_UNAVAILABLE = 503;
7
8
  const BEARER_PREFIX_LENGTH = "Bearer ".length;
8
9
  function shouldSkipAuth(url) {
9
10
  const path = url.split("?")[0];
@@ -56,15 +57,15 @@ const authMiddlewareRaw = (app, options, done) => {
56
57
  // 未初始化时代理层不可用
57
58
  if (!isInitialized(options.db)) {
58
59
  if (proxyApiType) {
59
- logRejectedAuth(options.db, proxyApiType, 503, "Service not initialized", request);
60
+ logRejectedAuth(options.db, proxyApiType, HTTP_SERVICE_UNAVAILABLE, "Service not initialized", request);
60
61
  }
61
- reply.code(503).send({ error: { message: "Service not initialized" } });
62
+ reply.code(HTTP_SERVICE_UNAVAILABLE).send({ error: { message: "Service not initialized" } });
62
63
  return reply;
63
64
  }
64
65
  const authHeader = request.headers.authorization;
65
66
  if (!authHeader || !authHeader.startsWith("Bearer ")) {
66
67
  if (proxyApiType) {
67
- logRejectedAuth(options.db, proxyApiType, 401, "Invalid API key", request);
68
+ logRejectedAuth(options.db, proxyApiType, HTTP_UNAUTHORIZED, "Invalid API key", request);
68
69
  }
69
70
  unauthorizedReply(reply);
70
71
  return reply;
@@ -74,7 +75,7 @@ const authMiddlewareRaw = (app, options, done) => {
74
75
  const row = stmt.get(hash);
75
76
  if (!row) {
76
77
  if (proxyApiType) {
77
- logRejectedAuth(options.db, proxyApiType, 401, "Invalid API key", request);
78
+ logRejectedAuth(options.db, proxyApiType, HTTP_UNAUTHORIZED, "Invalid API key", request);
78
79
  }
79
80
  unauthorizedReply(reply);
80
81
  return reply;
@@ -0,0 +1,49 @@
1
+ import type { ServerResponse } from "node:http";
2
+ import { StatsAggregator } from "./stats-aggregator.js";
3
+ import { RuntimeCollector } from "./runtime-collector.js";
4
+ import type { ProviderSemaphoreManager } from "../proxy/semaphore.js";
5
+ import type { ActiveRequest, ProviderConcurrencySnapshot, RuntimeMetrics, StatsSnapshot } from "./types.js";
6
/**
 * In-memory tracker for in-flight and recently completed proxy requests.
 *
 * Keeps the live request table, feeds the stats aggregator on completion and
 * pushes Server-Sent Events to every registered monitor client.
 */
export declare class RequestTracker {
    private activeMap;
    private recentCompleted;
    private clients;
    private providerConfigCache;
    private pushTimer;
    private tickCount;
    /** Visible for testing */
    readonly statsAggregator: StatsAggregator;
    readonly runtimeCollector: RuntimeCollector;
    private readonly semaphoreManager?;
    /**
     * @param deps Optional collaborators; a default `RuntimeCollector` is
     *   created when none is supplied.
     */
    constructor(deps?: {
        semaphoreManager?: ProviderSemaphoreManager;
        runtimeCollector?: RuntimeCollector;
    });
    /** Register a new in-flight request and broadcast `request_start`. */
    start(req: ActiveRequest): void;
    /** Merge `patch` into the active request with the given id (no-op if unknown). */
    update(id: string, patch: Partial<ActiveRequest>): void;
    /**
     * Record one raw SSE line for an active streaming request.
     *
     * @param maxRaw Maximum number of trailing raw characters to retain.
     * @param maxText Maximum number of trailing extracted-text characters to retain.
     */
    appendStreamChunk(id: string, rawLine: string, apiType: "openai" | "anthropic", maxRaw: number, maxText: number): void;
    /** Move a request from the active table to the recent list and record its stats. */
    complete(id: string, result: {
        status: "completed" | "failed";
        statusCode?: number;
    }): void;
    /** Active requests whose status is still `"pending"`. */
    getActive(): ActiveRequest[];
    /** Recently completed requests, newest first, optionally limited to `limit` entries. */
    getRecent(limit?: number): ActiveRequest[];
    /** Look up a request by id among active, then recently completed, requests. */
    get(id: string): ActiveRequest | undefined;
    getStats(): StatsSnapshot;
    /** One concurrency snapshot per cached provider config; empty without a semaphore manager. */
    getConcurrency(): ProviderConcurrencySnapshot[];
    getRuntime(): RuntimeMetrics;
    /** Register an SSE client; it is removed automatically when the response closes. */
    addClient(res: ServerResponse): void;
    removeClient(res: ServerResponse): void;
    /** Start the periodic cleanup + broadcast timer (no-op when already running). */
    startPushInterval(): void;
    stopPushInterval(): void;
    /** Send one SSE event with a JSON-serialized payload to every connected client. */
    broadcast(event: string, data: unknown): void;
    updateProviderConfig(providerId: string, config: {
        name: string;
        maxConcurrency: number;
        queueTimeoutMs: number;
        maxQueueSize: number;
    }): void;
    removeProviderConfig(providerId: string): void;
    private cleanupRecent;
    /** Eventual-consistency safety net: purge active entries left behind abnormally. */
    private cleanupStaleActive;
}
@@ -0,0 +1,279 @@
1
+ import { StatsAggregator } from "./stats-aggregator.js";
2
+ import { RuntimeCollector } from "./runtime-collector.js";
3
/**
 * Extract displayable content from a single SSE line of a streaming response.
 *
 * Only `data: ` lines carrying a JSON object are considered; anything else
 * (other SSE fields, the `[DONE]` sentinel, malformed JSON, JSON that is not
 * an object, or a content-block index that is not a non-negative integer)
 * yields the empty result instead of throwing.
 *
 * @param line One raw SSE line, without the trailing newline.
 * @param apiType Wire format of the upstream: `'openai'` or `'anthropic'`.
 * @returns `text` is plain text to append to the running transcript; `block`
 *   describes the structured content block the line contributes to, or is
 *   `null` when the line carries none.
 */
function extractStreamText(line, apiType) {
    const empty = { text: '', block: null };
    if (!line.startsWith(SSE_DATA_PREFIX))
        return empty;
    const jsonStr = line.slice(SSE_DATA_PREFIX.length);
    if (jsonStr === '[DONE]')
        return empty;
    let obj;
    try {
        obj = JSON.parse(jsonStr);
    }
    catch {
        return empty;
    }
    // JSON.parse also accepts `null`, numbers and strings; property access on
    // `null` would throw, so only real objects are treated as events.
    if (obj === null || typeof obj !== 'object')
        return empty;
    // Upstream payloads are untrusted: anything that is not a string counts as "no text".
    const asText = (value) => (typeof value === 'string' ? value : '');
    if (apiType === 'openai') {
        const text = asText(obj.choices?.[0]?.delta?.content);
        return { text, block: text ? { index: 0, type: 'text', content: text } : null };
    }
    // Anthropic
    const type = obj.type;
    const delta = obj.delta;
    // A missing index defaults to 0; a present but invalid one is rejected so
    // that callers can safely use it as an array position.
    const index = obj.index ?? 0;
    if (!Number.isInteger(index) || index < 0)
        return empty;
    if (type === 'content_block_start') {
        const contentBlock = obj.content_block;
        const blockType = contentBlock?.type;
        const name = blockType === 'tool_use' ? contentBlock?.name : undefined;
        if (blockType === 'thinking' || blockType === 'text' || blockType === 'tool_use') {
            return { text: '', block: { index, type: blockType, content: '', name } };
        }
        return empty;
    }
    if (type === 'content_block_delta' && delta) {
        const deltaType = delta.type;
        if (deltaType === 'thinking_delta') {
            return { text: '', block: { index, type: 'thinking', content: asText(delta.thinking) } };
        }
        if (deltaType === 'text_delta') {
            const text = asText(delta.text);
            return { text, block: { index, type: 'text', content: text } };
        }
        if (deltaType === 'input_json_delta') {
            return { text: '', block: { index, type: 'tool_use', content: asText(delta.partial_json) } };
        }
    }
    return empty;
}
/** Prefix of SSE lines that carry an event payload. */
const SSE_DATA_PREFIX = "data: ";
/** Runtime metrics are pushed on every N-th push tick. */
const RUNTIME_PUSH_TICK_INTERVAL = 2;
/** Maximum number of completed requests kept in memory. */
const RECENT_COMPLETED_MAX = 200;
/** Completed requests older than this (5 minutes) are dropped. */
const RECENT_TTL_MS = 5 * 60 * 1000; // eslint-disable-line no-magic-numbers
/** Active requests older than this (1 hour) are treated as leaked and purged. */
const ACTIVE_MAX_AGE_MS = 60 * 60 * 1000; // eslint-disable-line no-magic-numbers
/** Period of the push/cleanup timer, in milliseconds. */
const PUSH_INTERVAL_MS = 5000;
59
/**
 * Upper bound on the structured content blocks tracked per request. Block
 * indices come from the upstream stream, so they are bounded before being
 * used to grow an array.
 */
const MAX_STREAM_BLOCKS = 256;
/**
 * Return at most the last `max` characters of `value`.
 * A limit that is zero, negative or NaN yields an empty string; a plain
 * `value.slice(-0)` would instead return the whole string.
 */
function keepTail(value, max) {
    const limit = Math.floor(max);
    if (!(limit > 0))
        return "";
    return value.length > limit ? value.slice(-limit) : value;
}
/**
 * In-memory tracker for in-flight and recently completed proxy requests.
 *
 * Holds the live request table, records statistics when a request completes
 * and pushes Server-Sent Events to every registered monitor client.
 */
export class RequestTracker {
    /** Requests currently in flight, keyed by request id. */
    activeMap = new Map();
    /** Finished requests, newest first. */
    recentCompleted = [];
    /** HTTP responses of connected SSE monitor clients. */
    clients = new Set();
    /** Provider settings used to build concurrency snapshots, keyed by provider id. */
    providerConfigCache = new Map();
    /** Handle of the periodic push timer, or `null` when stopped. */
    pushTimer = null;
    /** Number of push ticks since the timer was started. */
    tickCount = 0;
    /** Visible for testing */
    statsAggregator;
    runtimeCollector;
    semaphoreManager;
    /**
     * @param deps Optional collaborators. Without `semaphoreManager`,
     *   `getConcurrency()` always returns an empty list; without
     *   `runtimeCollector` a fresh one is created.
     */
    constructor(deps) {
        this.semaphoreManager = deps?.semaphoreManager;
        this.runtimeCollector = deps?.runtimeCollector ?? new RuntimeCollector();
        this.statsAggregator = new StatsAggregator();
    }
    // --- Core methods ---
    /** Register a new in-flight request (stored as a shallow copy) and announce it. */
    start(req) {
        this.activeMap.set(req.id, { ...req });
        this.broadcast("request_start", req);
    }
    /** Merge `patch` into an active request; unknown ids are ignored. */
    update(id, patch) {
        const req = this.activeMap.get(id);
        if (!req)
            return;
        const prevQueued = req.queued;
        Object.assign(req, patch);
        // Broadcast immediately when the queued flag flips so the UI shows
        // queue entry/exit without waiting for the next push tick.
        if (patch.queued !== undefined && patch.queued !== prevQueued) {
            this.broadcast("request_update", this.getActive());
        }
    }
    /**
     * Record one raw SSE line for an active streaming request.
     *
     * @param id Request id; unknown ids are ignored.
     * @param rawLine The raw SSE line, without trailing newline.
     * @param apiType Upstream wire format, used to extract text and blocks.
     * @param maxRaw Maximum number of trailing raw characters retained.
     * @param maxText Maximum number of trailing characters retained for the
     *   extracted text and for each content block.
     */
    appendStreamChunk(id, rawLine, apiType, maxRaw, maxText) {
        const req = this.activeMap.get(id);
        if (!req)
            return;
        if (!req.streamContent) {
            req.streamContent = { rawChunks: "", textContent: "", totalChars: 0, blocks: [] };
        }
        const sc = req.streamContent;
        sc.totalChars += rawLine.length;
        // Bounded tail buffer: once over the limit only the most recent characters are kept.
        sc.rawChunks = keepTail(sc.rawChunks + rawLine + "\n", maxRaw);
        if (!sc.blocks) {
            sc.blocks = [];
        }
        const extracted = extractStreamText(rawLine, apiType);
        // Accumulate plain text (OpenAI content and Anthropic text_delta).
        if (extracted.text) {
            sc.textContent = keepTail(sc.textContent + extracted.text, maxText);
        }
        const block = extracted.block;
        if (!block)
            return;
        const { index, type, content, name } = block;
        // The index originates from the upstream payload: refuse values that
        // would throw on access or make the padding loop below run unbounded.
        if (!Number.isInteger(index) || index < 0 || index >= MAX_STREAM_BLOCKS)
            return;
        // Pad so that sc.blocks[index] exists.
        while (sc.blocks.length <= index) {
            sc.blocks.push({ type: 'text', content: '' });
        }
        const target = sc.blocks[index];
        if (name) {
            target.name = name;
        }
        if (content === '' && type !== 'text') {
            // Block-start marker: fixes the block type before any content arrives.
            target.type = type;
        }
        else if (content) {
            target.content += content;
            target.type = type;
        }
        for (const entry of sc.blocks) {
            entry.content = keepTail(entry.content, maxText);
        }
    }
    /**
     * Finish an active request: record latency and outcome, move it to the
     * recent list and broadcast `request_complete`. Unknown ids are ignored.
     * A missing `statusCode` is recorded as 0.
     */
    complete(id, result) {
        const req = this.activeMap.get(id);
        if (!req)
            return;
        const now = Date.now();
        const latency = now - req.startTime;
        const statusCode = result.statusCode ?? 0;
        this.statsAggregator.recordLatency(latency);
        this.statsAggregator.recordRequest(req.providerId, req.providerName, statusCode, req.retryCount > 0, false);
        this.statsAggregator.recordProviderLatency(req.providerId, latency);
        const completed = {
            ...req,
            status: result.status,
            completedAt: now,
        };
        this.activeMap.delete(id);
        this.recentCompleted.unshift(completed);
        if (this.recentCompleted.length > RECENT_COMPLETED_MAX) {
            this.recentCompleted.length = RECENT_COMPLETED_MAX;
        }
        this.broadcast("request_complete", completed);
    }
    // --- Query methods ---
    /** Active requests whose status is still `"pending"`. */
    getActive() {
        const result = [];
        for (const req of this.activeMap.values()) {
            if (req.status === "pending")
                result.push(req);
        }
        return result;
    }
    /**
     * Recently completed requests, newest first.
     * Always returns a fresh array so callers cannot mutate internal state.
     *
     * @param limit Optional maximum number of entries; negative values are treated as 0.
     */
    getRecent(limit) {
        if (limit == null)
            return this.recentCompleted.slice();
        return this.recentCompleted.slice(0, Math.max(0, limit));
    }
    /** Look up a request by id among active, then recently completed, requests. */
    get(id) {
        return this.activeMap.get(id) ?? this.recentCompleted.find((r) => r.id === id);
    }
    // --- Stats / monitoring ---
    getStats() {
        return this.statsAggregator.getStats();
    }
    /** One snapshot per cached provider config, combined with live semaphore status. */
    getConcurrency() {
        if (!this.semaphoreManager)
            return [];
        const result = [];
        for (const [providerId, config] of this.providerConfigCache) {
            const status = this.semaphoreManager.getStatus(providerId);
            result.push({
                providerId,
                providerName: config.name,
                maxConcurrency: config.maxConcurrency,
                active: status.active,
                queued: status.queued,
                queueTimeoutMs: config.queueTimeoutMs,
                maxQueueSize: config.maxQueueSize,
            });
        }
        return result;
    }
    getRuntime() {
        return this.runtimeCollector.collect();
    }
    // --- SSE client management ---
    /** Register an SSE client; it is dropped automatically when its response closes. */
    addClient(res) {
        this.clients.add(res);
        res.on("close", () => {
            this.clients.delete(res);
        });
    }
    removeClient(res) {
        this.clients.delete(res);
    }
    // --- Push interval ---
    /** Start the periodic cleanup + broadcast timer; no-op when already running. */
    startPushInterval() {
        if (this.pushTimer)
            return;
        this.tickCount = 0;
        this.pushTimer = setInterval(() => {
            this.tickCount++;
            this.cleanupRecent();
            this.cleanupStaleActive();
            this.broadcast("request_update", this.getActive());
            this.broadcast("concurrency_update", this.getConcurrency());
            this.broadcast("stats_update", this.getStats());
            // Every 10s (every 2nd tick)
            if (this.tickCount % RUNTIME_PUSH_TICK_INTERVAL === 0) {
                this.broadcast("runtime_update", this.getRuntime());
            }
        }, PUSH_INTERVAL_MS);
    }
    stopPushInterval() {
        if (this.pushTimer) {
            clearInterval(this.pushTimer);
            this.pushTimer = null;
        }
    }
    /**
     * Send one SSE event to every connected client. A client whose `write`
     * throws is removed.
     */
    broadcast(event, data) {
        // Nothing to send to: skip serializing the payload.
        if (this.clients.size === 0)
            return;
        const msg = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
        for (const client of this.clients) {
            try {
                if (!client.writableEnded)
                    client.write(msg);
            }
            catch {
                this.clients.delete(client);
            }
        }
    }
    // --- Provider config cache ---
    updateProviderConfig(providerId, config) {
        this.providerConfigCache.set(providerId, config);
    }
    removeProviderConfig(providerId) {
        this.providerConfigCache.delete(providerId);
    }
    // --- Internal ---
    /** Drop completed requests older than RECENT_TTL_MS. */
    cleanupRecent() {
        const cutoff = Date.now() - RECENT_TTL_MS;
        // recentCompleted is sorted desc by completedAt, so the first expired
        // entry marks where the list gets cut.
        let i = 0;
        for (; i < this.recentCompleted.length; i++) {
            if (this.recentCompleted[i].completedAt != null &&
                this.recentCompleted[i].completedAt < cutoff) {
                break;
            }
        }
        this.recentCompleted = this.recentCompleted.slice(0, Math.min(i, RECENT_COMPLETED_MAX));
    }
    /** Eventual-consistency safety net: purge active entries left behind abnormally. */
    cleanupStaleActive() {
        const cutoff = Date.now() - ACTIVE_MAX_AGE_MS;
        for (const [id, req] of this.activeMap) {
            if (req.startTime < cutoff) {
                this.activeMap.delete(id);
            }
        }
    }
}
@@ -0,0 +1,11 @@
1
+ import type { RuntimeMetrics } from "./types.js";
2
/** Collects process-level runtime metrics, including event loop delay. */
export declare class RuntimeCollector {
    private histogram?;
    /** Begin sampling event loop delay; does nothing when already started. */
    start(): void;
    /** Disable the event loop delay histogram and discard it. */
    stop(): void;
    /** Take one snapshot of the current runtime metrics. */
    collect(): RuntimeMetrics;
    private getEventLoopDelayMs;
}
@@ -0,0 +1,41 @@
1
import { monitorEventLoopDelay, performance } from "node:perf_hooks";
2
const MS_PER_SECOND = 1000;
const NS_PER_MS = 1e6;
/**
 * Length of the list returned by one of the undocumented `process`
 * introspection hooks (`_getActiveHandles` / `_getActiveRequests`).
 * Returns 0 when the hook is unavailable, so a snapshot never throws.
 */
function countInternal(hookName) {
    const hook = process[hookName];
    if (typeof hook !== "function")
        return 0;
    const items = hook.call(process);
    return Array.isArray(items) ? items.length : 0;
}
/** Collects process-level runtime metrics, including event loop delay. */
export class RuntimeCollector {
    /** Event loop delay histogram; `undefined` while monitoring is stopped. */
    histogram;
    /** Start monitoring the event loop delay histogram */
    start() {
        if (this.histogram)
            return;
        // monitorEventLoopDelay is an export of the perf_hooks module, not a
        // method of the `performance` object.
        if (typeof monitorEventLoopDelay !== "function")
            return;
        this.histogram = monitorEventLoopDelay({ resolution: 1 });
        this.histogram.enable();
    }
    /** Stop monitoring and disable the histogram */
    stop() {
        if (this.histogram) {
            this.histogram.disable();
            this.histogram = undefined;
        }
    }
    /** Collect a single runtime metrics snapshot */
    collect() {
        return {
            uptimeMs: process.uptime() * MS_PER_SECOND,
            memoryUsage: process.memoryUsage(),
            activeHandles: countInternal("_getActiveHandles"),
            activeRequests: countInternal("_getActiveRequests"),
            eventLoopDelayMs: this.getEventLoopDelayMs(),
        };
    }
    /**
     * Mean event loop delay in milliseconds.
     * Returns 0 while monitoring is stopped or before the first sample has
     * been recorded (an empty histogram reports a NaN mean).
     */
    getEventLoopDelayMs() {
        if (!this.histogram) {
            return 0;
        }
        // mean is in nanoseconds; convert to milliseconds
        const meanMs = this.histogram.mean / NS_PER_MS;
        return Number.isFinite(meanMs) ? meanMs : 0;
    }
}
@@ -0,0 +1,22 @@
1
+ import type { StatsSnapshot } from "./types.js";
2
/**
 * Accumulates request counters and latency samples, globally and per
 * provider, and renders them as a `StatsSnapshot`.
 */
export declare class StatsAggregator {
    private latencyBuffer;
    private totalRequests;
    private successCount;
    private errorCount;
    private retryCount;
    private failoverCount;
    private byStatusCode;
    private providers;
    private providerNames;
    /** @param capacity Number of latency samples retained for the global figures (at least 1). */
    constructor(capacity?: number);
    /** Add one latency sample (milliseconds) to the global sample window. */
    recordLatency(ms: number): void;
    /** Count one finished request, both globally and for the given provider. */
    recordRequest(providerId: string, providerName: string, statusCode: number, isRetry: boolean, isFailover: boolean): void;
    /**
     * Attribute a latency sample to a provider so its avgLatencyMs can be computed.
     * Call it together with recordRequest to keep per-provider latency accurate.
     */
    recordProviderLatency(providerId: string, ms: number): void;
    /** Build a snapshot of all counters and latency figures. */
    getStats(): StatsSnapshot;
    /** Clear every counter, sample and per-provider record. */
    reset(): void;
}
@@ -0,0 +1,166 @@
1
/**
 * Circular buffer of numeric samples with a fixed capacity.
 * Once full, each new sample replaces the oldest one, so appending is O(1).
 */
class RingBuffer {
    capacity;
    buf;
    /** Index the next sample will be written to. */
    head = 0;
    /** Number of valid samples currently stored. */
    len = 0;
    constructor(capacity) {
        this.capacity = capacity;
        this.buf = new Array(capacity);
    }
    /** Store a sample, overwriting the oldest one when the buffer is full. */
    push(value) {
        const slot = this.head;
        this.buf[slot] = value;
        this.head = (slot + 1) % this.capacity;
        this.len = Math.min(this.len + 1, this.capacity);
    }
    /** Ascending copy of the stored samples; the buffer itself is left untouched. */
    sorted() {
        return this.buf.slice(0, this.len).sort((x, y) => x - y);
    }
    /** Forget all samples. */
    clear() {
        this.len = 0;
        this.head = 0;
    }
}
31
/** Create a zeroed per-provider accumulator with its own error-code map. */
function emptyAccumulator() {
    const zeroedCounters = {
        totalRequests: 0,
        successCount: 0,
        errorCount: 0,
        retryCount: 0,
        latencySum: 0,
        latencyCount: 0,
    };
    return { ...zeroedCounters, errorsByCode: new Map() };
}
42
/** Number of most frequent error codes reported per provider. */
const TOP_ERRORS_LIMIT = 5;
/** Default size of the global latency sample window. */
const DEFAULT_CAPACITY = 1000;
/** Status codes in [MIN, MAX) count as success. */
const HTTP_SUCCESS_RANGE_MIN = 200;
const HTTP_SUCCESS_RANGE_MAX = 400;
const PERCENTILE_P50 = 0.5;
const PERCENTILE_P99 = 0.99;
/**
 * Accumulates request counters and latency samples, globally and per
 * provider, and renders them as a stats snapshot.
 */
export class StatsAggregator {
    latencyBuffer;
    totalRequests = 0;
    successCount = 0;
    errorCount = 0;
    retryCount = 0;
    failoverCount = 0;
    byStatusCode = new Map();
    providers = new Map();
    providerNames = new Map();
    /** @param capacity Number of latency samples retained; raised to 1 when smaller. */
    constructor(capacity = DEFAULT_CAPACITY) {
        const size = Math.max(1, capacity);
        this.latencyBuffer = new RingBuffer(size);
    }
    /** Add one latency sample (milliseconds) to the global sample window. */
    recordLatency(ms) {
        this.latencyBuffer.push(ms);
    }
    /** Count one finished request, both globally and for the given provider. */
    recordRequest(providerId, providerName, statusCode, isRetry, isFailover) {
        const succeeded = statusCode >= HTTP_SUCCESS_RANGE_MIN && statusCode < HTTP_SUCCESS_RANGE_MAX;
        // Global counters
        this.totalRequests += 1;
        this.providerNames.set(providerId, providerName);
        const seenForCode = this.byStatusCode.get(statusCode) ?? 0;
        this.byStatusCode.set(statusCode, seenForCode + 1);
        if (succeeded) {
            this.successCount += 1;
        }
        else {
            this.errorCount += 1;
        }
        if (isRetry) {
            this.retryCount += 1;
        }
        if (isFailover) {
            this.failoverCount += 1;
        }
        // Per-provider counters, created on first use
        let perProvider = this.providers.get(providerId);
        if (perProvider === undefined) {
            perProvider = emptyAccumulator();
            this.providers.set(providerId, perProvider);
        }
        perProvider.totalRequests += 1;
        if (succeeded) {
            perProvider.successCount += 1;
        }
        else {
            perProvider.errorCount += 1;
            const seenErrors = perProvider.errorsByCode.get(statusCode) ?? 0;
            perProvider.errorsByCode.set(statusCode, seenErrors + 1);
        }
        if (isRetry) {
            perProvider.retryCount += 1;
        }
    }
    /**
     * Attribute a latency sample to a provider so its avgLatencyMs can be computed.
     * Call it together with recordRequest to keep per-provider latency accurate.
     */
    recordProviderLatency(providerId, ms) {
        let perProvider = this.providers.get(providerId);
        if (perProvider === undefined) {
            perProvider = emptyAccumulator();
            this.providers.set(providerId, perProvider);
        }
        perProvider.latencySum += ms;
        perProvider.latencyCount += 1;
    }
    /**
     * Build a snapshot of all counters. Global latency figures are computed
     * from the retained sample window and are 0 when it is empty.
     */
    getStats() {
        const samples = this.latencyBuffer.sorted();
        const hasSamples = samples.length > 0;
        let latencyTotal = 0;
        for (const sample of samples) {
            latencyTotal += sample;
        }
        const byProvider = {};
        this.providers.forEach((acc, id) => {
            const topErrors = Array.from(acc.errorsByCode, ([code, count]) => ({ code, count }))
                .sort((left, right) => right.count - left.count)
                .slice(0, TOP_ERRORS_LIMIT);
            let avgLatencyMs = 0;
            if (acc.latencyCount > 0) {
                avgLatencyMs = acc.latencySum / acc.latencyCount;
            }
            byProvider[id] = {
                providerName: this.providerNames.get(id) ?? id,
                totalRequests: acc.totalRequests,
                successCount: acc.successCount,
                errorCount: acc.errorCount,
                avgLatencyMs,
                retryCount: acc.retryCount,
                topErrors,
            };
        });
        return {
            totalRequests: this.totalRequests,
            successCount: this.successCount,
            errorCount: this.errorCount,
            retryCount: this.retryCount,
            failoverCount: this.failoverCount,
            avgLatencyMs: hasSamples ? latencyTotal / samples.length : 0,
            p50LatencyMs: hasSamples ? percentile(samples, PERCENTILE_P50) : 0,
            p99LatencyMs: hasSamples ? percentile(samples, PERCENTILE_P99) : 0,
            byProvider,
            byStatusCode: Object.fromEntries(this.byStatusCode),
        };
    }
    /** Clear every counter, sample and per-provider record. */
    reset() {
        this.latencyBuffer.clear();
        this.totalRequests = 0;
        this.successCount = 0;
        this.errorCount = 0;
        this.retryCount = 0;
        this.failoverCount = 0;
        for (const table of [this.byStatusCode, this.providers, this.providerNames]) {
            table.clear();
        }
    }
}
162
/**
 * Nearest-rank percentile on a pre-sorted ascending array.
 *
 * @param sorted Samples in ascending order.
 * @param p Fraction in [0, 1]; the resulting rank is clamped to the array bounds.
 * @returns The selected sample, or 0 when `sorted` is empty (rather than `undefined`).
 */
function percentile(sorted, p) {
    const count = sorted.length;
    if (count === 0)
        return 0;
    const idx = Math.ceil(p * count) - 1;
    return sorted[Math.max(0, Math.min(idx, count - 1))];
}
@@ -0,0 +1,84 @@
1
/** One structured content block reconstructed from a streamed response. */
export interface ContentBlock {
    /** Kind of content held by the block. */
    type: 'thinking' | 'text' | 'tool_use';
    /** Content accumulated for the block so far. */
    content: string;
    /** Optional block name. */
    name?: string;
}
6
/** Bounded view of what a streaming request has received so far. */
export interface StreamContentSnapshot {
    /** Most recent raw SSE text, kept as a ring buffer (at most ~8KB). */
    rawChunks: string;
    /** Text extracted from SSE events and concatenated (at most ~4KB). */
    textContent: string;
    /** Cumulative number of stream characters received. */
    totalChars: number;
    /** Structured content blocks, when available. */
    blocks?: ContentBlock[];
}
15
/** A proxied request as tracked by the monitor, while in flight or after completion. */
export interface ActiveRequest {
    /** Identifier the request is tracked under. */
    id: string;
    /** Wire format of the request. */
    apiType: "openai" | "anthropic";
    model: string;
    providerId: string;
    providerName: string;
    isStream: boolean;
    /** Queue flag for the request. */
    queued?: boolean;
    /** Start timestamp (presumably epoch milliseconds; confirm against the caller). */
    startTime: number;
    status: "pending" | "completed" | "failed";
    retryCount: number;
    attempts: AttemptSnapshot[];
    streamMetrics?: StreamMetricsSnapshot;
    streamContent?: StreamContentSnapshot;
    clientIp?: string;
    /** Completion timestamp; absent until the request has finished. */
    completedAt?: number;
}
32
/** Outcome of a single upstream attempt made for a request. */
export interface AttemptSnapshot {
    /** Status code of the attempt, or `null` when there is none. */
    statusCode: number | null;
    /** Error description, or `null` when there is none. */
    error: string | null;
    latencyMs: number;
    providerId: string;
}
38
/** Metrics gathered from a streamed response; `null` marks values not available. */
export interface StreamMetricsSnapshot {
    inputTokens: number | null;
    outputTokens: number | null;
    /** Presumably time to first token, in milliseconds (TODO confirm against the producer). */
    ttftMs: number | null;
    stopReason: string | null;
    isComplete: boolean;
}
45
/** Configured concurrency limits of a provider together with its live usage. */
export interface ProviderConcurrencySnapshot {
    providerId: string;
    providerName: string;
    /** Configured concurrency limit. */
    maxConcurrency: number;
    /** Current active count. */
    active: number;
    /** Current queued count. */
    queued: number;
    /** Configured queue timeout, in milliseconds. */
    queueTimeoutMs: number;
    /** Configured maximum queue size. */
    maxQueueSize: number;
}
54
/** Aggregated request statistics across all providers. */
export interface StatsSnapshot {
    totalRequests: number;
    successCount: number;
    errorCount: number;
    retryCount: number;
    failoverCount: number;
    /** Average latency in milliseconds. */
    avgLatencyMs: number;
    /** 50th-percentile latency in milliseconds. */
    p50LatencyMs: number;
    /** 99th-percentile latency in milliseconds. */
    p99LatencyMs: number;
    /** Per-provider statistics, keyed by provider id. */
    byProvider: Record<string, ProviderStats>;
    /** Request counts keyed by status code. */
    byStatusCode: Record<number, number>;
}
66
/** Request statistics for a single provider. */
export interface ProviderStats {
    providerName: string;
    totalRequests: number;
    successCount: number;
    errorCount: number;
    /** Average latency in milliseconds. */
    avgLatencyMs: number;
    retryCount: number;
    /** Error codes with their occurrence counts. */
    topErrors: { code: number; count: number }[];
}
78
/** Snapshot of process-level runtime metrics. */
export interface RuntimeMetrics {
    /** Process uptime in milliseconds. */
    uptimeMs: number;
    /** Process memory usage. */
    memoryUsage: NodeJS.MemoryUsage;
    /** Number of active handles. */
    activeHandles: number;
    /** Number of active requests. */
    activeRequests: number;
    /** Event loop delay in milliseconds. */
    eventLoopDelayMs: number;
}
@@ -0,0 +1 @@
1
+ export {};