llm-simple-router 0.10.5 → 0.10.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/dist/config/model-context.d.ts +2 -0
  2. package/dist/config/model-context.js +15 -4
  3. package/dist/core/concurrency/adaptive-controller.d.ts +2 -0
  4. package/dist/core/concurrency/adaptive-controller.js +58 -36
  5. package/dist/core/concurrency/types.d.ts +3 -1
  6. package/dist/core/monitor/request-tracker.d.ts +1 -0
  7. package/dist/core/monitor/request-tracker.js +7 -1
  8. package/dist/core/types.d.ts +2 -0
  9. package/dist/db/helpers.d.ts +1 -0
  10. package/dist/db/helpers.js +15 -0
  11. package/dist/db/index.js +6 -0
  12. package/dist/db/logs.d.ts +1 -1
  13. package/dist/db/logs.js +35 -18
  14. package/dist/db/metrics.js +2 -1
  15. package/dist/db/migrations/044_add_performance_indexes.sql +8 -0
  16. package/dist/db/settings.js +22 -5
  17. package/dist/index.js +2 -0
  18. package/dist/metrics/metrics-extractor.d.ts +8 -3
  19. package/dist/metrics/metrics-extractor.js +33 -20
  20. package/dist/metrics/sse-parser.js +5 -2
  21. package/dist/middleware/auth.d.ts +1 -1
  22. package/dist/middleware/auth.js +12 -20
  23. package/dist/proxy/handler/create-proxy-handler.js +6 -2
  24. package/dist/proxy/handler/failover-loop.js +40 -22
  25. package/dist/proxy/hooks/builtin/allowed-models.js +8 -18
  26. package/dist/proxy/hooks/builtin/cache-estimation.js +4 -0
  27. package/dist/proxy/orchestration/orchestrator.js +5 -3
  28. package/dist/proxy/orchestration/resilience.js +4 -2
  29. package/dist/proxy/proxy-logging.d.ts +1 -1
  30. package/dist/proxy/proxy-logging.js +32 -17
  31. package/dist/proxy/routing/enhancement-config.d.ts +2 -0
  32. package/dist/proxy/routing/enhancement-config.js +21 -4
  33. package/dist/proxy/routing/mapping-resolver.d.ts +3 -1
  34. package/dist/proxy/routing/mapping-resolver.js +4 -2
  35. package/dist/proxy/transport/proxy-agent.d.ts +8 -0
  36. package/dist/proxy/transport/proxy-agent.js +21 -0
  37. package/dist/proxy/transport/stream.js +25 -5
  38. package/dist/proxy/transport/transport-fn.js +3 -1
  39. package/dist/storage/log-file-writer.d.ts +8 -1
  40. package/dist/storage/log-file-writer.js +41 -3
  41. package/frontend-dist/assets/{CardContent-CkdwrZW4.js → CardContent-CyvsM1dh.js} +1 -1
  42. package/frontend-dist/assets/{CardTitle-DSy7RCiB.js → CardTitle-CKxB6jLn.js} +1 -1
  43. package/frontend-dist/assets/{Checkbox-C3NmNtqa.js → Checkbox-RMorZJxv.js} +1 -1
  44. package/frontend-dist/assets/CollapsibleContent-lX7fgPmd.js +1 -0
  45. package/frontend-dist/assets/CollapsibleTrigger-BGLyyoL-.js +1 -0
  46. package/frontend-dist/assets/Dashboard-CUUJmaq6.js +3 -0
  47. package/frontend-dist/assets/{Input-CWz3gSq7.js → Input-qvtpg1mU.js} +1 -1
  48. package/frontend-dist/assets/Label-KbeWdVSH.js +1 -0
  49. package/frontend-dist/assets/Login-CFEaXT7h.js +1 -0
  50. package/frontend-dist/assets/Logs-ItgRttZ7.js +1 -0
  51. package/frontend-dist/assets/MappingEntryEditor-JtB6vvsH.js +1 -0
  52. package/frontend-dist/assets/ModelCard-C0-g-WOb.js +1 -0
  53. package/frontend-dist/assets/ModelMappings-RVrNS0aC.js +1 -0
  54. package/frontend-dist/assets/Monitor-3xt32Bbl.js +1 -0
  55. package/frontend-dist/assets/Providers-Ro-AjfzZ.js +1 -0
  56. package/frontend-dist/assets/ProxyEnhancement-BlDtv_4M.js +1 -0
  57. package/frontend-dist/assets/QuickSetup-lEC9e6wW.js +1 -0
  58. package/frontend-dist/assets/RetryRules-DJMh2I-9.js +1 -0
  59. package/frontend-dist/assets/RouterKeys-DvXfMOW6.js +1 -0
  60. package/frontend-dist/assets/{RovingFocusItem-BLxXLvHz.js → RovingFocusItem-CTJ2Vqf3.js} +1 -1
  61. package/frontend-dist/assets/Schedules-DKWJFNUk.js +1 -0
  62. package/frontend-dist/assets/Settings-DkIN3IsO.js +6 -0
  63. package/frontend-dist/assets/Setup-DIGMoSiu.js +1 -0
  64. package/frontend-dist/assets/{Switch-Cvlk-GzL.js → Switch-EL-MDXz3.js} +1 -1
  65. package/frontend-dist/assets/{TooltipTrigger-Caej0jjH.js → TooltipTrigger-B5rqpLBL.js} +1 -1
  66. package/frontend-dist/assets/TransformRulesForm-CrYYvjo8.js +1 -0
  67. package/frontend-dist/assets/UnifiedRequestDialog-Dzes8usX.js +3 -0
  68. package/frontend-dist/assets/{VisuallyHiddenInput-DVhdgqSs.js → VisuallyHiddenInput-CNa5CPMY.js} +1 -1
  69. package/frontend-dist/assets/{button--Qf6nmZk.js → button-Bq3kR6lP.js} +2 -2
  70. package/frontend-dist/assets/{copy-DBByuQcn.js → copy-B7rgYQq3.js} +1 -1
  71. package/frontend-dist/assets/{dashboard-B1pq4be7.js → dashboard-COCyp2p_.js} +1 -1
  72. package/frontend-dist/assets/{dashboard-BVRlMB_W.js → dashboard-DjgmcUG5.js} +1 -1
  73. package/frontend-dist/assets/dialog-XzDsW73J.js +1 -0
  74. package/frontend-dist/assets/index-Ba1Z6tMV.js +3 -0
  75. package/frontend-dist/assets/mappings-6w7mc8YK.js +1 -0
  76. package/frontend-dist/assets/mappings-C1fK_e70.js +1 -0
  77. package/frontend-dist/assets/{schedules-d2NQ-xEH.js → schedules-Bd66RL7P.js} +1 -1
  78. package/frontend-dist/assets/{schedules-Dul_xl7u.js → schedules-HDwMuDgX.js} +1 -1
  79. package/frontend-dist/assets/{trash-2-C1sEBLn-.js → trash-2-DCsvygvC.js} +1 -1
  80. package/frontend-dist/assets/{useClipboard-BQ-_hkN0.js → useClipboard-D32vuT2b.js} +1 -1
  81. package/frontend-dist/assets/{useLogRetention-PhhUFWsW.js → useLogRetention-D20-qsiv.js} +1 -1
  82. package/frontend-dist/index.html +2 -2
  83. package/package.json +1 -1
  84. package/frontend-dist/assets/CollapsibleContent-DBzDxLSb.js +0 -1
  85. package/frontend-dist/assets/CollapsibleTrigger-B2f-xQJ0.js +0 -1
  86. package/frontend-dist/assets/Dashboard-D8wX4CUe.js +0 -3
  87. package/frontend-dist/assets/Label-DtNVUGfD.js +0 -1
  88. package/frontend-dist/assets/Login-CWveR_5r.js +0 -1
  89. package/frontend-dist/assets/Logs-Bi1whdhz.js +0 -1
  90. package/frontend-dist/assets/MappingEntryEditor-BRm2vENX.js +0 -1
  91. package/frontend-dist/assets/ModelCard-7k6e0d6o.js +0 -1
  92. package/frontend-dist/assets/ModelMappings-BA2biFmT.js +0 -1
  93. package/frontend-dist/assets/Monitor-B0ZTNvv5.js +0 -1
  94. package/frontend-dist/assets/Providers-62LJNLRi.js +0 -1
  95. package/frontend-dist/assets/ProxyEnhancement-dcYVsc3f.js +0 -1
  96. package/frontend-dist/assets/QuickSetup-CRcUhnmK.js +0 -1
  97. package/frontend-dist/assets/RetryRules-B-Yaery1.js +0 -1
  98. package/frontend-dist/assets/RouterKeys-HMyzbiSY.js +0 -1
  99. package/frontend-dist/assets/Schedules-DuXBLzKL.js +0 -1
  100. package/frontend-dist/assets/Settings--oVZQg3A.js +0 -6
  101. package/frontend-dist/assets/Setup-DCtJiJxI.js +0 -1
  102. package/frontend-dist/assets/TransformRulesForm-kWP-wmEh.js +0 -1
  103. package/frontend-dist/assets/UnifiedRequestDialog-Vwxh-lNJ.js +0 -3
  104. package/frontend-dist/assets/dialog-MkZTr6jd.js +0 -1
  105. package/frontend-dist/assets/index-Bg5CP0c1.js +0 -3
  106. package/frontend-dist/assets/mappings-Cazz3EF4.js +0 -1
  107. package/frontend-dist/assets/mappings-DQRteuwa.js +0 -1
@@ -16,5 +16,7 @@ export declare const OVERFLOW_THRESHOLD = 1000000;
16
16
  export declare function lookupContextWindow(modelName: string): number;
17
17
  /** 标准化 patch 名称:连字符 → 下划线 */
18
18
  export declare function normalizePatchName(name: string): string;
19
+ /** 清除缓存(仅供测试使用) */
20
+ export declare function clearModelsCache(): void;
19
21
  export declare function parseModels(raw: string): ModelEntry[];
20
22
  export declare function buildModelInfoList(modelEntries: ModelEntry[], overrides: Map<string, number>): ModelInfo[];
@@ -104,14 +104,23 @@ const PATCH_ID_MIGRATION = {
104
104
  non_ds_tools: "thinking_consistency",
105
105
  cache_control: "thinking_consistency",
106
106
  };
107
+ // parseModels 缓存,key 为 raw 字符串引用
108
+ const modelsCache = new Map();
109
+ /** 清除缓存(仅供测试使用) */
110
+ export function clearModelsCache() {
111
+ modelsCache.clear();
112
+ }
107
113
  export function parseModels(raw) {
108
114
  if (!raw)
109
115
  return [];
116
+ const cached = modelsCache.get(raw);
117
+ if (cached)
118
+ return cached;
110
119
  try {
111
120
  const parsed = JSON.parse(raw);
112
121
  if (!Array.isArray(parsed))
113
122
  return [];
114
- return parsed.map((item) => {
123
+ const result = parsed.map((item) => {
115
124
  if (typeof item === 'string') {
116
125
  return item ? { name: item, patches: [] } : null;
117
126
  }
@@ -124,14 +133,16 @@ export function parseModels(raw) {
124
133
  const rawPatches = (obj.patches ?? []).map(normalizePatchName);
125
134
  const migrated = rawPatches.map(p => PATCH_ID_MIGRATION[p] ?? p);
126
135
  const patches = [...new Set(migrated)];
127
- const result = {
136
+ const entry = {
128
137
  name: modelName,
129
138
  patches,
130
139
  };
131
140
  if (obj.stream_timeout_ms != null)
132
- result.stream_timeout_ms = obj.stream_timeout_ms;
133
- return result;
141
+ entry.stream_timeout_ms = obj.stream_timeout_ms;
142
+ return entry;
134
143
  }).filter((e) => e !== null);
144
+ modelsCache.set(raw, result);
145
+ return result;
135
146
  }
136
147
  catch {
137
148
  return [];
@@ -18,6 +18,8 @@ export declare class AdaptiveController {
18
18
  onRequestComplete(providerId: string, result: AdaptiveResult): void;
19
19
  getStatus(providerId: string): AdaptiveState | undefined;
20
20
  syncProvider(providerId: string, p: ProviderConcurrencyParams): void;
21
+ /** 根据当前位置和容量推导行为参数,实现水位梯度控制 */
22
+ private deriveProfile;
21
23
  private transitionSuccess;
22
24
  private transitionFailure;
23
25
  private syncToSemaphore;
@@ -1,11 +1,18 @@
1
- const SUCCESS_THRESHOLD = 3;
2
- const FAILURE_THRESHOLD = 3;
3
- const DECREASE_STEP = 2;
4
- const COOLDOWN_MS = 30_000;
5
1
  const RATE_LIMIT_STATUS = 429;
6
- const HALF_DIVISOR = 2;
7
2
  const HTTP_SERVER_ERROR_MIN = 500;
8
3
  const ADAPTIVE_MIN = 1;
4
+ // deriveProfile 参数常量
5
+ const CAPACITY_LOG_BASE = 7;
6
+ const CLIMB_BASE = 2;
7
+ const CLIMB_CAPACITY_WEIGHT = 2;
8
+ const CLIMB_LEVEL_WEIGHT = 2;
9
+ const DROP_BASE = 5;
10
+ const DROP_CAPACITY_WEIGHT = 2;
11
+ const DROP_LEVEL_WEIGHT = 2;
12
+ const KEEP_RATIO_MIN = 0.5;
13
+ const COOLDOWN_BASE_MS = 10_000;
14
+ const COOLDOWN_LEVEL_MS = 10_000;
15
+ const SAFE_ZONE_DIVISOR = 2;
9
16
  export class AdaptiveController {
10
17
  semaphoreControl;
11
18
  logger;
@@ -19,7 +26,7 @@ export class AdaptiveController {
19
26
  this.entries.set(providerId, {
20
27
  state: {
21
28
  currentLimit: initialLimit,
22
- probeActive: true,
29
+ limitReached: false,
23
30
  consecutiveSuccesses: 0,
24
31
  consecutiveFailures: 0,
25
32
  cooldownUntil: 0,
@@ -78,26 +85,40 @@ export class AdaptiveController {
78
85
  });
79
86
  }
80
87
  }
88
+ /** 根据当前位置和容量推导行为参数,实现水位梯度控制 */
89
+ deriveProfile(currentLimit, max) {
90
+ const level = Math.min(1, currentLimit / max);
91
+ const capacity = Math.min(1, Math.log2(max) / CAPACITY_LOG_BASE);
92
+ return {
93
+ climbThreshold: Math.max(CLIMB_BASE, Math.round(CLIMB_BASE + capacity * CLIMB_CAPACITY_WEIGHT + level * CLIMB_LEVEL_WEIGHT)),
94
+ dropThreshold: Math.max(1, Math.round(DROP_BASE - capacity * DROP_CAPACITY_WEIGHT - level * DROP_LEVEL_WEIGHT)),
95
+ keepRatio: currentLimit > 1 ? 1 - 1 / currentLimit : KEEP_RATIO_MIN,
96
+ cooldownMs: Math.round(COOLDOWN_BASE_MS + level * COOLDOWN_LEVEL_MS),
97
+ };
98
+ }
81
99
  transitionSuccess(providerId, entry, result) {
82
100
  const s = entry.state;
83
- s.consecutiveSuccesses++;
84
- s.consecutiveFailures = 0;
101
+ // 冷却期内不累计成功计数
85
102
  if (Date.now() < s.cooldownUntil)
86
103
  return;
87
- if (s.consecutiveSuccesses >= SUCCESS_THRESHOLD) {
88
- if (!s.probeActive) {
89
- s.probeActive = true;
90
- s.consecutiveSuccesses = 0;
91
- const effective = Math.min(Math.max(s.currentLimit + 1, ADAPTIVE_MIN), entry.max);
92
- this.logger?.info?.({ providerId, requestId: result.requestId, prevLimit: s.currentLimit, newLimit: s.currentLimit, effectiveLimit: effective, action: "probe_open" }, "Adaptive: probe window opened");
93
- }
94
- else {
104
+ s.consecutiveSuccesses++;
105
+ s.consecutiveFailures = 0;
106
+ // 利用率信号:请求排过队说明 limit 被实际触及
107
+ if (result.wasQueued) {
108
+ s.limitReached = true;
109
+ }
110
+ const profile = this.deriveProfile(s.currentLimit, entry.max);
111
+ if (s.consecutiveSuccesses >= profile.climbThreshold) {
112
+ // 利用率门控:安全区(limit <= max/2) 或 limitReached 才爬升
113
+ const safeZone = s.currentLimit <= Math.floor(entry.max / SAFE_ZONE_DIVISOR);
114
+ if (safeZone || s.limitReached) {
95
115
  const prevLimit = s.currentLimit;
96
116
  s.currentLimit = Math.min(s.currentLimit + 1, entry.max);
97
- s.consecutiveSuccesses = 0;
98
- const effective = Math.min(Math.max(s.currentLimit + 1, ADAPTIVE_MIN), entry.max);
99
- this.logger?.info?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, effectiveLimit: effective, max: entry.max, action: "limit_increased" }, "Adaptive: limit increased by 1");
117
+ this.logger?.info?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, action: "limit_increased" }, "Adaptive: limit increased by 1");
100
118
  }
119
+ // 无论是否爬升,都重置计数周期
120
+ s.consecutiveSuccesses = 0;
121
+ s.limitReached = false;
101
122
  this.syncToSemaphore(providerId);
102
123
  }
103
124
  }
@@ -105,10 +126,10 @@ export class AdaptiveController {
105
126
  const statusCode = result.statusCode;
106
127
  // 过滤非并发相关的错误:
107
128
  // - retryRuleMatched=true → resilience 层根据重试规则判断为可重试的失败,计入退避
108
- // - 429: 限流,计入退避
129
+ // - 429: 限流,计入退避(含信号量超时/队列满,orchestrator 统一传入 429)
109
130
  // - 5xx: 服务端错误(可能过载),计入退避
110
131
  // - undefined: 网络异常,计入退避
111
- // - 2xx/4xx 且 retryRuleMatched!=true: 非并发问题(如 upstream 200 body error 但未命中重试规则),不触发退避
132
+ // - 2xx/4xx 且 retryRuleMatched!=true: 非并发问题,不触发退避
112
133
  if (!result.retryRuleMatched && statusCode !== undefined && statusCode !== RATE_LIMIT_STATUS && statusCode < HTTP_SERVER_ERROR_MIN) {
113
134
  this.logger?.debug?.({ providerId, statusCode, action: "failure_ignored" }, "Adaptive: non-concurrency failure ignored");
114
135
  return;
@@ -117,31 +138,32 @@ export class AdaptiveController {
117
138
  s.consecutiveFailures++;
118
139
  s.consecutiveSuccesses = 0;
119
140
  if (statusCode === RATE_LIMIT_STATUS) {
141
+ // 429 和信号量错误:丢 1 格 + 冷却
142
+ const profile = this.deriveProfile(s.currentLimit, entry.max);
120
143
  const prevLimit = s.currentLimit;
121
- s.currentLimit = Math.max(Math.floor(s.currentLimit / HALF_DIVISOR), ADAPTIVE_MIN);
122
- s.probeActive = false;
123
- s.cooldownUntil = Date.now() + COOLDOWN_MS;
144
+ s.currentLimit = Math.max(Math.floor(s.currentLimit * profile.keepRatio), ADAPTIVE_MIN);
145
+ s.cooldownUntil = Date.now() + profile.cooldownMs;
124
146
  s.consecutiveFailures = 0;
125
147
  this.syncToSemaphore(providerId);
126
- this.logger?.warn?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, cooldownMs: COOLDOWN_MS, statusCode, action: "rate_limit_backoff" }, "Adaptive: 429 rate limit, halved concurrency and entered cooldown");
148
+ this.logger?.warn?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, cooldownMs: profile.cooldownMs, statusCode, action: "rate_limit_backoff" }, "Adaptive: 429/semaphore, lost 1 slot and entered cooldown");
127
149
  }
128
- else if (s.consecutiveFailures >= FAILURE_THRESHOLD) {
129
- const prevLimit = s.currentLimit;
130
- s.currentLimit = Math.max(s.currentLimit - DECREASE_STEP, ADAPTIVE_MIN);
131
- s.probeActive = false;
132
- s.consecutiveFailures = 0;
133
- this.syncToSemaphore(providerId);
134
- this.logger?.warn?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, statusCode, retryRuleMatched: result.retryRuleMatched ?? false, action: "failure_backoff" }, "Adaptive: sustained failures, decreased concurrency");
150
+ else {
151
+ // 5xx / 网络错误(statusCode=undefined):连续失败退避
152
+ const profile = this.deriveProfile(s.currentLimit, entry.max);
153
+ if (s.consecutiveFailures >= profile.dropThreshold) {
154
+ const prevLimit = s.currentLimit;
155
+ s.currentLimit = Math.max(s.currentLimit - 1, ADAPTIVE_MIN);
156
+ s.consecutiveFailures = 0;
157
+ this.syncToSemaphore(providerId);
158
+ this.logger?.warn?.({ providerId, requestId: result.requestId, prevLimit, newLimit: s.currentLimit, statusCode, retryRuleMatched: result.retryRuleMatched ?? false, action: "failure_backoff" }, "Adaptive: sustained failures, decreased concurrency");
159
+ }
135
160
  }
136
161
  }
137
162
  syncToSemaphore(providerId) {
138
163
  const entry = this.entries.get(providerId);
139
164
  if (!entry)
140
165
  return;
141
- // probeActive 时额外加 1 个探针槽位,但不超过 max
142
- const effectiveLimit = entry.state.probeActive
143
- ? Math.min(Math.max(entry.state.currentLimit + 1, ADAPTIVE_MIN), entry.max)
144
- : Math.max(entry.state.currentLimit, ADAPTIVE_MIN);
166
+ const effectiveLimit = Math.max(entry.state.currentLimit, ADAPTIVE_MIN);
145
167
  this.semaphoreControl.updateConfig(providerId, {
146
168
  maxConcurrency: effectiveLimit,
147
169
  queueTimeoutMs: entry.queueTimeoutMs,
@@ -7,7 +7,7 @@ export interface ConcurrencyConfig {
7
7
  /** Internal state of adaptive concurrency for a provider. */
8
8
  export interface AdaptiveState {
9
9
  currentLimit: number;
10
- probeActive: boolean;
10
+ limitReached: boolean;
11
11
  consecutiveSuccesses: number;
12
12
  consecutiveFailures: number;
13
13
  cooldownUntil: number;
@@ -18,6 +18,8 @@ export interface AdaptiveResult {
18
18
  statusCode?: number;
19
19
  /** 重试规则是否匹配(resilience 层判断为可重试的失败),为 true 时忽略 statusCode 过滤 */
20
20
  retryRuleMatched?: boolean;
21
+ /** 此请求是否曾经排过队 */
22
+ wasQueued?: boolean;
21
23
  /** 触发此反馈的请求日志 ID,用于日志关联 */
22
24
  requestId?: string;
23
25
  }
@@ -23,6 +23,7 @@ export declare class RequestTracker {
23
23
  private providerConfigCache;
24
24
  private pushTimer;
25
25
  private tickCount;
26
+ private requestUpdateDirty;
26
27
  private streamAccumulators;
27
28
  private streamContentPending;
28
29
  private streamContentTimer;
@@ -15,6 +15,7 @@ export class RequestTracker {
15
15
  providerConfigCache = new Map();
16
16
  pushTimer = null;
17
17
  tickCount = 0;
18
+ requestUpdateDirty = true;
18
19
  streamAccumulators = new Map();
19
20
  streamContentPending = new Set();
20
21
  streamContentTimer = null;
@@ -38,6 +39,7 @@ export class RequestTracker {
38
39
  // --- Core methods ---
39
40
  start(req) {
40
41
  this.activeMap.set(req.id, { ...req });
42
+ this.requestUpdateDirty = true;
41
43
  this.logger?.debug?.({ reqId: req.id, model: req.model, providerId: req.providerId, activeCount: this.activeMap.size }, "Tracker: start");
42
44
  this.broadcast("request_start", req);
43
45
  }
@@ -133,6 +135,7 @@ export class RequestTracker {
133
135
  this.recentCompleted.length = RECENT_COMPLETED_MAX;
134
136
  }
135
137
  this.logger?.debug?.({ reqId: id, status: result.status, statusCode, latency, activeCount: this.activeMap.size }, "Tracker: complete");
138
+ this.requestUpdateDirty = true;
136
139
  this.broadcast("request_complete", completed);
137
140
  }
138
141
  /** Update stream metrics for a completed request (e.g., after cache estimation) */
@@ -278,7 +281,10 @@ export class RequestTracker {
278
281
  this.tickCount++;
279
282
  this.cleanupRecent();
280
283
  this.cleanupStaleActive();
281
- this.broadcast("request_update", this.getActive());
284
+ if (this.requestUpdateDirty) {
285
+ this.broadcast("request_update", this.getActive());
286
+ this.requestUpdateDirty = false;
287
+ }
282
288
  this.broadcast("concurrency_update", this.getConcurrency());
283
289
  this.broadcast("stats_update", this.getStats());
284
290
  // Every 10s (every 2nd tick)
@@ -26,6 +26,8 @@ export interface ResolveResult {
26
26
  concurrency_override?: ConcurrencyOverride;
27
27
  /** 活跃规则(schedule 或 base)中的 target 总数,用于 failover 判断 */
28
28
  targetCount: number;
29
+ /** 排除前的完整 target 列表,用于请求级缓存(BP-H2) */
30
+ allTargets?: Target[];
29
31
  }
30
32
  export interface MetricsResult {
31
33
  input_tokens: number | null;
@@ -1,4 +1,5 @@
1
1
  import Database from "better-sqlite3";
2
+ export declare function getCachedStmt(db: Database.Database, sql: string): Database.Statement;
2
3
  /**
3
4
  * 通用 UPDATE 构建器。
4
5
  * 用白名单过滤安全字段,拼接 SET 子句。
@@ -1,3 +1,18 @@
1
+ /** WeakMap 按 db 实例缓存 prepared statements,避免重复 prepare() */
2
+ const stmtCache = new WeakMap();
3
+ export function getCachedStmt(db, sql) {
4
+ let cache = stmtCache.get(db);
5
+ if (!cache) {
6
+ cache = new Map();
7
+ stmtCache.set(db, cache);
8
+ }
9
+ let stmt = cache.get(sql);
10
+ if (!stmt) {
11
+ stmt = db.prepare(sql);
12
+ cache.set(sql, stmt);
13
+ }
14
+ return stmt;
15
+ }
1
16
  /**
2
17
  * 通用 UPDATE 构建器。
3
18
  * 用白名单过滤安全字段,拼接 SET 子句。
package/dist/db/index.js CHANGED
@@ -28,6 +28,12 @@ export function initDatabase(dbPath) {
28
28
  db.pragma("journal_mode = WAL");
29
29
  db.pragma("auto_vacuum = INCREMENTAL");
30
30
  db.pragma("foreign_keys = ON");
31
+ db.pragma("synchronous = NORMAL");
32
+ db.pragma("cache_size = -16000");
33
+ db.pragma("busy_timeout = 5000");
34
+ db.pragma("temp_store = MEMORY");
35
+ db.pragma("mmap_size = 67108864");
36
+ db.pragma("journal_size_limit = 67108864");
31
37
  db.exec(`
32
38
  CREATE TABLE IF NOT EXISTS migrations (
33
39
  name TEXT PRIMARY KEY,
package/dist/db/logs.d.ts CHANGED
@@ -74,7 +74,7 @@ export declare function updateLogStreamContent(db: Database.Database, logId: str
74
74
  /** 当 router 返回给客户端的 status code 与上游不同时,记录实际发送的 status */
75
75
  export declare function updateLogClientStatus(db: Database.Database, logId: string, clientStatusCode: number): void;
76
76
  export declare function deleteLogsBefore(db: Database.Database, beforeDate: string): number;
77
- /** 估算 request_logs 表占用字节数 */
77
+ /** 采样估算 request_logs 表占用字节数(避免全表 SUM 扫描) */
78
78
  export declare function estimateLogTableSize(db: Database.Database): number;
79
79
  /** 删除最旧的日志,保留 keepCount 条,返回实际删除条数。分批删除避免长时间锁表 */
80
80
  export declare function deleteOldestLogs(db: Database.Database, keepCount: number): number;
package/dist/db/logs.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { shouldPreserveDetail } from "../proxy/log-detail-policy.js";
2
+ import { getCachedStmt } from "./helpers.js";
2
3
  // --- request_logs ---
3
4
  const LOG_LIST_SELECT = `rl.id, rl.api_type, rl.model, rl.provider_id, rl.status_code, rl.client_status_code, rl.latency_ms,
4
5
  rl.is_stream, rl.error_message, rl.created_at, rl.is_retry, rl.is_failover, rl.original_request_id, rl.original_model,
@@ -26,7 +27,7 @@ export function insertRequestLog(db, log, writeContext) {
26
27
  }
27
28
  // 详情保留判定
28
29
  const preserveDetail = shouldPreserveDetail(log.status_code, writeContext?.responseBody ?? null, writeContext?.matcher ?? null, !!writeContext?.logFileWriter);
29
- db.prepare(`INSERT INTO request_logs (id, api_type, model, provider_id, status_code, client_status_code, latency_ms,
30
+ getCachedStmt(db, `INSERT INTO request_logs (id, api_type, model, provider_id, status_code, client_status_code, latency_ms,
30
31
  is_stream, error_message, created_at, client_request, upstream_request, upstream_response,
31
32
  is_retry, is_failover, original_request_id, router_key_id, original_model, session_id, pipeline_snapshot)
32
33
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(log.id, log.api_type, log.model, log.provider_id, log.status_code, log.client_status_code ?? null, log.latency_ms, log.is_stream, log.error_message, log.created_at, preserveDetail ? (log.client_request ?? null) : null, preserveDetail ? (log.upstream_request ?? null) : null, preserveDetail ? (log.upstream_response ?? null) : null, log.is_retry ?? 0, log.is_failover ?? 0, log.original_request_id ?? null, log.router_key_id ?? null, log.original_model ?? null, log.session_id ?? null, log.pipeline_snapshot ?? null);
@@ -92,11 +93,11 @@ export function getRequestLogById(db, id) {
92
93
  }
93
94
  /** 流式请求完成后,将 tracker 中累积的文本内容写入 request_logs */
94
95
  export function updateLogStreamContent(db, logId, textContent) {
95
- db.prepare("UPDATE request_logs SET stream_text_content = ? WHERE id = ?").run(textContent, logId);
96
+ getCachedStmt(db, "UPDATE request_logs SET stream_text_content = ? WHERE id = ?").run(textContent, logId);
96
97
  }
97
98
  /** 当 router 返回给客户端的 status code 与上游不同时,记录实际发送的 status */
98
99
  export function updateLogClientStatus(db, logId, clientStatusCode) {
99
- db.prepare("UPDATE request_logs SET client_status_code = ? WHERE id = ?").run(clientStatusCode, logId);
100
+ getCachedStmt(db, "UPDATE request_logs SET client_status_code = ? WHERE id = ?").run(clientStatusCode, logId);
100
101
  }
101
102
  export function deleteLogsBefore(db, beforeDate) {
102
103
  const changes = db.prepare("DELETE FROM request_logs WHERE created_at < ?").run(beforeDate).changes;
@@ -107,17 +108,20 @@ export function deleteLogsBefore(db, beforeDate) {
107
108
  }
108
109
  /** 每行元数据(数字列+索引)的估算字节数 */
109
110
  const ROW_METADATA_BYTES = 500;
110
- /** 估算 request_logs 表占用字节数 */
111
+ /** 采样估算 request_logs 表占用字节数(避免全表 SUM 扫描) */
111
112
  export function estimateLogTableSize(db) {
112
- const row = db.prepare(`
113
- SELECT COALESCE(SUM(
114
- COALESCE(length(client_request), 0) + COALESCE(length(upstream_request), 0) +
115
- COALESCE(length(upstream_response), 0) + COALESCE(length(stream_text_content), 0) +
116
- COALESCE(length(error_message), 0) + COALESCE(length(pipeline_snapshot), 0) + ?
117
- ), 0) as size
118
- FROM request_logs
119
- `).get(ROW_METADATA_BYTES);
120
- return row.size;
113
+ const countRow = db.prepare("SELECT COUNT(*) as cnt FROM request_logs").get();
114
+ if (countRow.cnt === 0)
115
+ return 0;
116
+ // 采样最近 100 行,计算平均行大小
117
+ const samples = db.prepare(`
118
+ SELECT COALESCE(length(client_request), 0) + COALESCE(length(upstream_request), 0) +
119
+ COALESCE(length(upstream_response), 0) + COALESCE(length(stream_text_content), 0) +
120
+ COALESCE(length(error_message), 0) + COALESCE(length(pipeline_snapshot), 0) + ? AS row_size
121
+ FROM request_logs ORDER BY created_at DESC LIMIT 100
122
+ `).all(ROW_METADATA_BYTES);
123
+ const avgRowSize = samples.reduce((s, r) => s + r.row_size, 0) / samples.length;
124
+ return Math.round(avgRowSize * countRow.cnt);
121
125
  }
122
126
  const DELETE_BATCH_SIZE = 1000;
123
127
  /** 删除最旧的日志,保留 keepCount 条,返回实际删除条数。分批删除避免长时间锁表 */
@@ -164,15 +168,28 @@ export function getRequestLogsGrouped(db, options) {
164
168
  const total = db.prepare(`SELECT COUNT(*) as count FROM request_logs rl WHERE ${where}`).get(...params).count;
165
169
  const offset = (options.page - 1) * options.limit;
166
170
  const data = db
167
- .prepare(`SELECT ${LOG_LIST_SELECT},
168
- (SELECT COUNT(*) FROM request_logs c WHERE c.original_request_id = rl.id) AS child_count
169
- FROM request_logs rl
171
+ .prepare(`WITH page_ids AS (
172
+ SELECT rl.id FROM request_logs rl
173
+ ${LOG_LIST_JOIN}
174
+ WHERE ${where}
175
+ ORDER BY rl.created_at DESC LIMIT ? OFFSET ?
176
+ )
177
+ SELECT ${LOG_LIST_SELECT},
178
+ COALESCE(child.cnt, 0) AS child_count
179
+ FROM page_ids pg
180
+ JOIN request_logs rl ON rl.id = pg.id
170
181
  ${LOG_LIST_JOIN}
171
- WHERE ${where} ORDER BY rl.created_at DESC LIMIT ? OFFSET ?`)
182
+ LEFT JOIN (
183
+ SELECT original_request_id, COUNT(*) AS cnt
184
+ FROM request_logs
185
+ WHERE original_request_id IN (SELECT id FROM page_ids)
186
+ GROUP BY original_request_id
187
+ ) child ON child.original_request_id = rl.id
188
+ ORDER BY rl.created_at DESC`)
172
189
  .all(...params, options.limit, offset);
173
190
  return { data, total };
174
191
  }
175
192
  /** 后续 pipeline 阶段完成后,回写 snapshot 到已有日志 */
176
193
  export function updateLogPipelineSnapshot(db, logId, snapshot) {
177
- db.prepare("UPDATE request_logs SET pipeline_snapshot = ? WHERE id = ?").run(snapshot, logId);
194
+ getCachedStmt(db, "UPDATE request_logs SET pipeline_snapshot = ? WHERE id = ?").run(snapshot, logId);
178
195
  }
@@ -1,8 +1,9 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { MS_PER_SECOND } from "../core/constants.js";
3
+ import { getCachedStmt } from "./helpers.js";
3
4
  export function insertMetrics(db, m) {
4
5
  const id = randomUUID();
5
- db.prepare(`INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, router_key_id, status_code,
6
+ getCachedStmt(db, `INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, router_key_id, status_code,
6
7
  input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete, input_tokens_estimated,
7
8
  client_type, cache_read_tokens_estimated,
8
9
  thinking_tokens, text_tokens, tool_use_tokens, thinking_duration_ms,
@@ -0,0 +1,8 @@
1
+ -- 覆盖 provider_id 过滤 + 时间范围分页
2
+ CREATE INDEX IF NOT EXISTS idx_request_logs_provider_id ON request_logs(provider_id);
3
+ CREATE INDEX IF NOT EXISTS idx_request_logs_created_at_provider ON request_logs(created_at DESC, provider_id);
4
+ CREATE INDEX IF NOT EXISTS idx_request_logs_created_at_router_key ON request_logs(created_at DESC, router_key_id);
5
+
6
+ -- 覆盖按密钥过滤的聚合查询
7
+ CREATE INDEX IF NOT EXISTS idx_metrics_router_key ON request_metrics(router_key_id);
8
+ CREATE INDEX IF NOT EXISTS idx_metrics_created_at_router_key ON request_metrics(created_at, router_key_id);
@@ -1,9 +1,26 @@
1
+ import { getCachedStmt } from "./helpers.js";
2
+ // TTL 缓存:WeakMap 按 db 实例隔离,确保测试中 :memory: db 互不干扰
3
+ const settingsCache = new WeakMap();
4
+ const CACHE_TTL_MS = 30_000;
1
5
  export function getSetting(db, key) {
2
- const row = db.prepare("SELECT value FROM settings WHERE key = ?").get(key);
3
- return row?.value ?? null;
6
+ let cache = settingsCache.get(db);
7
+ if (!cache) {
8
+ cache = new Map();
9
+ settingsCache.set(db, cache);
10
+ }
11
+ const cached = cache.get(key);
12
+ if (cached && Date.now() < cached.expiresAt)
13
+ return cached.value;
14
+ const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get(key);
15
+ const value = row?.value ?? null;
16
+ cache.set(key, { value, expiresAt: Date.now() + CACHE_TTL_MS });
17
+ return value;
4
18
  }
5
19
  export function setSetting(db, key, value) {
6
- db.prepare("INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)").run(key, value);
20
+ getCachedStmt(db, "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)").run(key, value);
21
+ const cache = settingsCache.get(db);
22
+ if (cache)
23
+ cache.delete(key);
7
24
  }
8
25
  export function isInitialized(db) {
9
26
  return getSetting(db, "initialized") === "true";
@@ -40,7 +57,7 @@ export function setConfigSyncSource(db, source) {
40
57
  setSetting(db, "config_sync_source", source);
41
58
  }
42
59
  export function getDetailLogEnabled(db) {
43
- const row = db.prepare("SELECT value FROM settings WHERE key = ?").get("detail_log_enabled");
60
+ const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get("detail_log_enabled");
44
61
  return row ? row.value !== "0" : true;
45
62
  }
46
63
  export function getTokenEstimationEnabled(db) {
@@ -52,7 +69,7 @@ export function setTokenEstimationEnabled(db, enabled) {
52
69
  }
53
70
  const DEFAULT_LOG_FILE_RETENTION_DAYS = 3;
54
71
  export function getLogFileRetentionDays(db) {
55
- const row = db.prepare("SELECT value FROM settings WHERE key = ?").get("log_file_retention_days");
72
+ const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get("log_file_retention_days");
56
73
  return row ? parseInt(row.value, 10) : DEFAULT_LOG_FILE_RETENTION_DAYS;
57
74
  }
58
75
  const DEFAULT_CLIENT_SESSION_HEADERS = [
package/dist/index.js CHANGED
@@ -330,6 +330,8 @@ export async function buildApp(options) {
330
330
  proxyAgentFactory.invalidateAll();
331
331
  const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
332
332
  sessionTracker.stop();
333
+ // Flush LogFileWriter 的 WriteStream 缓冲数据到磁盘
334
+ await logFileWriter?.stop();
333
335
  // 等待活跃代理请求自然完成,超时后强制关闭所有连接。
334
336
  // 先调用 app.close() 停止接受新连接并等待现有连接结束,
335
337
  // 如果 2 秒内未完成则调用 closeAllConnections() 强制断开,防止 SSE 长连接导致无限等待。
@@ -13,12 +13,17 @@ export declare class MetricsExtractor {
13
13
  private stopReason;
14
14
  private firstContentReceived;
15
15
  private complete;
16
- private thinkingContentBuffer;
16
+ /** Buffer 容量上限,超过后停止 push 避免内存膨胀 */
17
+ private static readonly MAX_BUFFER_SIZE;
18
+ private thinkingChunks;
19
+ private thinkingTotalLength;
17
20
  private thinkingStreamStartTime;
18
21
  private thinkingStreamEndTime;
19
- private textContentBuffer;
22
+ private textChunks;
23
+ private textTotalLength;
20
24
  private textStreamStartTime;
21
- private toolUseContentBuffer;
25
+ private toolUseChunks;
26
+ private toolUseTotalLength;
22
27
  private toolUseStreamStartTime;
23
28
  constructor(apiType: "openai" | "openai-responses" | "anthropic", requestStartTime: number);
24
29
  processEvent(event: SSEEvent): void;