llm-simple-router 0.10.6 → 0.10.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/model-context.d.ts +2 -0
- package/dist/config/model-context.js +15 -4
- package/dist/core/monitor/request-tracker.d.ts +1 -0
- package/dist/core/monitor/request-tracker.js +7 -1
- package/dist/core/types.d.ts +2 -0
- package/dist/db/helpers.d.ts +1 -0
- package/dist/db/helpers.js +15 -0
- package/dist/db/index.js +6 -0
- package/dist/db/logs.d.ts +1 -1
- package/dist/db/logs.js +35 -18
- package/dist/db/metrics.js +2 -1
- package/dist/db/migrations/044_add_performance_indexes.sql +8 -0
- package/dist/db/settings.js +22 -5
- package/dist/index.js +2 -0
- package/dist/metrics/metrics-extractor.d.ts +8 -3
- package/dist/metrics/metrics-extractor.js +33 -20
- package/dist/metrics/sse-parser.js +5 -2
- package/dist/middleware/auth.d.ts +1 -1
- package/dist/middleware/auth.js +12 -20
- package/dist/proxy/handler/create-proxy-handler.js +6 -2
- package/dist/proxy/handler/failover-loop.js +40 -22
- package/dist/proxy/hooks/builtin/allowed-models.js +8 -18
- package/dist/proxy/hooks/builtin/cache-estimation.js +4 -0
- package/dist/proxy/orchestration/resilience.js +4 -2
- package/dist/proxy/proxy-logging.d.ts +1 -1
- package/dist/proxy/proxy-logging.js +32 -17
- package/dist/proxy/routing/enhancement-config.d.ts +2 -0
- package/dist/proxy/routing/enhancement-config.js +21 -4
- package/dist/proxy/routing/mapping-resolver.d.ts +3 -1
- package/dist/proxy/routing/mapping-resolver.js +4 -2
- package/dist/proxy/transport/proxy-agent.d.ts +8 -0
- package/dist/proxy/transport/proxy-agent.js +21 -0
- package/dist/proxy/transport/stream.js +25 -5
- package/dist/proxy/transport/transport-fn.js +3 -1
- package/dist/storage/log-file-writer.d.ts +8 -1
- package/dist/storage/log-file-writer.js +41 -3
- package/frontend-dist/assets/{CardContent-B3BkvaAc.js → CardContent-CyvsM1dh.js} +1 -1
- package/frontend-dist/assets/{CardTitle-_AfAmHWW.js → CardTitle-CKxB6jLn.js} +1 -1
- package/frontend-dist/assets/{Checkbox-Bq_JpeJR.js → Checkbox-RMorZJxv.js} +1 -1
- package/frontend-dist/assets/CollapsibleContent-lX7fgPmd.js +1 -0
- package/frontend-dist/assets/CollapsibleTrigger-BGLyyoL-.js +1 -0
- package/frontend-dist/assets/Dashboard-CUUJmaq6.js +3 -0
- package/frontend-dist/assets/{Input-BwMPjZew.js → Input-qvtpg1mU.js} +1 -1
- package/frontend-dist/assets/Label-KbeWdVSH.js +1 -0
- package/frontend-dist/assets/Login-CFEaXT7h.js +1 -0
- package/frontend-dist/assets/Logs-ItgRttZ7.js +1 -0
- package/frontend-dist/assets/MappingEntryEditor-JtB6vvsH.js +1 -0
- package/frontend-dist/assets/ModelCard-C0-g-WOb.js +1 -0
- package/frontend-dist/assets/ModelMappings-RVrNS0aC.js +1 -0
- package/frontend-dist/assets/Monitor-3xt32Bbl.js +1 -0
- package/frontend-dist/assets/Providers-Ro-AjfzZ.js +1 -0
- package/frontend-dist/assets/ProxyEnhancement-BlDtv_4M.js +1 -0
- package/frontend-dist/assets/QuickSetup-lEC9e6wW.js +1 -0
- package/frontend-dist/assets/RetryRules-DJMh2I-9.js +1 -0
- package/frontend-dist/assets/RouterKeys-DvXfMOW6.js +1 -0
- package/frontend-dist/assets/{RovingFocusItem-CmGUQVbI.js → RovingFocusItem-CTJ2Vqf3.js} +1 -1
- package/frontend-dist/assets/Schedules-DKWJFNUk.js +1 -0
- package/frontend-dist/assets/Settings-DkIN3IsO.js +6 -0
- package/frontend-dist/assets/Setup-DIGMoSiu.js +1 -0
- package/frontend-dist/assets/{Switch-CWJEJhAE.js → Switch-EL-MDXz3.js} +1 -1
- package/frontend-dist/assets/{TooltipTrigger-BirqVXYf.js → TooltipTrigger-B5rqpLBL.js} +1 -1
- package/frontend-dist/assets/TransformRulesForm-CrYYvjo8.js +1 -0
- package/frontend-dist/assets/UnifiedRequestDialog-Dzes8usX.js +3 -0
- package/frontend-dist/assets/{VisuallyHiddenInput-D-mGxG1B.js → VisuallyHiddenInput-CNa5CPMY.js} +1 -1
- package/frontend-dist/assets/{button-DtYZp433.js → button-Bq3kR6lP.js} +2 -2
- package/frontend-dist/assets/{copy-DEIL_qqy.js → copy-B7rgYQq3.js} +1 -1
- package/frontend-dist/assets/{dashboard-B1pq4be7.js → dashboard-COCyp2p_.js} +1 -1
- package/frontend-dist/assets/{dashboard-BVRlMB_W.js → dashboard-DjgmcUG5.js} +1 -1
- package/frontend-dist/assets/dialog-XzDsW73J.js +1 -0
- package/frontend-dist/assets/index-Ba1Z6tMV.js +3 -0
- package/frontend-dist/assets/mappings-6w7mc8YK.js +1 -0
- package/frontend-dist/assets/mappings-C1fK_e70.js +1 -0
- package/frontend-dist/assets/{schedules-d2NQ-xEH.js → schedules-Bd66RL7P.js} +1 -1
- package/frontend-dist/assets/{schedules-Dul_xl7u.js → schedules-HDwMuDgX.js} +1 -1
- package/frontend-dist/assets/{trash-2-CYe-L1uQ.js → trash-2-DCsvygvC.js} +1 -1
- package/frontend-dist/assets/{useClipboard-DojwGFBn.js → useClipboard-D32vuT2b.js} +1 -1
- package/frontend-dist/assets/{useLogRetention-BrYP2mf7.js → useLogRetention-D20-qsiv.js} +1 -1
- package/frontend-dist/index.html +2 -2
- package/package.json +1 -1
- package/frontend-dist/assets/CollapsibleContent-DqPh91QX.js +0 -1
- package/frontend-dist/assets/CollapsibleTrigger-LG3l2pdm.js +0 -1
- package/frontend-dist/assets/Dashboard-0LPjTck9.js +0 -3
- package/frontend-dist/assets/Label-rIqXe61w.js +0 -1
- package/frontend-dist/assets/Login-W85mNIn5.js +0 -1
- package/frontend-dist/assets/Logs-ahc8KSDe.js +0 -1
- package/frontend-dist/assets/MappingEntryEditor-7Kf2-J2B.js +0 -1
- package/frontend-dist/assets/ModelCard-BfAUo6un.js +0 -1
- package/frontend-dist/assets/ModelMappings-BSbzeof5.js +0 -1
- package/frontend-dist/assets/Monitor-dCya3SFN.js +0 -1
- package/frontend-dist/assets/Providers-1wDl4D_R.js +0 -1
- package/frontend-dist/assets/ProxyEnhancement-D_IU9PcA.js +0 -1
- package/frontend-dist/assets/QuickSetup-xS9ROA_-.js +0 -1
- package/frontend-dist/assets/RetryRules-16bxf7eE.js +0 -1
- package/frontend-dist/assets/RouterKeys-busp00XZ.js +0 -1
- package/frontend-dist/assets/Schedules-DIOQSB85.js +0 -1
- package/frontend-dist/assets/Settings-BZ40lTsk.js +0 -6
- package/frontend-dist/assets/Setup-kCvg6E-U.js +0 -1
- package/frontend-dist/assets/TransformRulesForm-CxfgQX02.js +0 -1
- package/frontend-dist/assets/UnifiedRequestDialog-CDQ17q1s.js +0 -3
- package/frontend-dist/assets/dialog-CKP56XIn.js +0 -1
- package/frontend-dist/assets/index-CWlf_u-I.js +0 -3
- package/frontend-dist/assets/mappings-Cazz3EF4.js +0 -1
- package/frontend-dist/assets/mappings-DQRteuwa.js +0 -1
|
@@ -16,5 +16,7 @@ export declare const OVERFLOW_THRESHOLD = 1000000;
|
|
|
16
16
|
export declare function lookupContextWindow(modelName: string): number;
|
|
17
17
|
/** 标准化 patch 名称:连字符 → 下划线 */
|
|
18
18
|
export declare function normalizePatchName(name: string): string;
|
|
19
|
+
/** 清除缓存(仅供测试使用) */
|
|
20
|
+
export declare function clearModelsCache(): void;
|
|
19
21
|
export declare function parseModels(raw: string): ModelEntry[];
|
|
20
22
|
export declare function buildModelInfoList(modelEntries: ModelEntry[], overrides: Map<string, number>): ModelInfo[];
|
|
@@ -104,14 +104,23 @@ const PATCH_ID_MIGRATION = {
|
|
|
104
104
|
non_ds_tools: "thinking_consistency",
|
|
105
105
|
cache_control: "thinking_consistency",
|
|
106
106
|
};
|
|
107
|
+
// Cache of parseModels results, keyed by the raw input string (a Map compares string keys by value, so equal strings share one entry).
|
|
108
|
+
const modelsCache = new Map();
|
|
109
|
+
/** Empty the parseModels cache. Intended for tests only. */
|
|
110
|
+
export function clearModelsCache() {
|
|
111
|
+
modelsCache.clear();
|
|
112
|
+
}
|
|
107
113
|
export function parseModels(raw) {
|
|
108
114
|
if (!raw)
|
|
109
115
|
return [];
|
|
116
|
+
const cached = modelsCache.get(raw);
|
|
117
|
+
if (cached)
|
|
118
|
+
return cached;
|
|
110
119
|
try {
|
|
111
120
|
const parsed = JSON.parse(raw);
|
|
112
121
|
if (!Array.isArray(parsed))
|
|
113
122
|
return [];
|
|
114
|
-
|
|
123
|
+
const result = parsed.map((item) => {
|
|
115
124
|
if (typeof item === 'string') {
|
|
116
125
|
return item ? { name: item, patches: [] } : null;
|
|
117
126
|
}
|
|
@@ -124,14 +133,16 @@ export function parseModels(raw) {
|
|
|
124
133
|
const rawPatches = (obj.patches ?? []).map(normalizePatchName);
|
|
125
134
|
const migrated = rawPatches.map(p => PATCH_ID_MIGRATION[p] ?? p);
|
|
126
135
|
const patches = [...new Set(migrated)];
|
|
127
|
-
const
|
|
136
|
+
const entry = {
|
|
128
137
|
name: modelName,
|
|
129
138
|
patches,
|
|
130
139
|
};
|
|
131
140
|
if (obj.stream_timeout_ms != null)
|
|
132
|
-
|
|
133
|
-
return
|
|
141
|
+
entry.stream_timeout_ms = obj.stream_timeout_ms;
|
|
142
|
+
return entry;
|
|
134
143
|
}).filter((e) => e !== null);
|
|
144
|
+
modelsCache.set(raw, result);
|
|
145
|
+
return result;
|
|
135
146
|
}
|
|
136
147
|
catch {
|
|
137
148
|
return [];
|
|
@@ -15,6 +15,7 @@ export class RequestTracker {
|
|
|
15
15
|
providerConfigCache = new Map();
|
|
16
16
|
pushTimer = null;
|
|
17
17
|
tickCount = 0;
|
|
18
|
+
requestUpdateDirty = true;
|
|
18
19
|
streamAccumulators = new Map();
|
|
19
20
|
streamContentPending = new Set();
|
|
20
21
|
streamContentTimer = null;
|
|
@@ -38,6 +39,7 @@ export class RequestTracker {
|
|
|
38
39
|
// --- Core methods ---
|
|
39
40
|
start(req) {
|
|
40
41
|
this.activeMap.set(req.id, { ...req });
|
|
42
|
+
this.requestUpdateDirty = true;
|
|
41
43
|
this.logger?.debug?.({ reqId: req.id, model: req.model, providerId: req.providerId, activeCount: this.activeMap.size }, "Tracker: start");
|
|
42
44
|
this.broadcast("request_start", req);
|
|
43
45
|
}
|
|
@@ -133,6 +135,7 @@ export class RequestTracker {
|
|
|
133
135
|
this.recentCompleted.length = RECENT_COMPLETED_MAX;
|
|
134
136
|
}
|
|
135
137
|
this.logger?.debug?.({ reqId: id, status: result.status, statusCode, latency, activeCount: this.activeMap.size }, "Tracker: complete");
|
|
138
|
+
this.requestUpdateDirty = true;
|
|
136
139
|
this.broadcast("request_complete", completed);
|
|
137
140
|
}
|
|
138
141
|
/** Update stream metrics for a completed request (e.g., after cache estimation) */
|
|
@@ -278,7 +281,10 @@ export class RequestTracker {
|
|
|
278
281
|
this.tickCount++;
|
|
279
282
|
this.cleanupRecent();
|
|
280
283
|
this.cleanupStaleActive();
|
|
281
|
-
|
|
284
|
+
if (this.requestUpdateDirty) {
|
|
285
|
+
this.broadcast("request_update", this.getActive());
|
|
286
|
+
this.requestUpdateDirty = false;
|
|
287
|
+
}
|
|
282
288
|
this.broadcast("concurrency_update", this.getConcurrency());
|
|
283
289
|
this.broadcast("stats_update", this.getStats());
|
|
284
290
|
// Every 10s (every 2nd tick)
|
package/dist/core/types.d.ts
CHANGED
|
@@ -26,6 +26,8 @@ export interface ResolveResult {
|
|
|
26
26
|
concurrency_override?: ConcurrencyOverride;
|
|
27
27
|
/** 活跃规则(schedule 或 base)中的 target 总数,用于 failover 判断 */
|
|
28
28
|
targetCount: number;
|
|
29
|
+
/** 排除前的完整 target 列表,用于请求级缓存(BP-H2) */
|
|
30
|
+
allTargets?: Target[];
|
|
29
31
|
}
|
|
30
32
|
export interface MetricsResult {
|
|
31
33
|
input_tokens: number | null;
|
package/dist/db/helpers.d.ts
CHANGED
package/dist/db/helpers.js
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
/** Per-db cache of prepared statements: a WeakMap keyed by the db instance, holding a Map from SQL text to statement, so the same SQL is not passed to prepare() repeatedly. */
|
|
2
|
+
const stmtCache = new WeakMap();
|
|
3
|
+
export function getCachedStmt(db, sql) { // returns the statement for (db, sql), preparing it on first use
|
|
4
|
+
let cache = stmtCache.get(db);
|
|
5
|
+
if (!cache) { // first statement requested for this db instance
|
|
6
|
+
cache = new Map();
|
|
7
|
+
stmtCache.set(db, cache);
|
|
8
|
+
}
|
|
9
|
+
let stmt = cache.get(sql); // lookup is by exact SQL text
|
|
10
|
+
if (!stmt) {
|
|
11
|
+
stmt = db.prepare(sql); // an exception thrown here propagates and nothing is cached
|
|
12
|
+
cache.set(sql, stmt);
|
|
13
|
+
}
|
|
14
|
+
return stmt;
|
|
15
|
+
}
|
|
1
16
|
/**
|
|
2
17
|
* 通用 UPDATE 构建器。
|
|
3
18
|
* 用白名单过滤安全字段,拼接 SET 子句。
|
package/dist/db/index.js
CHANGED
|
@@ -28,6 +28,12 @@ export function initDatabase(dbPath) {
|
|
|
28
28
|
db.pragma("journal_mode = WAL");
|
|
29
29
|
db.pragma("auto_vacuum = INCREMENTAL");
|
|
30
30
|
db.pragma("foreign_keys = ON");
|
|
31
|
+
db.pragma("synchronous = NORMAL");
|
|
32
|
+
db.pragma("cache_size = -16000");
|
|
33
|
+
db.pragma("busy_timeout = 5000");
|
|
34
|
+
db.pragma("temp_store = MEMORY");
|
|
35
|
+
db.pragma("mmap_size = 67108864");
|
|
36
|
+
db.pragma("journal_size_limit = 67108864");
|
|
31
37
|
db.exec(`
|
|
32
38
|
CREATE TABLE IF NOT EXISTS migrations (
|
|
33
39
|
name TEXT PRIMARY KEY,
|
package/dist/db/logs.d.ts
CHANGED
|
@@ -74,7 +74,7 @@ export declare function updateLogStreamContent(db: Database.Database, logId: str
|
|
|
74
74
|
/** 当 router 返回给客户端的 status code 与上游不同时,记录实际发送的 status */
|
|
75
75
|
export declare function updateLogClientStatus(db: Database.Database, logId: string, clientStatusCode: number): void;
|
|
76
76
|
export declare function deleteLogsBefore(db: Database.Database, beforeDate: string): number;
|
|
77
|
-
/**
|
|
77
|
+
/** 采样估算 request_logs 表占用字节数(避免全表 SUM 扫描) */
|
|
78
78
|
export declare function estimateLogTableSize(db: Database.Database): number;
|
|
79
79
|
/** 删除最旧的日志,保留 keepCount 条,返回实际删除条数。分批删除避免长时间锁表 */
|
|
80
80
|
export declare function deleteOldestLogs(db: Database.Database, keepCount: number): number;
|
package/dist/db/logs.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { shouldPreserveDetail } from "../proxy/log-detail-policy.js";
|
|
2
|
+
import { getCachedStmt } from "./helpers.js";
|
|
2
3
|
// --- request_logs ---
|
|
3
4
|
const LOG_LIST_SELECT = `rl.id, rl.api_type, rl.model, rl.provider_id, rl.status_code, rl.client_status_code, rl.latency_ms,
|
|
4
5
|
rl.is_stream, rl.error_message, rl.created_at, rl.is_retry, rl.is_failover, rl.original_request_id, rl.original_model,
|
|
@@ -26,7 +27,7 @@ export function insertRequestLog(db, log, writeContext) {
|
|
|
26
27
|
}
|
|
27
28
|
// 详情保留判定
|
|
28
29
|
const preserveDetail = shouldPreserveDetail(log.status_code, writeContext?.responseBody ?? null, writeContext?.matcher ?? null, !!writeContext?.logFileWriter);
|
|
29
|
-
db
|
|
30
|
+
getCachedStmt(db, `INSERT INTO request_logs (id, api_type, model, provider_id, status_code, client_status_code, latency_ms,
|
|
30
31
|
is_stream, error_message, created_at, client_request, upstream_request, upstream_response,
|
|
31
32
|
is_retry, is_failover, original_request_id, router_key_id, original_model, session_id, pipeline_snapshot)
|
|
32
33
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(log.id, log.api_type, log.model, log.provider_id, log.status_code, log.client_status_code ?? null, log.latency_ms, log.is_stream, log.error_message, log.created_at, preserveDetail ? (log.client_request ?? null) : null, preserveDetail ? (log.upstream_request ?? null) : null, preserveDetail ? (log.upstream_response ?? null) : null, log.is_retry ?? 0, log.is_failover ?? 0, log.original_request_id ?? null, log.router_key_id ?? null, log.original_model ?? null, log.session_id ?? null, log.pipeline_snapshot ?? null);
|
|
@@ -92,11 +93,11 @@ export function getRequestLogById(db, id) {
|
|
|
92
93
|
}
|
|
93
94
|
/** After a streaming request completes, write the text content accumulated in the tracker into request_logs. */
|
|
95
95
|
export function updateLogStreamContent(db, logId, textContent) {
|
|
95
|
-
db
|
|
|
96
|
+
getCachedStmt(db, "UPDATE request_logs SET stream_text_content = ? WHERE id = ?").run(textContent, logId); // statement comes from the per-db cache; the run() result is not inspected
|
|
96
97
|
}
|
|
97
98
|
/** When the status code the router returned to the client differs from the upstream one, record the status that was actually sent. */
|
|
98
99
|
export function updateLogClientStatus(db, logId, clientStatusCode) {
|
|
99
|
-
db
|
|
|
100
|
+
getCachedStmt(db, "UPDATE request_logs SET client_status_code = ? WHERE id = ?").run(clientStatusCode, logId); // statement comes from the per-db cache; the run() result is not inspected
|
|
100
101
|
}
|
|
101
102
|
export function deleteLogsBefore(db, beforeDate) {
|
|
102
103
|
const changes = db.prepare("DELETE FROM request_logs WHERE created_at < ?").run(beforeDate).changes;
|
|
@@ -107,17 +108,20 @@ export function deleteLogsBefore(db, beforeDate) {
|
|
|
107
108
|
}
|
|
108
109
|
/** 每行元数据(数字列+索引)的估算字节数 */
|
|
109
110
|
const ROW_METADATA_BYTES = 500;
|
|
110
|
-
/**
|
|
111
|
+
/** Estimate the size in bytes of the request_logs table by sampling, instead of running a full-table SUM scan. Returns 0 for an empty table, otherwise round(average sampled row size * row count). */
|
|
111
112
|
export function estimateLogTableSize(db) {
|
|
112
|
-
const
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
), 0)
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
113
|
+
const countRow = db.prepare("SELECT COUNT(*) as cnt FROM request_logs").get(); // total number of rows
|
|
114
|
+
if (countRow.cnt === 0)
|
|
|
115
|
+
return 0;
|
|
116
|
+
// Sample the 100 most recent rows and compute the average row size.
|
|
117
|
+
const samples = db.prepare(`
|
|
118
|
+
SELECT COALESCE(length(client_request), 0) + COALESCE(length(upstream_request), 0) +
|
|
119
|
+
COALESCE(length(upstream_response), 0) + COALESCE(length(stream_text_content), 0) +
|
|
120
|
+
COALESCE(length(error_message), 0) + COALESCE(length(pipeline_snapshot), 0) + ? AS row_size
|
|
121
|
+
FROM request_logs ORDER BY created_at DESC LIMIT 100
|
|
122
|
+
`).all(ROW_METADATA_BYTES); // the bound parameter adds the fixed per-row metadata estimate to every sampled row; NOTE(review): SQL length() counts characters for TEXT values, so multi-byte content may be under-counted - confirm column types
|
|
123
|
+
const avgRowSize = samples.reduce((s, r) => s + r.row_size, 0) / samples.length; // NOTE(review): relies on the count guard above so that samples is non-empty; an empty sample would give NaN
|
|
124
|
+
return Math.round(avgRowSize * countRow.cnt); // extrapolate the sampled average to the whole table
|
|
121
125
|
}
|
|
122
126
|
const DELETE_BATCH_SIZE = 1000;
|
|
123
127
|
/** 删除最旧的日志,保留 keepCount 条,返回实际删除条数。分批删除避免长时间锁表 */
|
|
@@ -164,15 +168,28 @@ export function getRequestLogsGrouped(db, options) {
|
|
|
164
168
|
const total = db.prepare(`SELECT COUNT(*) as count FROM request_logs rl WHERE ${where}`).get(...params).count;
|
|
165
169
|
const offset = (options.page - 1) * options.limit;
|
|
166
170
|
const data = db
|
|
167
|
-
.prepare(`
|
|
168
|
-
|
|
169
|
-
|
|
171
|
+
.prepare(`WITH page_ids AS (
|
|
172
|
+
SELECT rl.id FROM request_logs rl
|
|
173
|
+
${LOG_LIST_JOIN}
|
|
174
|
+
WHERE ${where}
|
|
175
|
+
ORDER BY rl.created_at DESC LIMIT ? OFFSET ?
|
|
176
|
+
)
|
|
177
|
+
SELECT ${LOG_LIST_SELECT},
|
|
178
|
+
COALESCE(child.cnt, 0) AS child_count
|
|
179
|
+
FROM page_ids pg
|
|
180
|
+
JOIN request_logs rl ON rl.id = pg.id
|
|
170
181
|
${LOG_LIST_JOIN}
|
|
171
|
-
|
|
182
|
+
LEFT JOIN (
|
|
183
|
+
SELECT original_request_id, COUNT(*) AS cnt
|
|
184
|
+
FROM request_logs
|
|
185
|
+
WHERE original_request_id IN (SELECT id FROM page_ids)
|
|
186
|
+
GROUP BY original_request_id
|
|
187
|
+
) child ON child.original_request_id = rl.id
|
|
188
|
+
ORDER BY rl.created_at DESC`)
|
|
172
189
|
.all(...params, options.limit, offset);
|
|
173
190
|
return { data, total };
|
|
174
191
|
}
|
|
175
192
|
/** After a later pipeline stage finishes, write the snapshot back to the existing log row. */
|
|
176
193
|
export function updateLogPipelineSnapshot(db, logId, snapshot) {
|
|
177
|
-
db
|
|
|
194
|
+
getCachedStmt(db, "UPDATE request_logs SET pipeline_snapshot = ? WHERE id = ?").run(snapshot, logId); // statement comes from the per-db cache; the run() result is not inspected
|
|
178
195
|
}
|
package/dist/db/metrics.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { MS_PER_SECOND } from "../core/constants.js";
|
|
3
|
+
import { getCachedStmt } from "./helpers.js";
|
|
3
4
|
export function insertMetrics(db, m) {
|
|
4
5
|
const id = randomUUID();
|
|
5
|
-
db
|
|
6
|
+
getCachedStmt(db, `INSERT INTO request_metrics (id, request_log_id, provider_id, backend_model, api_type, router_key_id, status_code,
|
|
6
7
|
input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, ttft_ms, total_duration_ms, tokens_per_second, stop_reason, is_complete, input_tokens_estimated,
|
|
7
8
|
client_type, cache_read_tokens_estimated,
|
|
8
9
|
thinking_tokens, text_tokens, tool_use_tokens, thinking_duration_ms,
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- 覆盖 provider_id 过滤 + 时间范围分页
|
|
2
|
+
CREATE INDEX IF NOT EXISTS idx_request_logs_provider_id ON request_logs(provider_id);
|
|
3
|
+
CREATE INDEX IF NOT EXISTS idx_request_logs_created_at_provider ON request_logs(created_at DESC, provider_id);
|
|
4
|
+
CREATE INDEX IF NOT EXISTS idx_request_logs_created_at_router_key ON request_logs(created_at DESC, router_key_id);
|
|
5
|
+
|
|
6
|
+
-- 覆盖按密钥过滤的聚合查询
|
|
7
|
+
CREATE INDEX IF NOT EXISTS idx_metrics_router_key ON request_metrics(router_key_id);
|
|
8
|
+
CREATE INDEX IF NOT EXISTS idx_metrics_created_at_router_key ON request_metrics(created_at, router_key_id);
|
package/dist/db/settings.js
CHANGED
|
@@ -1,9 +1,26 @@
|
|
|
1
|
+
import { getCachedStmt } from "./helpers.js";
|
|
2
|
+
// TTL cache: a WeakMap isolates entries per db instance, so that :memory: databases used in tests do not interfere with each other.
|
|
3
|
+
const settingsCache = new WeakMap();
|
|
4
|
+
const CACHE_TTL_MS = 30_000; // lifetime of one cached setting value, in milliseconds
|
|
1
5
|
export function getSetting(db, key) { // returns the stored value for key, or null when there is no row
|
|
2
|
-
|
|
3
|
-
|
|
6
|
+
let cache = settingsCache.get(db);
|
|
7
|
+
if (!cache) { // first lookup against this db instance
|
|
8
|
+
cache = new Map();
|
|
9
|
+
settingsCache.set(db, cache);
|
|
10
|
+
}
|
|
11
|
+
const cached = cache.get(key);
|
|
12
|
+
if (cached && Date.now() < cached.expiresAt) // entry exists and has not expired
|
|
|
13
|
+
return cached.value;
|
|
14
|
+
const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get(key);
|
|
15
|
+
const value = row?.value ?? null; // a missing row is represented as null
|
|
16
|
+
cache.set(key, { value, expiresAt: Date.now() + CACHE_TTL_MS }); // null results are cached as well; NOTE(review): the only invalidation visible here is in setSetting, so writes made another way would stay hidden until expiry - confirm
|
|
17
|
+
return value;
|
|
4
18
|
}
|
|
5
19
|
/** Insert or replace the settings row for key, then drop the cached entry for that key on this db so the next getSetting reads the database again. */
export function setSetting(db, key, value) {
|
|
6
|
-
db
|
|
|
20
|
+
getCachedStmt(db, "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)").run(key, value);
|
|
21
|
+
const cache = settingsCache.get(db); // undefined when nothing has been cached for this db yet
|
|
22
|
+
if (cache)
|
|
|
23
|
+
cache.delete(key); // invalidate rather than overwrite the cached value
|
|
7
24
|
}
|
|
8
25
|
export function isInitialized(db) {
|
|
9
26
|
return getSetting(db, "initialized") === "true";
|
|
@@ -40,7 +57,7 @@ export function setConfigSyncSource(db, source) {
|
|
|
40
57
|
setSetting(db, "config_sync_source", source);
|
|
41
58
|
}
|
|
42
59
|
export function getDetailLogEnabled(db) {
|
|
43
|
-
const row = db
|
|
60
|
+
const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get("detail_log_enabled");
|
|
44
61
|
return row ? row.value !== "0" : true;
|
|
45
62
|
}
|
|
46
63
|
export function getTokenEstimationEnabled(db) {
|
|
@@ -52,7 +69,7 @@ export function setTokenEstimationEnabled(db, enabled) {
|
|
|
52
69
|
}
|
|
53
70
|
const DEFAULT_LOG_FILE_RETENTION_DAYS = 3;
|
|
54
71
|
export function getLogFileRetentionDays(db) {
|
|
55
|
-
const row = db
|
|
72
|
+
const row = getCachedStmt(db, "SELECT value FROM settings WHERE key = ?").get("log_file_retention_days");
|
|
56
73
|
return row ? parseInt(row.value, 10) : DEFAULT_LOG_FILE_RETENTION_DAYS;
|
|
57
74
|
}
|
|
58
75
|
const DEFAULT_CLIENT_SESSION_HEADERS = [
|
package/dist/index.js
CHANGED
|
@@ -330,6 +330,8 @@ export async function buildApp(options) {
|
|
|
330
330
|
proxyAgentFactory.invalidateAll();
|
|
331
331
|
const sessionTracker = container.resolve(SERVICE_KEYS.sessionTracker);
|
|
332
332
|
sessionTracker.stop();
|
|
333
|
+
// Flush LogFileWriter 的 WriteStream 缓冲数据到磁盘
|
|
334
|
+
await logFileWriter?.stop();
|
|
333
335
|
// 等待活跃代理请求自然完成,超时后强制关闭所有连接。
|
|
334
336
|
// 先调用 app.close() 停止接受新连接并等待现有连接结束,
|
|
335
337
|
// 如果 2 秒内未完成则调用 closeAllConnections() 强制断开,防止 SSE 长连接导致无限等待。
|
|
@@ -13,12 +13,17 @@ export declare class MetricsExtractor {
|
|
|
13
13
|
private stopReason;
|
|
14
14
|
private firstContentReceived;
|
|
15
15
|
private complete;
|
|
16
|
-
|
|
16
|
+
/** Buffer 容量上限,超过后停止 push 避免内存膨胀 */
|
|
17
|
+
private static readonly MAX_BUFFER_SIZE;
|
|
18
|
+
private thinkingChunks;
|
|
19
|
+
private thinkingTotalLength;
|
|
17
20
|
private thinkingStreamStartTime;
|
|
18
21
|
private thinkingStreamEndTime;
|
|
19
|
-
private
|
|
22
|
+
private textChunks;
|
|
23
|
+
private textTotalLength;
|
|
20
24
|
private textStreamStartTime;
|
|
21
|
-
private
|
|
25
|
+
private toolUseChunks;
|
|
26
|
+
private toolUseTotalLength;
|
|
22
27
|
private toolUseStreamStartTime;
|
|
23
28
|
constructor(apiType: "openai" | "openai-responses" | "anthropic", requestStartTime: number);
|
|
24
29
|
processEvent(event: SSEEvent): void;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// TODO: 当文件超过 400 行时拆分为 metrics-streaming.ts(流式事件处理 + TPS 计算)和 metrics-extractor.ts(非流式 + 类型)
|
|
2
2
|
import { MS_PER_SECOND } from "../core/constants.js";
|
|
3
|
-
import {
|
|
3
|
+
import { countTokens } from "../utils/token-counter.js";
|
|
4
4
|
export class MetricsExtractor {
|
|
5
5
|
apiType;
|
|
6
6
|
requestStartTime;
|
|
@@ -14,13 +14,18 @@ export class MetricsExtractor {
|
|
|
14
14
|
stopReason = null;
|
|
15
15
|
firstContentReceived = false;
|
|
16
16
|
complete = false;
|
|
17
|
-
// --- Phase content buffers + timing ---
|
|
18
|
-
|
|
17
|
+
// --- Phase content buffers (array-based) + timing ---
|
|
18
|
+
/** Buffer 容量上限,超过后停止 push 避免内存膨胀 */
|
|
19
|
+
static MAX_BUFFER_SIZE = 500_000; // eslint-disable-line no-magic-numbers
|
|
20
|
+
thinkingChunks = [];
|
|
21
|
+
thinkingTotalLength = 0;
|
|
19
22
|
thinkingStreamStartTime = null;
|
|
20
23
|
thinkingStreamEndTime = null;
|
|
21
|
-
|
|
24
|
+
textChunks = [];
|
|
25
|
+
textTotalLength = 0;
|
|
22
26
|
textStreamStartTime = null;
|
|
23
|
-
|
|
27
|
+
toolUseChunks = [];
|
|
28
|
+
toolUseTotalLength = 0;
|
|
24
29
|
toolUseStreamStartTime = null;
|
|
25
30
|
constructor(apiType, requestStartTime) {
|
|
26
31
|
this.apiType = apiType;
|
|
@@ -49,7 +54,8 @@ export class MetricsExtractor {
|
|
|
49
54
|
let thinkingDurationMs = null;
|
|
50
55
|
let textTokens = null;
|
|
51
56
|
let toolUseTokens = null;
|
|
52
|
-
const
|
|
57
|
+
const thinkingContent = this.thinkingChunks.join("");
|
|
58
|
+
const hasThinking = thinkingContent.length > 0;
|
|
53
59
|
if (this.streamEndTime !== null &&
|
|
54
60
|
this.outputTokens !== null) {
|
|
55
61
|
// total_duration: T6 - T0 (proxy end-to-end, not just stream window)
|
|
@@ -58,7 +64,7 @@ export class MetricsExtractor {
|
|
|
58
64
|
totalTps = this.outputTokens / (totalDurationMs / MS_PER_SECOND);
|
|
59
65
|
}
|
|
60
66
|
if (hasThinking) {
|
|
61
|
-
thinkingTokens =
|
|
67
|
+
thinkingTokens = countTokens(thinkingContent);
|
|
62
68
|
// thinking_duration: T3 - T0 (includes network RTT + generation)
|
|
63
69
|
if (this.thinkingStreamEndTime !== null) {
|
|
64
70
|
thinkingDurationMs = this.thinkingStreamEndTime - this.requestStartTime;
|
|
@@ -81,11 +87,13 @@ export class MetricsExtractor {
|
|
|
81
87
|
}
|
|
82
88
|
}
|
|
83
89
|
// content token counts (for analysis only)
|
|
84
|
-
|
|
85
|
-
|
|
90
|
+
const textContent = this.textChunks.join("");
|
|
91
|
+
if (textContent.length > 0) {
|
|
92
|
+
textTokens = countTokens(textContent);
|
|
86
93
|
}
|
|
87
|
-
|
|
88
|
-
|
|
94
|
+
const toolUseContent = this.toolUseChunks.join("");
|
|
95
|
+
if (toolUseContent.length > 0) {
|
|
96
|
+
toolUseTokens = countTokens(toolUseContent);
|
|
89
97
|
}
|
|
90
98
|
}
|
|
91
99
|
return {
|
|
@@ -140,7 +148,8 @@ export class MetricsExtractor {
|
|
|
140
148
|
this.firstContentReceived = true;
|
|
141
149
|
this.ttftMs = Date.now() - this.requestStartTime;
|
|
142
150
|
}
|
|
143
|
-
this.
|
|
151
|
+
this.textChunks.push(delta);
|
|
152
|
+
this.textTotalLength += delta.length;
|
|
144
153
|
}
|
|
145
154
|
// Track completion
|
|
146
155
|
if (type === "response.completed" || type === "response.incomplete") {
|
|
@@ -193,8 +202,9 @@ export class MetricsExtractor {
|
|
|
193
202
|
this.thinkingStreamStartTime = Date.now();
|
|
194
203
|
}
|
|
195
204
|
const thinking = delta.thinking ?? "";
|
|
196
|
-
if (thinking) {
|
|
197
|
-
this.
|
|
205
|
+
if (thinking && this.thinkingTotalLength < MetricsExtractor.MAX_BUFFER_SIZE) {
|
|
206
|
+
this.thinkingChunks.push(thinking);
|
|
207
|
+
this.thinkingTotalLength += thinking.length;
|
|
198
208
|
this.thinkingStreamEndTime = Date.now();
|
|
199
209
|
}
|
|
200
210
|
}
|
|
@@ -202,16 +212,18 @@ export class MetricsExtractor {
|
|
|
202
212
|
if (this.textStreamStartTime === null) {
|
|
203
213
|
this.textStreamStartTime = Date.now();
|
|
204
214
|
}
|
|
205
|
-
if (delta.text) {
|
|
206
|
-
this.
|
|
215
|
+
if (delta.text && this.textTotalLength < MetricsExtractor.MAX_BUFFER_SIZE) {
|
|
216
|
+
this.textChunks.push(delta.text);
|
|
217
|
+
this.textTotalLength += delta.text.length;
|
|
207
218
|
}
|
|
208
219
|
}
|
|
209
220
|
else if (delta?.type === "input_json_delta") {
|
|
210
221
|
if (this.toolUseStreamStartTime === null) {
|
|
211
222
|
this.toolUseStreamStartTime = Date.now();
|
|
212
223
|
}
|
|
213
|
-
if (delta.partial_json) {
|
|
214
|
-
this.
|
|
224
|
+
if (delta.partial_json && this.toolUseTotalLength < MetricsExtractor.MAX_BUFFER_SIZE) {
|
|
225
|
+
this.toolUseChunks.push(delta.partial_json);
|
|
226
|
+
this.toolUseTotalLength += delta.partial_json.length;
|
|
215
227
|
}
|
|
216
228
|
}
|
|
217
229
|
}
|
|
@@ -252,8 +264,9 @@ export class MetricsExtractor {
|
|
|
252
264
|
this.ttftMs = Date.now() - this.requestStartTime;
|
|
253
265
|
this.textStreamStartTime = Date.now();
|
|
254
266
|
}
|
|
255
|
-
if (delta?.content) {
|
|
256
|
-
this.
|
|
267
|
+
if (delta?.content && this.textTotalLength < MetricsExtractor.MAX_BUFFER_SIZE) {
|
|
268
|
+
this.textChunks.push(delta.content);
|
|
269
|
+
this.textTotalLength += delta.content.length;
|
|
257
270
|
}
|
|
258
271
|
if (choice.finish_reason) {
|
|
259
272
|
this.stopReason = choice.finish_reason;
|
|
@@ -9,8 +9,11 @@ export class SSEParser {
|
|
|
9
9
|
if (this.isDone)
|
|
10
10
|
return [];
|
|
11
11
|
this.buffer += chunk;
|
|
12
|
-
// SSE 规范允许
|
|
13
|
-
|
|
12
|
+
// SSE 规范允许 \\r\\n 行尾,统一为 \\n
|
|
13
|
+
// 绝大多数 chunk 不含 \\r,跳过全局 replace
|
|
14
|
+
if (this.buffer.includes('\r')) {
|
|
15
|
+
this.buffer = this.buffer.replace(/\r\n/g, "\n");
|
|
16
|
+
}
|
|
14
17
|
return this.drainEvents();
|
|
15
18
|
}
|
|
16
19
|
flush() {
|
package/dist/middleware/auth.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { createHash
|
|
1
|
+
import { createHash } from "crypto";
|
|
2
2
|
import fp from "fastify-plugin";
|
|
3
3
|
import { isInitialized } from "../db/settings.js";
|
|
4
|
-
import { insertRequestLog } from "../db/logs.js";
|
|
5
4
|
import { getProxyApiType, HTTP_SERVICE_UNAVAILABLE } from "../core/constants.js";
|
|
6
5
|
const SKIP_PATHS = ["/health", "/admin"];
|
|
7
6
|
const HTTP_UNAUTHORIZED = 401;
|
|
@@ -19,20 +18,6 @@ function unauthorizedReply(reply) {
|
|
|
19
18
|
},
|
|
20
19
|
});
|
|
21
20
|
}
|
|
22
|
-
function logRejectedAuth(db, apiType, statusCode, errorMessage, request) {
|
|
23
|
-
insertRequestLog(db, {
|
|
24
|
-
id: randomUUID(),
|
|
25
|
-
api_type: apiType,
|
|
26
|
-
model: null,
|
|
27
|
-
provider_id: null,
|
|
28
|
-
status_code: statusCode,
|
|
29
|
-
latency_ms: 0,
|
|
30
|
-
is_stream: 0,
|
|
31
|
-
error_message: errorMessage,
|
|
32
|
-
created_at: new Date().toISOString(),
|
|
33
|
-
client_request: JSON.stringify({ method: request.method, ip: request.ip, headers: request.headers }),
|
|
34
|
-
});
|
|
35
|
-
}
|
|
36
21
|
const authMiddlewareRaw = (app, options, done) => {
|
|
37
22
|
const stmt = options.db.prepare("SELECT id, name, allowed_models FROM router_keys WHERE key_hash = ? AND is_active = 1");
|
|
38
23
|
app.addHook("onRequest", async (request, reply) => {
|
|
@@ -47,7 +32,7 @@ const authMiddlewareRaw = (app, options, done) => {
|
|
|
47
32
|
// 未初始化时代理层不可用
|
|
48
33
|
if (!isInitialized(options.db)) {
|
|
49
34
|
if (proxyApiType) {
|
|
50
|
-
|
|
35
|
+
request.log.info({ method: request.method, url: request.url, ip: request.ip }, `Rejected: service not initialized [${proxyApiType}]`);
|
|
51
36
|
}
|
|
52
37
|
reply.code(HTTP_SERVICE_UNAVAILABLE).send({ error: { message: "Service not initialized" } });
|
|
53
38
|
return reply;
|
|
@@ -66,7 +51,7 @@ const authMiddlewareRaw = (app, options, done) => {
|
|
|
66
51
|
}
|
|
67
52
|
if (!token) {
|
|
68
53
|
if (proxyApiType) {
|
|
69
|
-
|
|
54
|
+
request.log.info({ method: request.method, url: request.url, ip: request.ip }, `Rejected: no API key [${proxyApiType}]`);
|
|
70
55
|
}
|
|
71
56
|
unauthorizedReply(reply);
|
|
72
57
|
return reply;
|
|
@@ -75,12 +60,19 @@ const authMiddlewareRaw = (app, options, done) => {
|
|
|
75
60
|
const row = stmt.get(hash);
|
|
76
61
|
if (!row) {
|
|
77
62
|
if (proxyApiType) {
|
|
78
|
-
|
|
63
|
+
request.log.info({ method: request.method, url: request.url, ip: request.ip }, `Rejected: invalid API key [${proxyApiType}]`);
|
|
79
64
|
}
|
|
80
65
|
unauthorizedReply(reply);
|
|
81
66
|
return reply;
|
|
82
67
|
}
|
|
83
|
-
|
|
68
|
+
let parsedAllowedModels = null;
|
|
69
|
+
if (row.allowed_models) {
|
|
70
|
+
try {
|
|
71
|
+
parsedAllowedModels = JSON.parse(row.allowed_models);
|
|
72
|
+
}
|
|
73
|
+
catch { /* JSON 解析失败时 allowed_models 保持为 null,允许所有模型 */ } // eslint-disable-line taste/no-silent-catch
|
|
74
|
+
}
|
|
75
|
+
request.routerKey = { id: row.id, name: row.name, allowed_models: parsedAllowedModels };
|
|
84
76
|
});
|
|
85
77
|
done();
|
|
86
78
|
};
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import { randomUUID } from "crypto";
|
|
12
12
|
import fp from "fastify-plugin";
|
|
13
13
|
import { insertRequestLog, getAllProviders } from "../../db/index.js";
|
|
14
|
+
import { parseModels } from "../../config/model-context.js";
|
|
14
15
|
import { createErrorFormatter } from "../proxy-core.js";
|
|
15
16
|
import { createOrchestrator } from "../orchestration/orchestrator.js";
|
|
16
17
|
import { HTTP_OK, HTTP_BAD_GATEWAY, HTTP_CLIENT_CLOSED, MS_PER_SECOND } from "../../core/constants.js";
|
|
@@ -24,7 +25,6 @@ import { HTTP_UNPROCESSABLE_ENTITY } from "../../core/constants.js";
|
|
|
24
25
|
import { PipelineAbort } from "../pipeline/types.js";
|
|
25
26
|
import { applyToolRoundLimit } from "../patch/tool-round-limiter.js";
|
|
26
27
|
import { extractLastToolUse } from "./proxy-handler-utils.js";
|
|
27
|
-
import { parseModels } from "../../config/model-context.js";
|
|
28
28
|
// ---------- Models handler (shared across openai/anthropic) ----------
|
|
29
29
|
const ANTHROPIC_DEFAULT_PAGE_SIZE = 20;
|
|
30
30
|
const ANTHROPIC_MAX_PAGE_SIZE = 1000;
|
|
@@ -44,7 +44,11 @@ function handleModelsRequest(db) {
|
|
|
44
44
|
continue;
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
|
-
|
|
47
|
+
// 如果请求的 key 配置了 allowed_models 白名单,则过滤
|
|
48
|
+
const allowedModels = request.routerKey?.allowed_models;
|
|
49
|
+
const sortedIds = allowedModels
|
|
50
|
+
? [...modelMeta.keys()].filter(id => allowedModels.includes(id)).sort()
|
|
51
|
+
: [...modelMeta.keys()].sort();
|
|
48
52
|
const isAnthropicFormat = !!request.headers["anthropic-version"];
|
|
49
53
|
if (isAnthropicFormat) {
|
|
50
54
|
const query = request.query;
|