aicodeswitch 5.2.10 → 5.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.ServicePerformanceTracker = void 0;
16
+ /**
17
+ * ServicePerformanceTracker - 服务性能统计全局聚合模块
18
+ *
19
+ * 设计要点(详见 docs/PRD/service-performance-tpm.md):
20
+ * - 全局统计,与 AUTH 模式无关;普通路由 + AccessKey 路由流量统一采集。
21
+ * - 两个数据点:TTFT(首 Token 返回时间)、TPM(每分钟吐 token 数)。
22
+ * - 三级聚合:供应商 → 服务 → 模型;上卷基于 sum+count 加权,avg 由 sum/count 派生。
23
+ * - 走势按小时桶(键 "YYYY-MM-DD HH",保留 72 桶)。
24
+ * - 内存增量 + debounce(5s) flush + 原子写(tmp+rename)。
25
+ *
26
+ * recordPerformance 为纯内存同步操作,可在请求完成路径无开销调用。
27
+ */
28
+ const path_1 = __importDefault(require("path"));
29
+ const promises_1 = __importDefault(require("fs/promises"));
30
const HOURLY_BUCKET_LIMIT = 72; // keep the most recent 72 hourly buckets (about 3 days)
/**
 * Reads `map[key]` only when `key` is an OWN property of `map`.
 * Vendor/service/model ids are caller supplied (the model name comes from the
 * request body), so a plain `map[key]` lookup would resolve names such as
 * "constructor" or "__proto__" to members inherited from Object.prototype.
 */
function getOwn(map, key) {
    return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
}
/**
 * Creates `map[key]` as an own, enumerable data property and returns `value`.
 * `defineProperty` is used instead of assignment so that the key "__proto__"
 * becomes a regular entry instead of replacing the prototype of `map`.
 */
function setOwn(map, key, value) {
    Object.defineProperty(map, key, { value, enumerable: true, configurable: true, writable: true });
    return value;
}
/** True for non-null, non-array objects. */
function isRecord(value) {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
}
/** Average that yields 0 instead of NaN when there are no samples. */
function mean(sum, samples) {
    return samples > 0 ? sum / samples : 0;
}
/**
 * Number of samples that contributed to a sum. Buckets persisted before the
 * dedicated counters existed only have `count`, which is used as a fallback.
 */
function sampleCount(bucket, field) {
    return typeof bucket[field] === 'number' ? bucket[field] : bucket.count;
}
/**
 * Global aggregation of per-request service performance (TTFT and token throughput).
 *
 * Data is aggregated at three levels (vendor -> service -> model) as sums and
 * counts; averages are derived on read. Trends are kept in hourly buckets keyed
 * "YYYY-MM-DD HH" (local time), limited to HOURLY_BUCKET_LIMIT buckets.
 * `recordPerformance` only touches memory; persistence happens through `flush`,
 * which writes a temp file and renames it over the data file.
 */
class ServicePerformanceTracker {
    /**
     * @param {string} dataPath Directory that holds `service-performance.json`.
     */
    constructor(dataPath) {
        this.dataPath = dataPath;
        this.file = { vendors: {} };
        this.dirty = false;
        this.flushTimer = null;
        this.FLUSH_INTERVAL = 5000; // 5s
        // Tail of the flush chain; lets flushes run strictly one after another.
        this.flushQueue = Promise.resolve();
    }
    /**
     * Loads the persisted data file, if any.
     * A missing file is the normal first-start case and is silent. Any other
     * read/parse failure is logged and the tracker starts from empty data.
     * @returns {Promise<void>}
     */
    async initialize() {
        try {
            const raw = await promises_1.default.readFile(this.filePath, 'utf-8');
            const parsed = JSON.parse(raw);
            // Only adopt the file when it has the expected top-level shape.
            if (isRecord(parsed) && isRecord(parsed.vendors))
                this.file = parsed;
        }
        catch (err) {
            if (!err || err.code !== 'ENOENT') {
                console.error('[PerformanceTracker] Failed to load data file, starting empty:', err);
            }
            this.file = { vendors: {} };
        }
    }
    /**
     * Records one request's data point at model, service and vendor level.
     * Pure in-memory work; marks the tracker dirty for the next flush.
     * Calls without a vendor id, service id or model are ignored.
     *
     * @param {string} vendorId
     * @param {string|undefined} vendorName
     * @param {string} serviceId
     * @param {string|undefined} serviceName
     * @param {string} model
     * @param {{ttftMs?: number, tokensPerSecond?: number, outputTokens?: number,
     *          timingAccuracy: string, isError?: boolean}} metrics
     * @param {number} [timestamp] Epoch milliseconds used to pick the hourly bucket.
     */
    recordPerformance(vendorId, vendorName, serviceId, serviceName, model, metrics, timestamp = Date.now()) {
        if (!vendorId || !serviceId || !model)
            return;
        const hour = this.formatHourKey(timestamp);
        // Model level is the finest grain and the only one that tracks min/max.
        const modelAgg = this.ensureModel(vendorId, vendorName, serviceId, serviceName, model);
        this.accumulate(modelAgg, metrics, hour, /* withExtremes */ true);
        // Roll up to the service.
        const serviceAgg = this.ensureService(vendorId, vendorName, serviceId, serviceName);
        this.accumulate(serviceAgg, metrics, hour, /* withExtremes */ false);
        // Roll up to the vendor.
        const vendorAgg = this.ensureVendor(vendorId, vendorName);
        this.accumulate(vendorAgg, metrics, hour, /* withExtremes */ false);
        this.dirty = true;
    }
    // ---------------- Read side (derived views) ----------------
    /** Lists every vendor with figures derived from its rollup. */
    getVendorsOverview() {
        return Object.entries(this.file.vendors).map(([vendorId, v]) => ({
            vendorId,
            vendorName: v.vendorName,
            derived: this.derive(v.vendorRollup),
        }));
    }
    /** Flat list of every service (with its owning vendor) and its derived figures. */
    getServicesOverview() {
        const out = [];
        for (const [vendorId, v] of Object.entries(this.file.vendors)) {
            for (const [serviceId, s] of Object.entries(v.services)) {
                out.push({
                    serviceId,
                    serviceName: s.serviceName,
                    vendorId,
                    vendorName: v.vendorName,
                    derived: this.derive(s.serviceRollup),
                });
            }
        }
        return out;
    }
    /**
     * One vendor: its own rollup, hourly trend and the rollups of its services.
     * @returns {object|null} null when the vendor is unknown.
     */
    getVendorDetail(vendorId) {
        const v = getOwn(this.file.vendors, vendorId);
        if (!v)
            return null;
        return {
            vendorName: v.vendorName,
            derived: this.derive(v.vendorRollup),
            hourly: this.trendFrom(vendorId, undefined, undefined),
            services: Object.entries(v.services).map(([serviceId, s]) => ({
                serviceId,
                serviceName: s.serviceName,
                derived: this.derive(s.serviceRollup),
            })),
        };
    }
    /**
     * One service: its own rollup, hourly trend and all of its models.
     * @returns {object|null} null when the service is unknown.
     */
    getServiceDetail(serviceId) {
        const found = this.locateService(serviceId);
        if (!found)
            return null;
        const { vendorId, vendorName, serviceEntry } = found;
        return {
            vendorId,
            vendorName,
            serviceName: serviceEntry.serviceName,
            derived: this.derive(serviceEntry.serviceRollup),
            hourly: this.trendFrom(vendorId, serviceId, undefined),
            models: Object.entries(serviceEntry.models).map(([model, agg]) => ({
                model,
                derived: this.derive(agg),
            })),
        };
    }
    /**
     * One model of a service: derived figures (including extremes) and hourly trend.
     * @returns {object|null} null when the service or the model is unknown.
     */
    getModelDetail(serviceId, model) {
        const found = this.locateService(serviceId);
        if (!found)
            return null;
        const agg = getOwn(found.serviceEntry.models, model);
        if (!agg)
            return null;
        return {
            derived: this.derive(agg),
            hourly: this.trendFrom(found.vendorId, serviceId, model),
        };
    }
    // ---------------- Persistence ----------------
    /**
     * Persists pending changes, if any.
     * Flushes are serialized so two writers never share the temp file. When the
     * write fails the dirty flag is restored, so the next flush retries instead
     * of silently dropping the data.
     * @returns {Promise<void>} Rejects with the write error.
     */
    flush() {
        const writeIfDirty = async () => {
            if (!this.dirty)
                return;
            // Cleared before writing so records arriving during the write re-mark it.
            this.dirty = false;
            try {
                await this.save();
            }
            catch (err) {
                this.dirty = true;
                throw err;
            }
        };
        const queued = this.flushQueue.then(writeIfDirty);
        // The chain itself must never reject, otherwise later flushes would be skipped.
        this.flushQueue = queued.catch(() => undefined);
        return queued;
    }
    /** Starts the periodic flush timer (no-op when already running). */
    startAutoFlush() {
        if (this.flushTimer)
            return;
        this.flushTimer = setInterval(() => {
            this.flush().catch(err => console.error('[PerformanceTracker] Auto flush error:', err));
        }, this.FLUSH_INTERVAL);
    }
    /** Stops the periodic flush timer. */
    stopAutoFlush() {
        if (this.flushTimer) {
            clearInterval(this.flushTimer);
            this.flushTimer = null;
        }
    }
    /** Absolute path of the data file inside `dataPath`. */
    get filePath() {
        return path_1.default.join(this.dataPath, 'service-performance.json');
    }
    /** Writes the whole state to `<file>.tmp` and renames it over the data file. */
    async save() {
        const target = this.filePath;
        const tmp = target + '.tmp';
        await promises_1.default.mkdir(path_1.default.dirname(target), { recursive: true });
        await promises_1.default.writeFile(tmp, JSON.stringify(this.file, null, 2), 'utf-8');
        await promises_1.default.rename(tmp, target);
    }
    // ---------------- Internal: structure creation ----------------
    /** Returns the vendor rollup, creating the vendor node when missing. */
    ensureVendor(vendorId, vendorName) {
        const vendors = this.file.vendors;
        let v = getOwn(vendors, vendorId);
        if (!v) {
            v = setOwn(vendors, vendorId, {
                vendorName,
                vendorRollup: this.emptyAggregate(),
                services: {},
            });
        }
        // Back-fill a name that was unknown when the node was created.
        if (vendorName && !v.vendorName)
            v.vendorName = vendorName;
        return v.vendorRollup;
    }
    /** Returns the service rollup, creating vendor and service nodes when missing. */
    ensureService(vendorId, vendorName, serviceId, serviceName) {
        this.ensureVendor(vendorId, vendorName);
        const services = getOwn(this.file.vendors, vendorId).services;
        let s = getOwn(services, serviceId);
        if (!s) {
            s = setOwn(services, serviceId, {
                serviceName,
                serviceRollup: this.emptyAggregate(),
                models: {},
                updatedAt: Date.now(),
            });
        }
        if (serviceName && !s.serviceName)
            s.serviceName = serviceName;
        s.updatedAt = Date.now();
        return s.serviceRollup;
    }
    /** Returns the model aggregate, creating every missing node on the way. */
    ensureModel(vendorId, vendorName, serviceId, serviceName, model) {
        // Makes sure vendor + service exist; the service rollup itself is
        // accumulated separately by recordPerformance.
        this.ensureService(vendorId, vendorName, serviceId, serviceName);
        const s = getOwn(getOwn(this.file.vendors, vendorId).services, serviceId);
        return getOwn(s.models, model) || setOwn(s.models, model, this.emptyAggregate());
    }
    // ---------------- Internal: accumulation ----------------
    emptyAggregate() {
        return {
            precise: this.emptyBucket(),
            estimated: this.emptyBucket(),
            errorCount: 0,
            hourly: {},
        };
    }
    emptyBucket() {
        return { count: 0, sumTtftMs: 0, sumTps: 0, totalOutputTokens: 0, ttftCount: 0, tpsCount: 0 };
    }
    /**
     * Adds one successful sample to a bucket.
     * `ttftCount` / `tpsCount` track how many samples actually carried each
     * metric, so averages are not diluted by samples that lack it.
     */
    addSample(bucket, ttftMs, tokensPerSecond, outputTokens) {
        // Buckets loaded from older files have no per-metric counters; every
        // earlier sample was averaged over `count`, so start from that value.
        if (typeof bucket.ttftCount !== 'number')
            bucket.ttftCount = bucket.count;
        if (typeof bucket.tpsCount !== 'number')
            bucket.tpsCount = bucket.count;
        bucket.count += 1;
        if (ttftMs !== undefined) {
            bucket.sumTtftMs += ttftMs;
            bucket.ttftCount += 1;
        }
        if (tokensPerSecond !== undefined) {
            bucket.sumTps += tokensPerSecond;
            bucket.tpsCount += 1;
        }
        bucket.totalOutputTokens += outputTokens;
    }
    /**
     * Folds one data point into an aggregate.
     * Errors only bump `errorCount`. Only precise samples feed the hourly trend
     * and (at model level, `withExtremes`) the min/max values.
     */
    accumulate(agg, m, hour, withExtremes) {
        if (m.isError) {
            agg.errorCount += 1;
            return;
        }
        const isPrecise = m.timingAccuracy === 'precise';
        // Non-finite values (NaN/Infinity) would poison the sums permanently.
        const ttftMs = isPrecise && Number.isFinite(m.ttftMs) ? m.ttftMs : undefined;
        const tokensPerSecond = Number.isFinite(m.tokensPerSecond) ? m.tokensPerSecond : undefined;
        const outputTokens = Number.isFinite(m.outputTokens) ? m.outputTokens : 0;
        this.addSample(isPrecise ? agg.precise : agg.estimated, ttftMs, tokensPerSecond, outputTokens);
        if (!isPrecise)
            return;
        // Hourly trend: precise samples only, so estimates do not skew it.
        const hourBucket = agg.hourly[hour] || (agg.hourly[hour] = this.emptyBucket());
        this.addSample(hourBucket, ttftMs, tokensPerSecond, outputTokens);
        this.trimHourly(agg.hourly);
        if (!withExtremes)
            return;
        if (ttftMs !== undefined) {
            if (agg.minTtftMs === undefined || ttftMs < agg.minTtftMs)
                agg.minTtftMs = ttftMs;
            if (agg.maxTtftMs === undefined || ttftMs > agg.maxTtftMs)
                agg.maxTtftMs = ttftMs;
        }
        if (tokensPerSecond !== undefined) {
            if (agg.minTps === undefined || tokensPerSecond < agg.minTps)
                agg.minTps = tokensPerSecond;
            if (agg.maxTps === undefined || tokensPerSecond > agg.maxTps)
                agg.maxTps = tokensPerSecond;
        }
    }
    /** Drops the oldest hourly buckets beyond HOURLY_BUCKET_LIMIT. */
    trimHourly(hourly) {
        const keys = Object.keys(hourly);
        if (keys.length <= HOURLY_BUCKET_LIMIT)
            return;
        keys.sort(); // "YYYY-MM-DD HH" sorts lexicographically in time order
        const drop = keys.length - HOURLY_BUCKET_LIMIT;
        for (let i = 0; i < drop; i++)
            delete hourly[keys[i]];
    }
    // ---------------- Internal: derivation ----------------
    /**
     * Derives display figures from an aggregate.
     * Timing figures come from precise samples only. `successRate` counts every
     * successful request (precise + estimated) because `errorCount` also covers
     * every request regardless of timing accuracy.
     */
    derive(agg) {
        const p = agg.precise;
        const count = p.count;
        const estimatedCount = (agg.estimated && agg.estimated.count) || 0;
        const successes = count + estimatedCount;
        const total = successes + agg.errorCount;
        return {
            count,
            avgTtftMs: mean(p.sumTtftMs, sampleCount(p, 'ttftCount')),
            avgTpm: mean(p.sumTps, sampleCount(p, 'tpsCount')) * 60,
            minTtftMs: agg.minTtftMs,
            maxTtftMs: agg.maxTtftMs,
            minTps: agg.minTps,
            maxTps: agg.maxTps,
            errorCount: agg.errorCount,
            totalOutputTokens: p.totalOutputTokens,
            successRate: total > 0 ? successes / total : 0,
        };
    }
    /**
     * Hourly trend, oldest first, for a vendor (only `vendorId`), a service
     * (`vendorId` + `serviceId`) or a model (all three). Unknown ids yield [].
     */
    trendFrom(vendorId, serviceId, model) {
        const v = getOwn(this.file.vendors, vendorId);
        if (!v)
            return [];
        let agg;
        if (serviceId) {
            const s = getOwn(v.services, serviceId);
            if (!s)
                return [];
            agg = model ? getOwn(s.models, model) : s.serviceRollup;
        }
        else {
            agg = v.vendorRollup;
        }
        if (!agg)
            return [];
        return Object.entries(agg.hourly)
            .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
            .map(([hour, b]) => ({
            hour,
            count: b.count,
            avgTtftMs: mean(b.sumTtftMs, sampleCount(b, 'ttftCount')),
            avgTpm: mean(b.sumTps, sampleCount(b, 'tpsCount')) * 60,
        }));
    }
    /** Finds the first vendor that owns `serviceId`; null when there is none. */
    locateService(serviceId) {
        for (const [vendorId, v] of Object.entries(this.file.vendors)) {
            const s = getOwn(v.services, serviceId);
            if (s)
                return { vendorId, vendorName: v.vendorName, serviceEntry: s };
        }
        return null;
    }
    /** Formats epoch milliseconds as the local-time bucket key "YYYY-MM-DD HH". */
    formatHourKey(ts) {
        const d = new Date(ts);
        const yyyy = d.getFullYear();
        const mm = String(d.getMonth() + 1).padStart(2, '0');
        const dd = String(d.getDate()).padStart(2, '0');
        const hh = String(d.getHours()).padStart(2, '0');
        return `${yyyy}-${mm}-${dd} ${hh}`;
    }
}
377
+ exports.ServicePerformanceTracker = ServicePerformanceTracker;
@@ -52,6 +52,7 @@ const crypto_1 = __importDefault(require("crypto"));
52
52
  const streaming_1 = require("./transformers/streaming");
53
53
  const model_rewrite_transform_1 = require("./transformers/model-rewrite-transform");
54
54
  const chunk_collector_1 = require("./transformers/chunk-collector");
55
+ const stream_timing_transform_1 = require("./transformers/stream-timing-transform");
55
56
  const rules_status_service_1 = require("./rules-status-service");
56
57
  const index_1 = require("./conversions/index");
57
58
  const stream_converter_adapter_1 = require("./conversions/stream-converter-adapter");
@@ -219,6 +220,12 @@ class ProxyServer {
219
220
  writable: true,
220
221
  value: null
221
222
  });
223
+ Object.defineProperty(this, "performanceTracker", {
224
+ enumerable: true,
225
+ configurable: true,
226
+ writable: true,
227
+ value: null
228
+ });
222
229
  // 请求去重缓存:用于防止同一个请求被重复计数(如网络重试)
223
230
  // key: requestHash, value: timestamp
224
231
  Object.defineProperty(this, "requestDedupeCache", {
@@ -253,6 +260,43 @@ class ProxyServer {
253
260
  getAccessKeyModule() {
254
261
  return this.accessKeyModule;
255
262
  }
263
+ /** 设置服务性能统计 tracker(全局,与 AUTH 无关) */
264
+ setPerformanceTracker(tracker) {
265
+ this.performanceTracker = tracker;
266
+ }
267
+ /** 获取服务性能统计 tracker */
268
+ getPerformanceTracker() {
269
+ return this.performanceTracker;
270
+ }
271
+ /**
272
+ * 采集一次请求的服务性能数据点(全局,与 AUTH 无关)。
273
+ * 在两条转发路径的 finalizeLog 公共点调用,覆盖 AccessKey + 普通路由。
274
+ * 流式:依据 streamTiming 精确计算 TTFT 与生成阶段吞吐;非流式:端到端估算(estimated)。
275
+ */
276
+ emitPerformance(params) {
277
+ const tracker = this.performanceTracker;
278
+ if (!tracker)
279
+ return;
280
+ const { statusCode, startTime, usage, streamTiming, service, vendorId, vendorName, model } = params;
281
+ const isError = statusCode >= 400;
282
+ const outputTokens = usage === null || usage === void 0 ? void 0 : usage.outputTokens;
283
+ const responseMs = Date.now() - startTime;
284
+ let ttftMs;
285
+ let tokensPerSecond;
286
+ let timingAccuracy = 'estimated';
287
+ if (streamTiming && streamTiming.hasTiming()) {
288
+ timingAccuracy = 'precise';
289
+ ttftMs = streamTiming.firstEventAt - startTime;
290
+ const generationMs = streamTiming.lastEventAt - streamTiming.firstEventAt;
291
+ if (outputTokens && generationMs > 0) {
292
+ tokensPerSecond = outputTokens / (generationMs / 1000);
293
+ }
294
+ }
295
+ else if (outputTokens && responseMs > 0) {
296
+ tokensPerSecond = outputTokens / (responseMs / 1000);
297
+ }
298
+ tracker.recordPerformance(vendorId !== null && vendorId !== void 0 ? vendorId : service.vendorId, vendorName, service.id, service.name, model, { ttftMs, tokensPerSecond, outputTokens, timingAccuracy, isError });
299
+ }
256
300
  /**
257
301
  * 从请求中提取 API Key(支持三种 Header,按优先级依次尝试)
258
302
  */
@@ -3544,12 +3588,20 @@ class ProxyServer {
3544
3588
  let downstreamResponseBodyForLog;
3545
3589
  let upstreamRequestForLog;
3546
3590
  let actuallyUsedProxy = false; // 标记是否实际使用了代理
3591
+ // 服务性能打点:流式分支会创建实例并注入 pipeline;finalizeLog 据此判定 precise/estimated
3592
+ let streamTiming = null;
3547
3593
  // 标记规则正在使用
3548
3594
  rules_status_service_1.rulesStatusBroadcaster.markRuleInUse(route.id, rule.id);
3549
3595
  const finalizeLog = (statusCode, error) => __awaiter(this, void 0, void 0, function* () {
3550
- var _a, _b, _c, _d, _e, _f, _g;
3596
+ var _a, _b, _c, _d, _e, _f, _g, _h;
3551
3597
  if (logged)
3552
3598
  return;
3599
+ // 服务性能数据点采集(全局,与 AUTH 无关;独立于 enableLogging 开关)
3600
+ this.emitPerformance({
3601
+ statusCode, startTime, usage: usageForLog, streamTiming,
3602
+ service, vendorId: vendor === null || vendor === void 0 ? void 0 : vendor.id, vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
3603
+ model: rule.targetModel || ((_a = req.body) === null || _a === void 0 ? void 0 : _a.model),
3604
+ });
3553
3605
  const isError = statusCode >= 400;
3554
3606
  if (isError) {
3555
3607
  console.log(`\x1b[31m[Request Error]\x1b[0m client=${targetType}, session=${sessionId}, rule=${rule.id}(${rule.contentType}), vendor=${(vendor === null || vendor === void 0 ? void 0 : vendor.name) || '-'}, service=${service.name}, status=${statusCode}, time=${Date.now() - startTime}ms${error ? `, error=${error}` : ''}`);
@@ -3558,7 +3610,7 @@ class ProxyServer {
3558
3610
  console.log(`\x1b[33m[Request End]\x1b[0m client=${targetType}, session=${sessionId}, rule=${rule.id}(${rule.contentType}), vendor=${(vendor === null || vendor === void 0 ? void 0 : vendor.name) || '-'}, service=${service.name}, status=${statusCode}, time=${Date.now() - startTime}ms`);
3559
3611
  }
3560
3612
  // 检查是否启用日志记录(默认启用)
3561
- const enableLogging = ((_a = this.config) === null || _a === void 0 ? void 0 : _a.enableLogging) !== false; // 默认为 true
3613
+ const enableLogging = ((_b = this.config) === null || _b === void 0 ? void 0 : _b.enableLogging) !== false; // 默认为 true
3562
3614
  if (!enableLogging) {
3563
3615
  return;
3564
3616
  }
@@ -3585,10 +3637,10 @@ class ProxyServer {
3585
3637
  targetType,
3586
3638
  targetServiceId: service.id,
3587
3639
  targetServiceName: service.name,
3588
- targetModel: rule.targetModel || ((_b = req.body) === null || _b === void 0 ? void 0 : _b.model),
3640
+ targetModel: rule.targetModel || ((_c = req.body) === null || _c === void 0 ? void 0 : _c.model),
3589
3641
  vendorId: service.vendorId,
3590
3642
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
3591
- requestModel: (_c = req.body) === null || _c === void 0 ? void 0 : _c.model,
3643
+ requestModel: (_d = req.body) === null || _d === void 0 ? void 0 : _d.model,
3592
3644
  tags: this.buildRelayTags(relayedForLog, useOriginalConfig),
3593
3645
  responseHeaders: responseHeadersForLog,
3594
3646
  responseBody: responseBodyForLog,
@@ -3622,7 +3674,7 @@ class ProxyServer {
3622
3674
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
3623
3675
  serviceId: service.id,
3624
3676
  serviceName: service.name,
3625
- model: rule.targetModel || ((_d = req.body) === null || _d === void 0 ? void 0 : _d.model),
3677
+ model: rule.targetModel || ((_e = req.body) === null || _e === void 0 ? void 0 : _e.model),
3626
3678
  totalTokens: sessionTokens,
3627
3679
  }).catch(err => console.error('[KeySession] upsert error:', err));
3628
3680
  }
@@ -3649,10 +3701,10 @@ class ProxyServer {
3649
3701
  targetType,
3650
3702
  targetServiceId: service.id,
3651
3703
  targetServiceName: service.name,
3652
- targetModel: rule.targetModel || ((_e = req.body) === null || _e === void 0 ? void 0 : _e.model),
3704
+ targetModel: rule.targetModel || ((_f = req.body) === null || _f === void 0 ? void 0 : _f.model),
3653
3705
  vendorId: service.vendorId,
3654
3706
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
3655
- requestModel: (_f = req.body) === null || _f === void 0 ? void 0 : _f.model,
3707
+ requestModel: (_g = req.body) === null || _g === void 0 ? void 0 : _g.model,
3656
3708
  tags: this.buildRelayTags(relayedForLog, useOriginalConfig),
3657
3709
  });
3658
3710
  }
@@ -3665,7 +3717,7 @@ class ProxyServer {
3665
3717
  const vendors = this.dbManager.getVendors();
3666
3718
  const vendorForLog = vendors.find(v => v.id === service.vendorId);
3667
3719
  // 从请求体中提取模型信息
3668
- const requestModel = (_g = req.body) === null || _g === void 0 ? void 0 : _g.model;
3720
+ const requestModel = (_h = req.body) === null || _h === void 0 ? void 0 : _h.model;
3669
3721
  const tagsForLog = this.buildRelayTags(relayedForLog, useOriginalConfig);
3670
3722
  if (extraTagsForLog.length > 0) {
3671
3723
  tagsForLog.push(...extraTagsForLog);
@@ -4061,6 +4113,8 @@ class ProxyServer {
4061
4113
  const downstreamChunkCollector = new chunk_collector_1.ChunkCollectorTransform(() => {
4062
4114
  rules_status_service_1.rulesStatusBroadcaster.refreshRuleInUse(route.id, rule.id);
4063
4115
  });
4116
+ // 服务性能打点:记录首/末 SSE 事件时间,用于 TTFT 与生成阶段吞吐
4117
+ streamTiming = new stream_timing_transform_1.StreamTimingTransform(startTime);
4064
4118
  const compactResponseSanitizer = rule.contentType === 'compact' && targetType === 'claude-code'
4065
4119
  ? new ClaudeCompactResponseSanitizer()
4066
4120
  : null;
@@ -4102,7 +4156,7 @@ class ProxyServer {
4102
4156
  ensureResponseWritable();
4103
4157
  return yield new Promise((resolve, reject) => {
4104
4158
  if (converter) {
4105
- const streamStages = [streamSource, parser, eventCollector, converter];
4159
+ const streamStages = [streamSource, parser, eventCollector, streamTiming, converter];
4106
4160
  if (compactResponseSanitizer) {
4107
4161
  streamStages.push(compactResponseSanitizer);
4108
4162
  }
@@ -4120,7 +4174,7 @@ class ProxyServer {
4120
4174
  });
4121
4175
  return;
4122
4176
  }
4123
- const streamStages = [streamSource, parser, eventCollector];
4177
+ const streamStages = [streamSource, parser, eventCollector, streamTiming];
4124
4178
  if (compactResponseSanitizer) {
4125
4179
  streamStages.push(compactResponseSanitizer);
4126
4180
  }
@@ -4608,6 +4662,8 @@ class ProxyServer {
4608
4662
  let responseBodyForLog;
4609
4663
  let downstreamResponseBodyForLog;
4610
4664
  let streamChunksForLog;
4665
+ // 服务性能打点:流式分支会创建实例并注入 pipeline
4666
+ let streamTiming = null;
4611
4667
  let responseHeadersForLog;
4612
4668
  let upstreamRequestForLog;
4613
4669
  let relayedForLog = true;
@@ -4629,10 +4685,16 @@ class ProxyServer {
4629
4685
  requestBody = (0, compact_1.normalizeClaudeCompactRequestBody)(requestBody);
4630
4686
  }
4631
4687
  const finalizeLog = (statusCode, error) => __awaiter(this, void 0, void 0, function* () {
4632
- var _a, _b, _c, _d, _e;
4688
+ var _a, _b, _c, _d, _e, _f;
4633
4689
  if (logged)
4634
4690
  return;
4635
4691
  logged = true;
4692
+ // 服务性能数据点采集(全局,与 AUTH 无关;独立于 enableLogging 开关)
4693
+ this.emitPerformance({
4694
+ statusCode, startTime, usage: usageForLog, streamTiming,
4695
+ service, vendorId: vendor === null || vendor === void 0 ? void 0 : vendor.id, vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
4696
+ model: rule.targetModel || ((_a = req.body) === null || _a === void 0 ? void 0 : _a.model),
4697
+ });
4636
4698
  // AccessKey 独立日志处理
4637
4699
  const accessKeyCtx = req._accessKeyCtx;
4638
4700
  if (accessKeyCtx && this.accessKeyModule) {
@@ -4651,10 +4713,10 @@ class ProxyServer {
4651
4713
  ruleId: rule.id,
4652
4714
  targetServiceId: service.id,
4653
4715
  targetServiceName: service.name,
4654
- targetModel: rule.targetModel || ((_a = req.body) === null || _a === void 0 ? void 0 : _a.model),
4716
+ targetModel: rule.targetModel || ((_b = req.body) === null || _b === void 0 ? void 0 : _b.model),
4655
4717
  vendorId: service.vendorId,
4656
4718
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
4657
- requestModel: (_b = req.body) === null || _b === void 0 ? void 0 : _b.model,
4719
+ requestModel: (_c = req.body) === null || _c === void 0 ? void 0 : _c.model,
4658
4720
  tags: this.buildRelayTags(relayedForLog),
4659
4721
  });
4660
4722
  if (usageForLog && statusCode < 400) {
@@ -4680,7 +4742,7 @@ class ProxyServer {
4680
4742
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
4681
4743
  serviceId: service.id,
4682
4744
  serviceName: service.name,
4683
- model: rule.targetModel || ((_c = req.body) === null || _c === void 0 ? void 0 : _c.model),
4745
+ model: rule.targetModel || ((_d = req.body) === null || _d === void 0 ? void 0 : _d.model),
4684
4746
  totalTokens: sessionTokens,
4685
4747
  }).catch(err => console.error('[KeySession] upsert error:', err));
4686
4748
  }
@@ -4704,10 +4766,10 @@ class ProxyServer {
4704
4766
  ruleId: rule.id,
4705
4767
  targetServiceId: service.id,
4706
4768
  targetServiceName: service.name,
4707
- targetModel: rule.targetModel || ((_d = req.body) === null || _d === void 0 ? void 0 : _d.model),
4769
+ targetModel: rule.targetModel || ((_e = req.body) === null || _e === void 0 ? void 0 : _e.model),
4708
4770
  vendorId: service.vendorId,
4709
4771
  vendorName: vendor === null || vendor === void 0 ? void 0 : vendor.name,
4710
- requestModel: (_e = req.body) === null || _e === void 0 ? void 0 : _e.model,
4772
+ requestModel: (_f = req.body) === null || _f === void 0 ? void 0 : _f.model,
4711
4773
  tags: this.buildRelayTags(relayedForLog),
4712
4774
  });
4713
4775
  }
@@ -4877,6 +4939,8 @@ class ProxyServer {
4877
4939
  const downstreamChunkCollector = new chunk_collector_1.ChunkCollectorTransform(() => {
4878
4940
  rules_status_service_1.rulesStatusBroadcaster.refreshRuleInUse(route.id, rule.id);
4879
4941
  });
4942
+ // 服务性能打点:记录首/末 SSE 事件时间
4943
+ streamTiming = new stream_timing_transform_1.StreamTimingTransform(startTime);
4880
4944
  responseHeadersForLog = this.normalizeResponseHeaders(responseHeaders);
4881
4945
  // 流式 model 回写:将上游返回的 model 改写为客户端请求时的原始模型名
4882
4946
  const originalModel = (_d = req.body) === null || _d === void 0 ? void 0 : _d.model;
@@ -4911,7 +4975,7 @@ class ProxyServer {
4911
4975
  return stages;
4912
4976
  };
4913
4977
  if (converter) {
4914
- const stages = buildStages(streamSource, parser, eventCollector, converter);
4978
+ const stages = buildStages(streamSource, parser, eventCollector, streamTiming, converter);
4915
4979
  stream_1.pipeline(...stages, (error) => {
4916
4980
  if (error) {
4917
4981
  reject(error);
@@ -4921,7 +4985,7 @@ class ProxyServer {
4921
4985
  });
4922
4986
  }
4923
4987
  else {
4924
- const stages = buildStages(streamSource, parser, eventCollector);
4988
+ const stages = buildStages(streamSource, parser, eventCollector, streamTiming);
4925
4989
  stream_1.pipeline(...stages, (error) => {
4926
4990
  if (error) {
4927
4991
  reject(error);
@@ -52,8 +52,8 @@ class RulesStatusBroadcaster extends events_1.EventEmitter {
52
52
  enumerable: true,
53
53
  configurable: true,
54
54
  writable: true,
55
- value: 10000
56
- }); // 10秒无活动后标记为空闲
55
+ value: 120000
56
+ }); // 120秒无活动后标记为空闲(兜底安全网,覆盖 thinking hold 等长静默场景)
57
57
  Object.defineProperty(this, "IDLE_DEBOUNCE_DELAY", {
58
58
  enumerable: true,
59
59
  configurable: true,
@@ -258,16 +258,31 @@ class RulesStatusBroadcaster extends events_1.EventEmitter {
258
258
  });
259
259
  }
260
260
  /**
261
- * 刷新规则使用中的不活动定时器(轻量级,仅重置定时器,不修改状态)
262
- * 用于 streaming 过程中持续保持 in_use 状态
261
+ * 刷新规则使用中的不活动定时器(轻量级,仅重置定时器,通常不修改状态)
262
+ * 用于 streaming 过程中持续保持 in_use 状态。
263
+ *
264
+ * 行为:
265
+ * - status === 'in_use':重置不活动定时器,并清除可能 pending 的 idle debounce,
266
+ * 避免已触发的 idle 经 SSE 推送出去(thinking hold 场景的关键修复)。
267
+ * - status === 'idle':说明此前已被错误判空闲,但请求仍在出流——重新标记为 in_use
268
+ * 以便经 SSE 把状态推回"使用中",实现前端自愈。
269
+ * - status === 'error' / 'suspended':早退,这两种终态有独立恢复机制,不应被流式刷新覆盖。
263
270
  */
264
271
  refreshRuleInUse(routeId, ruleId) {
265
272
  const currentStatus = this.ruleStates.get(ruleId);
266
- // 仅当状态已经是 in_use 时才刷新定时器
267
- if ((currentStatus === null || currentStatus === void 0 ? void 0 : currentStatus.status) !== 'in_use')
273
+ // 终态有独立恢复机制,刷新不应覆盖
274
+ if ((currentStatus === null || currentStatus === void 0 ? void 0 : currentStatus.status) === 'error' || (currentStatus === null || currentStatus === void 0 ? void 0 : currentStatus.status) === 'suspended') {
268
275
  return;
276
+ }
269
277
  const timeoutKey = `${routeId}:${ruleId}`;
278
+ // 已被错误判空闲:重新标记为 in_use(内部会清旧定时器/debounce 并 emit statusChanged → SSE 推回使用中)
279
+ if ((currentStatus === null || currentStatus === void 0 ? void 0 : currentStatus.status) === 'idle') {
280
+ this.markRuleInUse(routeId, ruleId);
281
+ return;
282
+ }
283
+ // in_use:重置不活动定时器,并清除 pending 的 idle debounce(阻止已触发的 idle 经 SSE 推送)
270
284
  this.clearRuleTimeout(timeoutKey);
285
+ this.clearIdleDebounce(timeoutKey);
271
286
  const timeout = setTimeout(() => {
272
287
  this.markRuleIdle(routeId, ruleId);
273
288
  }, this.INACTIVITY_TIMEOUT);