claude-sdk-proxy 2.3.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/logger.ts +127 -8
- package/src/mcpTools.ts +3 -207
- package/src/proxy/server.ts +1001 -225
- package/src/proxy/types.ts +9 -2
- package/src/session-store.ts +198 -0
- package/src/trace.ts +633 -0
package/src/trace.ts
ADDED
|
@@ -0,0 +1,633 @@
|
|
|
1
|
+
import { logInfo, logWarn, logError, logDebug, dumpError } from "./logger"
|
|
2
|
+
|
|
3
|
+
// ── Request Trace ────────────────────────────────────────────────────────────
|
|
4
|
+
// Captures the full lifecycle of a single API request with timing milestones,
|
|
5
|
+
// so you can see exactly WHERE time was spent and WHERE failures occurred.
|
|
6
|
+
|
|
7
|
+
/**
 * Lifecycle stage of a traced request, listed in the order a successful
 * request is expected to pass through them; "error" can replace any stage.
 */
export type TracePhase =
  /** HTTP request received, body parsed. */
  | "received"
  /** Request validated, model resolved. */
  | "validated"
  /** Waiting for a concurrency slot. */
  | "queued"
  /** Concurrency slot acquired. */
  | "acquired"
  /** About to call SDK query(). */
  | "sdk_starting"
  /** Receiving events from the SDK. */
  | "sdk_streaming"
  /** SDK query() iterator finished. */
  | "sdk_done"
  /** Building/sending the HTTP response. */
  | "responding"
  /** Response successfully sent. */
  | "completed"
  /** Failed at some point. */
  | "error"
|
|
18
|
+
|
|
19
|
+
/** Coarse outcome of a trace: still in flight, finished successfully, or failed. */
export type TraceStatus = "active" | "completed" | "error"
|
|
20
|
+
|
|
21
|
+
/** Failure details attached to a trace that ended in error. */
export interface TraceError {
  /** Error category, e.g. "AbortError", "sdk_error", "queue_timeout", "parse_error". */
  type: string
  /** Message of the underlying error. */
  message: string
  /** Stack trace of the underlying error, when one is available. */
  stack?: string
  /** Phase the request was in when the error occurred. */
  phase: TracePhase
}
|
|
27
|
+
|
|
28
|
+
/**
 * Everything recorded about a single API request: request metadata, timing
 * milestones, output counters and, for failed requests, the error.
 * All timestamps are Date.now() values (milliseconds since the epoch).
 */
export interface RequestTrace {
  /** Identifier of the request this trace belongs to. */
  reqId: string
  /** Date.now() when the request was received. */
  startedAt: number

  // ── Request metadata ──

  /** Resolved model family, e.g. "haiku" | "sonnet" | "opus". */
  model: string
  /** Original model string from the caller (e.g. "claude-haiku-4-5"). */
  requestedModel: string
  stream: boolean
  hasTools: boolean
  /** Thinking mode: "enabled" | "disabled" | "adaptive". */
  thinking?: string
  /** Character length of the serialized prompt. */
  promptLen: number
  /** Character length of the system prompt. */
  systemLen: number
  /** Number of messages in the request. */
  msgCount: number
  /** Raw HTTP body size in bytes. */
  bodyBytes: number

  // ── Client info ──

  clientIp?: string
  userAgent?: string

  // ── Timing milestones ──

  /** When the request started waiting for a concurrency slot. */
  queuedAt?: number
  /** When the concurrency slot was obtained. */
  acquiredAt?: number
  /** When query() was called. */
  sdkStartedAt?: number
  /** When the first content event arrived. */
  firstTokenAt?: number
  /** When the query() iterator finished. */
  sdkEndedAt?: number
  /** When the HTTP response was fully sent. */
  completedAt?: number

  // ── Phase tracking ──

  phase: TracePhase
  status: TraceStatus

  // ── Output metrics ──

  sdkEventCount: number
  /** Character length of the generated text. */
  outputLen: number
  toolCallCount: number
  /** Number of 15s stall-check intervals in which stallMs exceeded 30s. */
  stallCount: number

  /** SDK event type distribution, e.g. { "content_block_delta": 500, "content_block_start": 1 }. */
  eventTypes: Record<string, number>

  // ── Last event tracking (for stall diagnostics) ──

  /** Date.now() of the last SDK event. */
  lastEventAt: number
  /** Type of the last SDK event received. */
  lastEventType?: string

  /** Termination reason; more specific than the error/completed status. */
  endReason?: "completed" | "client_disconnect" | "stall_timeout" | "queue_timeout" | "sdk_error" | "abort" | "unknown"

  /** Error details, present when the request failed. */
  error?: TraceError

  /** SDK debug log path (from DEBUG_CLAUDE_AGENT_SDK). */
  sdkDebugLogPath?: string
}
|
|
81
|
+
|
|
82
|
+
// ── Per-model stats ──────────────────────────────────────────────────────────
|
|
83
|
+
|
|
84
|
+
/** Running totals kept per model; averages are derived from these when stats are read. */
interface ModelStats {
  /** Finished requests (completed or failed) for this model. */
  total: number
  /** How many of those requests failed. */
  errors: number
  /** Sum of request durations in milliseconds. */
  totalDurationMs: number
  /** Sum of time-to-first-token values in milliseconds. */
  totalTimeToFirstTokenMs: number
  /** Requests that produced a first token (denominator for the TTFT average). */
  firstTokenCount: number
  /** Longest request duration seen, in milliseconds. */
  maxDurationMs: number
  /** Date.now() of the most recent failure, if any. */
  lastErrorAt?: number
  /** Request id of the most recent failure, if any. */
  lastErrorReqId?: string
}
|
|
94
|
+
|
|
95
|
+
/** Build a fresh, zeroed ModelStats record (a new object on every call). */
function newModelStats(): ModelStats {
  const zeroed: ModelStats = {
    total: 0,
    errors: 0,
    totalDurationMs: 0,
    totalTimeToFirstTokenMs: 0,
    firstTokenCount: 0,
    maxDurationMs: 0,
  }
  return zeroed
}
|
|
98
|
+
|
|
99
|
+
// ── Trace Store ──────────────────────────────────────────────────────────────
|
|
100
|
+
// In-memory ring buffer of recent traces + aggregate stats.
|
|
101
|
+
|
|
102
|
+
/** Maximum number of finished traces kept in memory; the oldest is dropped first. */
const BUFFER_SIZE = 200
|
|
103
|
+
/** Maximum number of failed traces kept in the separate error buffer; the oldest is dropped first. */
const ERROR_BUFFER_SIZE = 50
|
|
104
|
+
|
|
105
|
+
/**
 * In-memory registry of request traces plus aggregate statistics.
 *
 * A trace lives in `activeTraces` from `create()` until `complete()` or
 * `fail()` moves it into the bounded `traces` history (at most BUFFER_SIZE
 * entries, oldest dropped first). Failed traces are additionally copied into
 * `errorTraces` (at most ERROR_BUFFER_SIZE entries). The class itself keeps
 * everything in memory; the only external effects are the logger calls.
 */
class TraceStore {
  /** Finished traces, oldest first. */
  private traces: RequestTrace[] = []
  /** Shallow copies of failed traces, oldest first. */
  private errorTraces: RequestTrace[] = []
  /** Traces that have been created but not yet completed/failed, keyed by reqId. */
  private activeTraces = new Map<string, RequestTrace>()
  private stats = {
    /** Incremented in create(), so it includes in-flight requests. */
    totalRequests: 0,
    totalErrors: 0,
    /** Sum of durations of finished requests only. */
    totalDurationMs: 0,
    /** Requests that reached complete() or fail(); denominator for the average duration. */
    finishedRequests: 0,
    startedAt: Date.now(),
  }
  private modelStats = new Map<string, ModelStats>()

  /**
   * Create a new trace for a request, register it as active and log it.
   * A second call with the same reqId replaces the earlier active trace.
   *
   * @param init Request metadata captured when the request was received.
   * @returns The live trace object (later mutated by the other methods).
   */
  create(init: {
    reqId: string
    model: string
    requestedModel: string
    stream: boolean
    hasTools: boolean
    thinking?: string
    promptLen: number
    systemLen: number
    msgCount: number
    bodyBytes: number
    clientIp?: string
    userAgent?: string
  }): RequestTrace {
    const now = Date.now()
    const trace: RequestTrace = {
      ...init,
      startedAt: now,
      phase: "received",
      status: "active",
      sdkEventCount: 0,
      outputLen: 0,
      toolCallCount: 0,
      stallCount: 0,
      eventTypes: {},
      lastEventAt: now,
    }
    this.activeTraces.set(init.reqId, trace)
    this.stats.totalRequests++

    logInfo("trace.created", {
      reqId: init.reqId,
      model: init.model,
      requestedModel: init.requestedModel,
      stream: init.stream,
      hasTools: init.hasTools,
      thinking: init.thinking,
      promptLen: init.promptLen,
      systemLen: init.systemLen,
      msgCount: init.msgCount,
      bodyBytes: init.bodyBytes,
      clientIp: init.clientIp,
      userAgent: init.userAgent,
    })

    return trace
  }

  /**
   * Update the phase of an active trace, stamp the matching timing milestone
   * and log the transition. Unknown reqIds are ignored.
   *
   * NOTE(review): entering "sdk_streaming" stamps firstTokenAt if it is unset,
   * which means sdkEvent() will then skip its own first-token detection and
   * the "trace.first_token" log for that request.
   *
   * @param extra Additional fields merged into the debug log entry.
   */
  phase(reqId: string, phase: TracePhase, extra?: Record<string, unknown>) {
    const trace = this.activeTraces.get(reqId)
    if (!trace) return

    const now = Date.now()
    trace.phase = phase

    switch (phase) {
      case "queued":
        trace.queuedAt = now
        break
      case "acquired":
        trace.acquiredAt = now
        break
      case "sdk_starting":
        trace.sdkStartedAt = now
        break
      case "sdk_streaming":
        if (!trace.firstTokenAt) trace.firstTokenAt = now
        break
      case "sdk_done":
        trace.sdkEndedAt = now
        break
      case "completed":
        trace.completedAt = now
        break
    }

    const elapsed = now - trace.startedAt
    logDebug("trace.phase", { reqId, phase, elapsedMs: elapsed, ...extra })
  }

  /**
   * Record an SDK event: updates the event counter, last-event bookkeeping and
   * the per-type histogram, and detects the first content event.
   *
   * @param eventNum Running event number supplied by the caller; stored as-is
   *                 as the trace's sdkEventCount.
   * @param subtype  When given, used instead of eventType as the histogram key.
   */
  sdkEvent(reqId: string, eventNum: number, eventType: string, subtype?: string) {
    const trace = this.activeTraces.get(reqId)
    if (!trace) return

    const now = Date.now()
    trace.sdkEventCount = eventNum
    trace.lastEventAt = now

    // Track event type distribution
    const key = subtype ?? eventType
    trace.lastEventType = key
    trace.eventTypes[key] = (trace.eventTypes[key] ?? 0) + 1

    // Mark first content event
    if (!trace.firstTokenAt && (subtype === "content_block_delta" || subtype === "content_block_start")) {
      trace.firstTokenAt = now
      const ttft = now - trace.startedAt
      const ttftFromSdk = trace.sdkStartedAt ? now - trace.sdkStartedAt : undefined

      logInfo("trace.first_token", {
        reqId,
        ttftMs: ttft,
        ttftFromSdkMs: ttftFromSdk,
        eventNum,
        model: trace.model,
      })
    }

    // Log first 5 events, then every 200th, plus every thinking event
    if (eventNum <= 5 || eventNum % 200 === 0 || subtype === "thinking") {
      logDebug("trace.sdk_event", {
        reqId,
        n: eventNum,
        type: eventType,
        subtype,
        elapsedMs: now - trace.startedAt,
        outputLen: trace.outputLen,
      })
    }
  }

  /**
   * Record a stall check (the original comment says this is called every 15s).
   * Gaps under 30s are only debug-logged; longer gaps increment stallCount and
   * are logged as a warning, or as an error once the gap exceeds 60s.
   *
   * @param stallMs Milliseconds since the last SDK event, as measured by the caller.
   */
  stall(reqId: string, stallMs: number) {
    const trace = this.activeTraces.get(reqId)
    if (!trace) return

    // Only count meaningful stalls (30s or more of idle time)
    if (stallMs < 30_000) {
      // Short gap — debug log only, not a real stall
      logDebug("trace.stall_check", {
        reqId,
        stallMs,
        sdkEventCount: trace.sdkEventCount,
        phase: trace.phase,
      })
      return
    }

    trace.stallCount++
    const log = stallMs > 60_000 ? logError : logWarn

    log("trace.stall", {
      reqId,
      stallMs,
      stallCount: trace.stallCount,
      sdkEventCount: trace.sdkEventCount,
      outputLen: trace.outputLen,
      elapsedMs: Date.now() - trace.startedAt,
      phase: trace.phase,
      model: trace.model,
      lastEventType: trace.lastEventType,
      eventTypes: trace.eventTypes,
    })
  }

  /**
   * Mark a trace as successfully completed, log a summary, update the
   * aggregate stats and move the trace into the history buffer.
   * Unknown reqIds are ignored.
   *
   * @param extra Final output counters that override the live values when given.
   */
  complete(reqId: string, extra?: { outputLen?: number; toolCallCount?: number }) {
    const trace = this.activeTraces.get(reqId)
    if (!trace) return

    const now = Date.now()
    trace.completedAt = now
    trace.phase = "completed"
    trace.status = "completed"
    trace.endReason = "completed"
    if (extra?.outputLen !== undefined) trace.outputLen = extra.outputLen
    if (extra?.toolCallCount !== undefined) trace.toolCallCount = extra.toolCallCount

    const duration = now - trace.startedAt
    const timings = this.computeTimings(trace)

    // Throughput over the streaming period (since the SDK call started, or
    // since the request started when the SDK was never reached).
    const streamDuration = this.streamWindowMs(trace, now)
    const charsPerSec = this.perSecond(trace.outputLen, streamDuration)
    const eventsPerSec = this.perSecond(trace.sdkEventCount, streamDuration)

    logInfo("trace.completed", {
      reqId,
      model: trace.model,
      requestedModel: trace.requestedModel,
      durationMs: duration,
      ...timings,
      sdkEventCount: trace.sdkEventCount,
      outputLen: trace.outputLen,
      toolCallCount: trace.toolCallCount,
      stallCount: trace.stallCount,
      charsPerSec,
      eventsPerSec,
      eventTypes: trace.eventTypes,
    })

    this.recordFinished(trace, duration, timings.ttftMs)
    this.archive(trace)
  }

  /**
   * Mark a trace as failed: classify the error, log it, update stats, pass the
   * full context to dumpError() and move the trace into the error and history
   * buffers. When no active trace exists the error is still logged.
   *
   * @param error Error instance or message; strings are wrapped in an Error.
   * @param phase Phase to record for the failure; defaults to the trace's current phase.
   * @param extra Extra fields merged into the log entry and the dump. A truthy
   *              `clientDisconnect` forces endReason "client_disconnect".
   */
  fail(reqId: string, error: Error | string, phase?: TracePhase, extra?: Record<string, unknown>) {
    const trace = this.activeTraces.get(reqId)
    if (!trace) {
      // No trace found — log the error anyway
      logError("trace.fail.no_trace", { reqId, error: String(error), phase })
      return
    }

    const now = Date.now()
    trace.completedAt = now
    trace.phase = phase ?? trace.phase
    trace.status = "error"

    const err = error instanceof Error ? error : new Error(String(error))
    const errorType = classifyError(err)

    // Determine specific end reason. The explicit clientDisconnect flag wins;
    // otherwise the reason is derived from the classified error type.
    trace.endReason = extra?.clientDisconnect ? "client_disconnect"
      : errorType === "stall_timeout" ? "stall_timeout"
      : errorType === "queue_timeout" ? "queue_timeout"
      : errorType === "timeout" ? "stall_timeout"
      : err.name === "AbortError" ? "abort"
      : "sdk_error"

    trace.error = {
      type: errorType,
      message: err.message,
      stack: err.stack,
      phase: trace.phase,
    }

    const duration = now - trace.startedAt
    const timings = this.computeTimings(trace)
    const timeSinceLastEvent = now - trace.lastEventAt

    // Throughput (chars/sec) over the streaming period
    const charsPerSec = this.perSecond(trace.outputLen, this.streamWindowMs(trace, now))

    logError("trace.failed", {
      reqId,
      model: trace.model,
      requestedModel: trace.requestedModel,
      endReason: trace.endReason,
      errorType,
      error: err.message,
      phase: trace.phase,
      durationMs: duration,
      ...timings,
      sdkEventCount: trace.sdkEventCount,
      outputLen: trace.outputLen,
      stallCount: trace.stallCount,
      charsPerSec,
      timeSinceLastEventMs: timeSinceLastEvent,
      lastEventType: trace.lastEventType,
      eventTypes: trace.eventTypes,
      ...extra,
    })

    // Update stats
    this.stats.totalErrors++
    const ms = this.recordFinished(trace, duration, timings.ttftMs)
    ms.errors++
    ms.lastErrorAt = now
    ms.lastErrorReqId = reqId

    try {
      // Dump full error context via the logger module
      const dumpPath = dumpError(reqId, {
        trace: this.serializeTrace(trace),
        error: { type: errorType, message: err.message, stack: err.stack, phase: trace.phase },
        endReason: trace.endReason,
        timeSinceLastEventMs: timeSinceLastEvent,
        lastEventType: trace.lastEventType,
        eventTypes: trace.eventTypes,
        charsPerSec,
        ...extra,
      })
      logInfo("trace.error_dumped", { reqId, path: dumpPath })
    } finally {
      // Always retire the trace, even if the dump throws; otherwise it would
      // stay in activeTraces forever and be reported as an in-flight request.
      this.errorTraces.push({ ...trace })
      if (this.errorTraces.length > ERROR_BUFFER_SIZE) {
        this.errorTraces.shift()
      }

      this.archive(trace)
    }
  }

  /** Update output length on a live trace (during streaming). */
  updateOutput(reqId: string, outputLen: number) {
    const trace = this.activeTraces.get(reqId)
    if (trace) trace.outputLen = outputLen
  }

  /** Set the SDK debug log path for a live trace. */
  setSdkDebugLog(reqId: string, path: string) {
    const trace = this.activeTraces.get(reqId)
    if (trace) trace.sdkDebugLogPath = path
  }

  // ── Query methods (for debug endpoints) ──────────────────────────────────

  /**
   * Get aggregate stats: uptime, request totals, per-model averages and a
   * summary of every in-flight request.
   *
   * `requests.avgDurationMs` is averaged over finished requests only, since
   * in-flight requests have not contributed a duration yet.
   */
  getStats() {
    const now = Date.now()
    const uptimeMs = now - this.stats.startedAt
    const finished = this.stats.finishedRequests
    const avgDurationMs = finished > 0
      ? Math.round(this.stats.totalDurationMs / finished)
      : 0

    const byModel: Record<string, {
      total: number
      errors: number
      avgDurationMs: number
      avgTtftMs: number
      maxDurationMs: number
      lastErrorAt?: string
      lastErrorReqId?: string
    }> = {}
    for (const [model, ms] of this.modelStats) {
      byModel[model] = {
        total: ms.total,
        errors: ms.errors,
        avgDurationMs: ms.total > 0 ? Math.round(ms.totalDurationMs / ms.total) : 0,
        avgTtftMs: ms.firstTokenCount > 0 ? Math.round(ms.totalTimeToFirstTokenMs / ms.firstTokenCount) : 0,
        maxDurationMs: ms.maxDurationMs,
        ...(ms.lastErrorAt ? { lastErrorAt: new Date(ms.lastErrorAt).toISOString() } : {}),
        ...(ms.lastErrorReqId ? { lastErrorReqId: ms.lastErrorReqId } : {}),
      }
    }

    return {
      uptimeMs,
      uptimeHuman: humanDuration(uptimeMs),
      requests: {
        total: this.stats.totalRequests,
        errors: this.stats.totalErrors,
        active: this.activeTraces.size,
        avgDurationMs,
        errorRate: this.stats.totalRequests > 0
          ? `${((this.stats.totalErrors / this.stats.totalRequests) * 100).toFixed(1)}%`
          : "0%",
      },
      byModel,
      activeRequests: Array.from(this.activeTraces.values()).map(t => ({
        reqId: t.reqId,
        model: t.model,
        requestedModel: t.requestedModel,
        phase: t.phase,
        stream: t.stream,
        hasTools: t.hasTools,
        thinking: t.thinking,
        elapsedMs: now - t.startedAt,
        timeSinceLastEventMs: now - t.lastEventAt,
        lastEventType: t.lastEventType,
        sdkEventCount: t.sdkEventCount,
        outputLen: t.outputLen,
        stallCount: t.stallCount,
        promptLen: t.promptLen,
        systemLen: t.systemLen,
        bodyBytes: t.bodyBytes,
        clientIp: t.clientIp,
      })),
    }
  }

  /** Get up to `limit` finished traces, most recent first. A limit below 1 yields an empty list. */
  getRecentTraces(limit = 20): ReturnType<typeof this.serializeTrace>[] {
    return this.newestFirst(this.traces, limit)
  }

  /** Get a specific trace by reqId, checking active traces, then history, then the error buffer. */
  getTrace(reqId: string): ReturnType<typeof this.serializeTrace> | null {
    const found = this.activeTraces.get(reqId)
      ?? this.traces.find(t => t.reqId === reqId)
      ?? this.errorTraces.find(t => t.reqId === reqId)
    return found ? this.serializeTrace(found) : null
  }

  /** Get up to `limit` failed traces, most recent first. A limit below 1 yields an empty list. */
  getRecentErrors(limit = 10): ReturnType<typeof this.serializeTrace>[] {
    return this.newestFirst(this.errorTraces, limit)
  }

  // ── Internal ─────────────────────────────────────────────────────────────

  /** Serialize the last `limit` entries of a buffer, newest first. */
  private newestFirst(buffer: RequestTrace[], limit: number): ReturnType<typeof this.serializeTrace>[] {
    const count = Math.floor(limit)
    // slice(-0) is slice(0) and would return the WHOLE buffer, and a negative
    // limit would flip into a positive start index — so reject anything < 1.
    if (!(count > 0)) return []
    return buffer.slice(-count).reverse().map(t => this.serializeTrace(t))
  }

  /** Remove a trace from the active set and append it to the bounded history. */
  private archive(trace: RequestTrace) {
    this.activeTraces.delete(trace.reqId)
    this.traces.push(trace)
    if (this.traces.length > BUFFER_SIZE) {
      this.traces.shift()
    }
  }

  /** Fetch the stats record for a model, creating a zeroed one on first use. */
  private getModelStats(model: string): ModelStats {
    let ms = this.modelStats.get(model)
    if (!ms) {
      ms = newModelStats()
      this.modelStats.set(model, ms)
    }
    return ms
  }

  /** Fold one finished request into the global and per-model totals; returns the model's record. */
  private recordFinished(trace: RequestTrace, durationMs: number, ttftMs: number | undefined): ModelStats {
    this.stats.finishedRequests++
    this.stats.totalDurationMs += durationMs
    const ms = this.getModelStats(trace.model)
    ms.total++
    ms.totalDurationMs += durationMs
    if (ms.maxDurationMs < durationMs) ms.maxDurationMs = durationMs
    if (ttftMs !== undefined) {
      ms.totalTimeToFirstTokenMs += ttftMs
      ms.firstTokenCount++
    }
    return ms
  }

  /** Milliseconds from the SDK start (or request start if the SDK never started) to `endAt`. */
  private streamWindowMs(trace: RequestTrace, endAt: number): number {
    return trace.sdkStartedAt ? endAt - trace.sdkStartedAt : endAt - trace.startedAt
  }

  /** Rounded per-second rate of `count` over `windowMs`; 0 when the window is empty. */
  private perSecond(count: number, windowMs: number): number {
    return windowMs > 0 ? Math.round((count / windowMs) * 1000) : 0
  }

  /** Derive the durations that can be computed from the milestones recorded so far. */
  private computeTimings(trace: RequestTrace) {
    const result: Record<string, number | undefined> = {}
    if (trace.queuedAt && trace.acquiredAt) {
      result.queueWaitMs = trace.acquiredAt - trace.queuedAt
    }
    if (trace.firstTokenAt) {
      result.ttftMs = trace.firstTokenAt - trace.startedAt
      if (trace.sdkStartedAt) {
        result.ttftFromSdkMs = trace.firstTokenAt - trace.sdkStartedAt
      }
    }
    if (trace.sdkStartedAt && trace.sdkEndedAt) {
      result.sdkDurationMs = trace.sdkEndedAt - trace.sdkStartedAt
    }
    if (trace.completedAt) {
      result.totalDurationMs = trace.completedAt - trace.startedAt
    }
    return result
  }

  /**
   * Build the plain-object view of a trace used by the query methods and the
   * error dump. Finished traces carry `completedAt`; live ones carry
   * `elapsedMs` and `timeSinceLastEventMs` measured at call time instead.
   */
  private serializeTrace(trace: RequestTrace) {
    const now = Date.now()
    const timings = this.computeTimings(trace)
    const charsPerSec = this.perSecond(trace.outputLen, this.streamWindowMs(trace, trace.completedAt ?? now))
    const timeSinceLastEvent = now - trace.lastEventAt

    return {
      reqId: trace.reqId,
      model: trace.model,
      requestedModel: trace.requestedModel,
      stream: trace.stream,
      hasTools: trace.hasTools,
      thinking: trace.thinking,
      promptLen: trace.promptLen,
      systemLen: trace.systemLen,
      msgCount: trace.msgCount,
      bodyBytes: trace.bodyBytes,
      clientIp: trace.clientIp,
      phase: trace.phase,
      status: trace.status,
      endReason: trace.endReason,
      sdkEventCount: trace.sdkEventCount,
      outputLen: trace.outputLen,
      toolCallCount: trace.toolCallCount,
      stallCount: trace.stallCount,
      charsPerSec,
      eventTypes: trace.eventTypes,
      lastEventType: trace.lastEventType,
      startedAt: new Date(trace.startedAt).toISOString(),
      ...(trace.completedAt
        ? { completedAt: new Date(trace.completedAt).toISOString() }
        : { elapsedMs: now - trace.startedAt, timeSinceLastEventMs: timeSinceLastEvent }),
      ...timings,
      ...(trace.error ? { error: trace.error } : {}),
      ...(trace.sdkDebugLogPath ? { sdkDebugLogPath: trace.sdkDebugLogPath } : {}),
    }
  }
}
|
|
604
|
+
|
|
605
|
+
// ── Error classification ─────────────────────────────────────────────────────
|
|
606
|
+
|
|
607
|
+
/**
 * Map an error to a coarse category string by inspecting its name and message.
 * Checks run top to bottom and the first match wins, so order matters.
 *
 * A missing or non-string message is treated as empty instead of throwing, so
 * such errors fall through to "unknown_error" (or "stall_timeout" when the
 * name is "AbortError").
 *
 * @param err Error (or error-like object) to classify.
 * @returns One of: "stall_timeout", "queue_timeout", "client_disconnect",
 *          "sdk_aborted", "sdk_killed", "sdk_spawn_error", "parse_error",
 *          "connection_error", "broken_pipe", "oom_error", "rate_limit",
 *          "unknown_error".
 */
export function classifyError(err: Error): string {
  // Normalise once: only the first check used to guard against a missing
  // message, so every later check could throw a TypeError.
  const msg = typeof err.message === "string" ? err.message : ""
  const has = (needle: string): boolean => msg.includes(needle)

  if (err.name === "AbortError" || has("aborted")) return "stall_timeout"
  if (has("Queue timeout")) return "queue_timeout"
  if (has("client disconnect") || has("cancel")) return "client_disconnect"
  // NOTE(review): unreachable as ordered — any message containing
  // "process aborted" also contains "aborted" and was already classified as
  // "stall_timeout" above. Left in place so results stay unchanged; decide
  // whether this check should move above the generic "aborted" check.
  if (has("process aborted")) return "sdk_aborted"
  if (has("SIGTERM") || has("SIGKILL")) return "sdk_killed"
  if (has("spawn") || has("ENOENT")) return "sdk_spawn_error"
  if (has("JSON")) return "parse_error"
  if (has("ECONNREFUSED") || has("ECONNRESET")) return "connection_error"
  if (has("EPIPE") || has("broken pipe")) return "broken_pipe"
  if (has("memory") || has("OOM")) return "oom_error"
  if (has("rate limit") || has("429")) return "rate_limit"
  return "unknown_error"
}
|
|
621
|
+
|
|
622
|
+
/**
 * Render a millisecond duration compactly, picking the unit by magnitude:
 * "850ms", "12.3s", "4m 7s" or "2h 15m".
 */
function humanDuration(ms: number): string {
  const SECOND = 1000
  const MINUTE = 60000
  const HOUR = 3600000

  if (ms < SECOND) {
    return `${ms}ms`
  }
  if (ms < MINUTE) {
    const seconds = (ms / SECOND).toFixed(1)
    return `${seconds}s`
  }
  if (ms < HOUR) {
    const wholeMinutes = Math.floor(ms / MINUTE)
    const leftoverSeconds = Math.floor((ms % MINUTE) / SECOND)
    return `${wholeMinutes}m ${leftoverSeconds}s`
  }

  const wholeHours = Math.floor(ms / HOUR)
  const leftoverMinutes = Math.floor((ms % HOUR) / MINUTE)
  return `${wholeHours}h ${leftoverMinutes}m`
}
|
|
630
|
+
|
|
631
|
+
// ── Singleton ────────────────────────────────────────────────────────────────
|
|
632
|
+
|
|
633
|
+
/** Shared TraceStore instance, created once when this module is loaded. */
export const traceStore = new TraceStore()
|