@kylindc/ccxray 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,683 @@
1
+ 'use strict';
2
+
3
+ const https = require('https');
4
+ const http = require('http');
5
+ const config = require('./config');
6
+ const store = require('./store');
7
+ const { calculateCost } = require('./pricing');
8
+ const helpers = require('./helpers');
9
+ const { broadcast, broadcastSessionStatus } = require('./sse-broadcast');
10
+ const { EventStreamDecoder } = require('./eventstream');
11
+ const sigv4 = require('./sigv4');
12
+
13
// ── Strip injected proxy stats from conversation history ─────────────
// Matches the stats footer appended to responses by this proxy: a blank line,
// a `---` rule, then "📊 Context: …" through the end of the text. The `s` flag
// lets `.` cross newlines so the optional warning line is removed as well.
const STATS_PATTERN = /\n\n---\n📊 Context: .+$/s;

/**
 * Remove previously injected stats footers from assistant messages, in place.
 *
 * Only `text` blocks of messages whose role is `assistant` and whose content is
 * an array are inspected. A block that becomes empty after stripping is removed
 * from its message.
 *
 * @param {object} parsedBody Parsed request body; `messages` is mutated in place.
 * @returns {boolean} True when at least one block was changed or removed.
 */
function stripInjectedStats(parsedBody) {
  const messages = parsedBody?.messages;
  if (!Array.isArray(messages)) return false;

  let modified = false;
  for (const msg of messages) {
    if (msg?.role !== 'assistant' || !Array.isArray(msg.content)) continue;

    // Walk backwards so that splicing never shifts an index still to be visited.
    for (let i = msg.content.length - 1; i >= 0; i--) {
      const block = msg.content[i];
      if (block?.type !== 'text' || typeof block.text !== 'string') continue;

      const stripped = block.text.replace(STATS_PATTERN, '');
      if (stripped === block.text) continue;

      block.text = stripped;
      if (!stripped) msg.content.splice(i, 1);
      modified = true;
    }
  }
  return modified;
}
33
+
34
+ // ── Forward request to Bedrock ────────────────────────────────────────
35
/**
 * Forward a client request to AWS Bedrock and relay the response to the client.
 *
 * Strips stats footers this proxy injected into earlier assistant turns,
 * resolves the Bedrock model ID, rebuilds the outgoing headers, authenticates
 * (bearer token when configured, otherwise SigV4) and sends the request.
 * A 200 response is handed to handleBedrockSSEResponse; any other status is
 * handed to handleNonSSEResponse.
 *
 * @param {object} ctx Per-request context. Reads parsedBody, rawBody, clientRes,
 *   reqSessionId, bodyModified and fwdHeaders here; the whole object is passed
 *   on to the response handlers.
 * @returns {void}
 */
function forwardBedrockRequest(ctx) {
  const { parsedBody, rawBody, clientRes, reqSessionId } = ctx;

  // Give back this request's slot in the per-session in-flight counter.
  const releaseActiveRequest = () => {
    if (!reqSessionId) return;
    store.activeRequests[reqSessionId] = Math.max(0, (store.activeRequests[reqSessionId] || 1) - 1);
    broadcastSessionStatus(reqSessionId);
  };

  const statsStripped = stripInjectedStats(parsedBody);
  // Re-serialize only when the body changed; otherwise send the original bytes.
  const bodyToSend = (ctx.bodyModified || statsStripped) ? Buffer.from(JSON.stringify(parsedBody)) : rawBody;

  // Resolve Bedrock model ID
  let bedrockModelId;
  try {
    bedrockModelId = config.resolveBedrockModelId(parsedBody?.model);
  } catch (err) {
    releaseActiveRequest();
    clientRes.writeHead(400, { 'Content-Type': 'application/json' });
    clientRes.end(JSON.stringify({ error: 'bedrock_model_unknown', message: err.message }));
    return;
  }

  const bedrockUrl = config.buildBedrockUrl(config.BEDROCK_RESOLVED_REGION, bedrockModelId, config.BEDROCK_PROFILE_ARN);
  const url = new URL(bedrockUrl);

  // Build translated headers: strip Anthropic-specific and auth headers.
  // Names are lower-cased so the values set below replace a client header that
  // differs only in case instead of being sent next to it.
  const droppedHeaders = new Set(['x-api-key', 'anthropic-version', 'authorization', 'host', 'content-length']);
  const fwdHeaders = {};
  for (const [name, value] of Object.entries(ctx.fwdHeaders || {})) {
    const lowerName = name.toLowerCase();
    if (droppedHeaders.has(lowerName)) continue;
    fwdHeaders[lowerName] = value;
  }
  fwdHeaders['content-type'] = 'application/json';
  fwdHeaders['accept'] = 'application/vnd.amazon.eventstream';
  fwdHeaders['host'] = url.hostname;

  // Auth: bearer token takes precedence over SigV4
  if (config.BEDROCK_BEARER_TOKEN) {
    fwdHeaders['authorization'] = `Bearer ${config.BEDROCK_BEARER_TOKEN}`;
  } else {
    const creds = config.BEDROCK_CREDENTIALS;
    const signed = sigv4.sign('POST', bedrockUrl, fwdHeaders, bodyToSend, creds, config.BEDROCK_RESOLVED_REGION, 'bedrock');
    fwdHeaders['authorization'] = signed.authorization;
    fwdHeaders['x-amz-date'] = signed['x-amz-date'];
    if (signed['x-amz-security-token']) fwdHeaders['x-amz-security-token'] = signed['x-amz-security-token'];
  }
  // Byte length, so the header stays correct even if the body is a string.
  fwdHeaders['content-length'] = Buffer.byteLength(bodyToSend);

  const protocol = (config.BEDROCK_TEST_PROTOCOL === 'http') ? http : https;
  const proxyReq = protocol.request({
    hostname: config.BEDROCK_TEST_HOST || url.hostname,
    port: config.BEDROCK_TEST_HOST ? config.BEDROCK_TEST_PORT : (url.port || 443),
    path: url.pathname + (url.search || ''),
    method: 'POST',
    headers: fwdHeaders,
  }, (proxyRes) => {
    const responseHeaders = { ...proxyRes.headers };
    delete responseHeaders['content-length'];

    if (proxyRes.statusCode === 200) {
      // Override Content-Type so Claude Code receives standard SSE
      responseHeaders['content-type'] = 'text/event-stream';
      clientRes.writeHead(proxyRes.statusCode, responseHeaders);
      handleBedrockSSEResponse(ctx, proxyRes, clientRes, bedrockModelId);
    } else {
      // Non-200 bodies are passed through untouched, so keep the upstream
      // content type instead of labelling them as an event stream.
      clientRes.writeHead(proxyRes.statusCode, responseHeaders);
      handleNonSSEResponse(ctx, proxyRes, clientRes);
    }
  });

  proxyReq.on('error', (err) => {
    console.error(`\x1b[31m❌ BEDROCK PROXY ERROR: ${err.message}\x1b[0m`);
    releaseActiveRequest();
    if (clientRes.writableEnded) return;
    if (clientRes.headersSent) {
      // A response is already in progress; appending a JSON error would corrupt it.
      clientRes.end();
      return;
    }
    clientRes.writeHead(502, { 'Content-Type': 'application/json' });
    clientRes.end(JSON.stringify({ error: 'proxy_error', message: err.message }));
  });

  proxyReq.end(bodyToSend);
}
116
+
117
/**
 * Relay a 200 Bedrock event-stream response to the client as SSE, then log it.
 *
 * Decoded events are forwarded as they arrive, except `message_delta` and
 * `message_stop`, which are held until the stream ends so that extra text
 * blocks (context/cost stats and, for intercepted requests, a modification
 * summary) can be written ahead of them. Afterwards the exchange is written to
 * storage, broadcast, and appended to the index.
 *
 * @param {object} ctx Per-request context (reads id, ts, startTime, parsedBody,
 *   reqSessionId, skipEntry, bodyModified, originalBody, clientReq,
 *   reqWritePromise, sysHash, toolsHash).
 * @param {object} proxyRes Upstream response; 'error', 'data' and 'end'
 *   listeners are attached to it.
 * @param {object} clientRes Client response; the caller has already sent headers.
 * @param {string} bedrockModelId Resolved Bedrock model ID (not used in this function).
 * @returns {void}
 */
function handleBedrockSSEResponse(ctx, proxyRes, clientRes, bedrockModelId) {
  const { id, startTime, parsedBody, reqSessionId } = ctx;
  const decoder = new EventStreamDecoder();
  const decodedEvents = [];
  const eventTimestamps = []; // eventTimestamps[i] is the arrival time of decodedEvents[i]
  const heldEventStrs = [];   // message_delta / message_stop, flushed last
  let maxBlockIndex = -1;
  let stopReason = '';
  let finished = false;       // true once the response was finalized or aborted

  // Same framing as the injected blocks: a named event followed by its JSON data.
  const toSSE = (data) => {
    const eventLine = typeof data?.type === 'string' ? `event: ${data.type}\n` : '';
    return `${eventLine}data: ${JSON.stringify(data)}\n\n`;
  };

  // Give back this request's slot in the per-session in-flight counter.
  const releaseActiveRequest = () => {
    if (!reqSessionId) return;
    store.activeRequests[reqSessionId] = Math.max(0, (store.activeRequests[reqSessionId] || 1) - 1);
    broadcastSessionStatus(reqSessionId);
  };

  // Terminate after a failure. Guarded so the counter is released exactly once.
  const abortStream = () => {
    if (finished) return;
    finished = true;
    releaseActiveRequest();
    if (!clientRes.writableEnded) clientRes.end();
  };

  proxyRes.on('error', (err) => {
    console.error(`\x1b[31m❌ BEDROCK UPSTREAM STREAM ERROR: ${err.message}\x1b[0m`);
    abortStream();
  });

  proxyRes.on('data', (chunk) => {
    if (finished) return;

    for (const result of decoder.push(chunk)) {
      if (result.error === 'crc_mismatch') {
        console.warn(`\x1b[33m⚠ BEDROCK STREAM: CRC32 mismatch, skipping frame\x1b[0m`);
        continue;
      }
      if (result.error === 'modelStreamErrorException') {
        console.error(`\x1b[31m❌ BEDROCK STREAM ERROR: ${result.message}\x1b[0m`);
        // Releases the in-flight counter too; the 'end' handler will be a no-op.
        abortStream();
        return;
      }
      if (!result.event) continue;

      const evt = result.event;
      eventTimestamps.push(Date.now());
      if (evt.index != null && evt.index > maxBlockIndex) maxBlockIndex = evt.index;

      const sseStr = toSSE(evt);
      if (evt.type === 'message_delta' || evt.type === 'message_stop') {
        heldEventStrs.push(sseStr);
        if (evt.delta?.stop_reason) stopReason = evt.delta.stop_reason;
      } else {
        clientRes.write(sseStr);
      }
      decodedEvents.push(evt);
    }
  });

  proxyRes.on('end', () => {
    if (finished) return;
    finished = true;

    const flushHeldAndEnd = () => {
      for (const held of heldEventStrs) clientRes.write(held);
      if (!clientRes.writableEnded) clientRes.end();
    };

    // skipEntry: flush held events and end, with no logging and no entry.
    // NOTE(review): the in-flight counter is not released on this path, as
    // before; confirm that skipEntry requests are never counted.
    if (ctx.skipEntry) {
      flushHeldAndEnd();
      return;
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);

    // Attach arrival timestamps only now, so they are stored but never forwarded.
    decodedEvents.forEach((evt, i) => { evt._ts = eventTimestamps[i]; });

    const resWritePromise = config.storage.write(id, '_res.json', JSON.stringify(decodedEvents))
      .catch((e) => console.error('Write res.json failed:', e.message));

    const usage = helpers.extractUsage(decodedEvents);
    const totalCtx = helpers.totalContextTokens(usage);
    const costInfo = calculateCost(usage, parsedBody?.model);
    const maxContext = config.getMaxContext(parsedBody?.model, parsedBody?.system);

    // Injected blocks take the indexes after the last upstream content block.
    let nextBlockIndex = maxBlockIndex + 1;
    const writeTextBlock = (text) => {
      const index = nextBlockIndex++;
      clientRes.write(toSSE({ type: 'content_block_start', index, content_block: { type: 'text', text: '' } }));
      clientRes.write(toSSE({ type: 'content_block_delta', index, delta: { type: 'text_delta', text } }));
      clientRes.write(toSSE({ type: 'content_block_stop', index }));
    };

    // Stats footer; skipped when the turn stopped for a tool call.
    if (usage && totalCtx && stopReason !== 'tool_use') {
      const pct = (totalCtx / maxContext * 100).toFixed(1);
      const pctValue = Number(pct); // thresholds compare the rounded value that is displayed

      let text = '\n\n---\n📊 Context: ' + pct + '% (' + totalCtx.toLocaleString() + ' / ' + maxContext.toLocaleString() + ')';
      text += ' | ' + totalCtx.toLocaleString() + ' in + ' + (usage.output_tokens || 0).toLocaleString() + ' out';
      if (usage.cache_read_input_tokens) {
        const hitRate = (usage.cache_read_input_tokens / totalCtx * 100).toFixed(0);
        text += ' | Cache ' + hitRate + '% hit';
      }
      if (costInfo?.cost != null) {
        text += ' | $' + costInfo.cost.toFixed(4);
      }
      if (pctValue >= 90) {
        text += '\n⚠️ Context ' + pct + '% — consider /clear';
      } else if (pctValue >= 70) {
        text += '\n⚡ Context ' + pct + '% — getting full';
      }
      writeTextBlock(text);
    }

    // Inject intercept modification summary
    if (ctx.bodyModified && ctx.originalBody) {
      const orig = ctx.originalBody;
      const mod = parsedBody;
      const origMessages = orig.messages || [];
      const modMessages = mod.messages || [];
      const asText = (value) => (typeof value === 'string' ? value : JSON.stringify(value));
      const diffs = [];

      if (orig.model !== mod.model) diffs.push('Model: ' + orig.model + ' → ' + mod.model);
      if (origMessages.length !== modMessages.length) {
        diffs.push('Messages: ' + origMessages.length + ' → ' + modMessages.length);
      }
      const msgEdits = modMessages.reduce((cnt, m, i) => {
        const o = origMessages[i];
        if (!o) return cnt;
        return asText(o.content) !== asText(m.content) ? cnt + 1 : cnt;
      }, 0);
      if (msgEdits > 0) diffs.push(msgEdits + ' message(s) edited');

      // Tools and system prompt are reported too, matching handleSSEResponse.
      const origToolLen = (orig.tools || []).length;
      const modToolLen = (mod.tools || []).length;
      if (origToolLen !== modToolLen) {
        diffs.push('Tools: ' + origToolLen + ' → ' + modToolLen + ' (' + (modToolLen - origToolLen) + ')');
      }
      if (asText(orig.system) !== asText(mod.system)) diffs.push('System prompt: modified');

      if (diffs.length > 0) {
        writeTextBlock('\n\n---\n🔀 Request was modified by dashboard intercept:\n ' + diffs.join('\n '));
      }
    }

    flushHeldAndEnd();

    releaseActiveRequest();
    if (reqSessionId && store.sessionMeta[reqSessionId]) {
      store.sessionMeta[reqSessionId].lastStopReason = stopReason || null;
    }

    const sessionId = reqSessionId;
    const title = helpers.extractResponseTitle(decodedEvents);
    const thinkingDuration = helpers.computeThinkingDuration(decodedEvents);
    const entry = {
      id, ts: ctx.ts, sessionId, method: ctx.clientReq.method, url: ctx.clientReq.url,
      req: parsedBody, res: decodedEvents,
      elapsed, status: proxyRes.statusCode, isSSE: true,
      tokens: helpers.tokenizeRequest(parsedBody),
      usage, cost: costInfo,
      maxContext,
      cwd: store.sessionMeta[sessionId]?.cwd || null,
      receivedAt: startTime,
      thinkingDuration,
      duplicateToolCalls: helpers.extractDuplicateToolCalls(parsedBody?.messages),
      model: parsedBody?.model || null,
      msgCount: parsedBody?.messages?.length || 0,
      toolCount: parsedBody?.tools?.length || 0,
      toolCalls: helpers.extractToolCalls(parsedBody?.messages),
      isSubagent: !store.extractCwd(parsedBody),
      title,
      stopReason,
      sysHash: ctx.sysHash || null,
      toolsHash: ctx.toolsHash || null,
    };
    // Track in-flight writes so a later lazy-load can await them.
    entry._writePromise = Promise.all([ctx.reqWritePromise, resWritePromise].filter(Boolean));
    store.entries.push(entry);
    store.trimEntries();
    broadcast(entry);

    const indexLine = JSON.stringify({
      id, ts: ctx.ts, sessionId,
      model: entry.model, msgCount: entry.msgCount, toolCount: entry.toolCount,
      toolCalls: entry.toolCalls, isSubagent: entry.isSubagent,
      cwd: entry.cwd, isSSE: true,
      usage, cost: costInfo, maxContext,
      stopReason, title, thinkingDuration,
      elapsed, status: proxyRes.statusCode,
      receivedAt: startTime,
      sysHash: ctx.sysHash || null, toolsHash: ctx.toolsHash || null,
    });
    config.storage.appendIndex(indexLine + '\n').catch((e) => console.error('Write index failed:', e.message));

    // Release req/res from memory now that they were handed to storage.
    entry.req = null;
    entry.res = null;
    entry._loaded = false;

    console.log(`\x1b[32m📥 BEDROCK RESPONSE [${helpers.taipeiTime()}] (${elapsed}s) status=${proxyRes.statusCode}\x1b[0m`);
    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system);
    if (costInfo?.cost != null) {
      store.sessionCosts.set(sessionId, (store.sessionCosts.get(sessionId) || 0) + costInfo.cost);
      console.log(` 💰 $${costInfo.cost.toFixed(4)} this turn | $${store.sessionCosts.get(sessionId).toFixed(4)} session`);
    }
    helpers.printSeparator();
    console.log();
  });
}
316
+
317
+ // ── Forward request to Anthropic ─────────────────────────────────────
318
/**
 * Forward a client request upstream and relay the response to the client.
 *
 * In Bedrock mode the request is delegated to forwardBedrockRequest. Otherwise
 * it is sent to the configured Anthropic host, rate-limit headers from the
 * response are recorded in the store, and the response is handed to
 * handleSSEResponse (event streams) or handleNonSSEResponse (everything else).
 *
 * @param {object} ctx Per-request context. Reads parsedBody, rawBody, clientReq,
 *   clientRes, fwdHeaders, reqSessionId and bodyModified here; the whole object
 *   is passed on to the response handlers.
 * @returns {void}
 */
function forwardRequest(ctx) {
  // Route to Bedrock path when Bedrock mode is active
  if (config.IS_BEDROCK_MODE) return forwardBedrockRequest(ctx);

  const { parsedBody, rawBody, clientReq, clientRes, fwdHeaders, reqSessionId } = ctx;

  // Remove previously injected stats so they don't accumulate in conversation
  const statsStripped = stripInjectedStats(parsedBody);
  // Re-serialize only when the body changed; otherwise send the original bytes.
  const bodyToSend = (ctx.bodyModified || statsStripped) ? Buffer.from(JSON.stringify(parsedBody)) : rawBody;

  // Parse a numeric header; null only when it is missing or not a number, so a
  // genuine 0 (e.g. no tokens remaining) is preserved.
  const toCount = (value) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? null : parsed;
  };

  const transport = config.ANTHROPIC_PROTOCOL === 'http' ? http : https;
  const proxyReq = transport.request({
    hostname: config.ANTHROPIC_HOST,
    port: config.ANTHROPIC_PORT,
    path: clientReq.url,
    method: clientReq.method,
    headers: { ...fwdHeaders, 'content-length': Buffer.byteLength(bodyToSend) },
  }, (proxyRes) => {
    const isSSE = (proxyRes.headers['content-type'] || '').includes('text/event-stream');

    // Capture rate limit headers
    const rl = proxyRes.headers;
    if (rl['anthropic-ratelimit-tokens-limit']) {
      store.setRateLimitState({
        tokensLimit: toCount(rl['anthropic-ratelimit-tokens-limit']),
        tokensRemaining: toCount(rl['anthropic-ratelimit-tokens-remaining']),
        tokensReset: rl['anthropic-ratelimit-tokens-reset'] || null,
        inputLimit: toCount(rl['anthropic-ratelimit-input-tokens-limit']),
        inputRemaining: toCount(rl['anthropic-ratelimit-input-tokens-remaining']),
        inputReset: rl['anthropic-ratelimit-input-tokens-reset'] || null,
        updatedAt: Date.now(),
      });
    }
    clientRes.writeHead(proxyRes.statusCode, proxyRes.headers);

    if (isSSE) {
      handleSSEResponse(ctx, proxyRes, clientRes);
    } else {
      handleNonSSEResponse(ctx, proxyRes, clientRes);
    }
  });

  proxyReq.on('error', (err) => {
    console.error(`\x1b[31m❌ PROXY ERROR: ${err.message}\x1b[0m`);
    if (reqSessionId) {
      store.activeRequests[reqSessionId] = Math.max(0, (store.activeRequests[reqSessionId] || 1) - 1);
      broadcastSessionStatus(reqSessionId);
    }
    if (clientRes.writableEnded) return;
    if (clientRes.headersSent) {
      // A response is already in progress; appending a JSON error would corrupt it.
      clientRes.end();
      return;
    }
    clientRes.writeHead(502, { 'Content-Type': 'application/json' });
    clientRes.end(JSON.stringify({ error: 'proxy_error', message: err.message }));
  });

  proxyReq.end(bodyToSend);
}
372
+
373
/**
 * Relay an upstream SSE response to the client, then log it.
 *
 * The stream is split on blank lines into events. Each event is forwarded as it
 * arrives, except `message_delta` and `message_stop`, which are held until the
 * stream ends so that extra text blocks (context/cost stats and, for
 * intercepted requests, a modification summary) can be written ahead of them.
 * Afterwards the exchange is written to storage, broadcast, and appended to the
 * index.
 *
 * @param {object} ctx Per-request context (reads id, ts, startTime, parsedBody,
 *   reqSessionId, skipEntry, bodyModified, originalBody, clientReq,
 *   reqWritePromise, sysHash, toolsHash).
 * @param {object} proxyRes Upstream response; 'error', 'data' and 'end'
 *   listeners are attached to it.
 * @param {object} clientRes Client response; the caller has already sent headers.
 * @returns {void}
 */
function handleSSEResponse(ctx, proxyRes, clientRes) {
  const { id, startTime, parsedBody, reqSessionId } = ctx;
  const resChunks = [];
  // Streaming decoder: a multi-byte character split across two chunks is
  // decoded once it is complete instead of being turned into U+FFFD.
  const utf8 = new TextDecoder('utf-8', { ignoreBOM: true });
  let sseLineBuf = '';
  let maxBlockIndex = -1;
  const heldEvents = [];      // message_delta / message_stop, flushed last
  const eventTimestamps = []; // arrival time of each event that parsed as JSON
  let stopReason = '';
  let finished = false;       // true once the response was finalized or aborted

  const sseEvent = (eventType, data) => 'event: ' + eventType + '\ndata: ' + JSON.stringify(data) + '\n\n';

  // Give back this request's slot in the per-session in-flight counter.
  const releaseActiveRequest = () => {
    if (!reqSessionId) return;
    store.activeRequests[reqSessionId] = Math.max(0, (store.activeRequests[reqSessionId] || 1) - 1);
    broadcastSessionStatus(reqSessionId);
  };

  // Inspect one event's text: record its arrival time and block index, and
  // report whether it must be held back until the end of the stream.
  const shouldHold = (eventText) => {
    const dataMatch = eventText.match(/^data: (.+)$/m);
    if (!dataMatch) return false;
    try {
      const evt = JSON.parse(dataMatch[1]);
      eventTimestamps.push(Date.now());
      if (evt.index != null && evt.index > maxBlockIndex) maxBlockIndex = evt.index;
      if (evt.type === 'message_delta' || evt.type === 'message_stop') {
        if (evt.delta?.stop_reason) stopReason = evt.delta.stop_reason;
        return true;
      }
    } catch {
      // Data that is not usable JSON is forwarded untouched.
    }
    return false;
  };

  proxyRes.on('error', (err) => {
    console.error(`\x1b[31m❌ UPSTREAM STREAM ERROR: ${err.message}\x1b[0m`);
    if (finished) return;
    finished = true;
    releaseActiveRequest();
    if (!clientRes.writableEnded) clientRes.end();
  });

  proxyRes.on('data', (chunk) => {
    if (finished) return;
    resChunks.push(chunk);
    sseLineBuf += utf8.decode(chunk, { stream: true });

    // Everything before the last blank line is complete; keep the rest buffered.
    const parts = sseLineBuf.split('\n\n');
    sseLineBuf = parts.pop();

    for (const part of parts) {
      if (!part.trim()) { clientRes.write('\n\n'); continue; }
      if (shouldHold(part)) {
        heldEvents.push(part + '\n\n');
      } else {
        clientRes.write(part + '\n\n');
      }
    }
  });

  proxyRes.on('end', () => {
    if (finished) return;
    finished = true;

    // Flush the decoder, then handle whatever is left without a terminator.
    sseLineBuf += utf8.decode();
    if (sseLineBuf.trim()) {
      if (shouldHold(sseLineBuf)) {
        heldEvents.push(sseLineBuf + '\n\n');
      } else {
        clientRes.write(sseLineBuf);
      }
    }

    // Quota-check: just flush held events and end — no logging, no entry
    // NOTE(review): the in-flight counter is not released on this path, as
    // before; confirm that skipEntry requests are never counted.
    if (ctx.skipEntry) {
      for (const held of heldEvents) clientRes.write(held);
      clientRes.end();
      return;
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    const raw = Buffer.concat(resChunks).toString();
    const events = helpers.parseSSEEvents(raw);
    for (let i = 0; i < events.length && i < eventTimestamps.length; i++) {
      events[i]._ts = eventTimestamps[i];
    }
    const resWritePromise = config.storage.write(id, '_res.json', JSON.stringify(events))
      .catch((e) => console.error('Write res.json failed:', e.message));

    const usage = helpers.extractUsage(events);
    const totalCtx = helpers.totalContextTokens(usage);
    const costInfo = calculateCost(usage, parsedBody?.model);
    const maxContext = config.getMaxContext(parsedBody?.model, parsedBody?.system);

    // Injected blocks take the indexes after the last upstream content block.
    let nextBlockIndex = maxBlockIndex + 1;
    const writeTextBlock = (text) => {
      const index = nextBlockIndex++;
      clientRes.write(sseEvent('content_block_start', { type: 'content_block_start', index, content_block: { type: 'text', text: '' } }));
      clientRes.write(sseEvent('content_block_delta', { type: 'content_block_delta', index, delta: { type: 'text_delta', text } }));
      clientRes.write(sseEvent('content_block_stop', { type: 'content_block_stop', index }));
    };

    // Inject usage text block before message_delta/message_stop; skipped when
    // the turn stopped for a tool call.
    if (usage && totalCtx && stopReason !== 'tool_use') {
      const pct = (totalCtx / maxContext * 100).toFixed(1);
      const pctValue = Number(pct); // thresholds compare the rounded value that is displayed

      let text = '\n\n---\n📊 Context: ' + pct + '% (' + totalCtx.toLocaleString() + ' / ' + maxContext.toLocaleString() + ')';
      text += ' | ' + totalCtx.toLocaleString() + ' in + ' + (usage.output_tokens || 0).toLocaleString() + ' out';
      if (usage.cache_read_input_tokens) {
        const hitRate = (usage.cache_read_input_tokens / totalCtx * 100).toFixed(0);
        text += ' | Cache ' + hitRate + '% hit';
      }
      if (costInfo?.cost != null) {
        text += ' | $' + costInfo.cost.toFixed(4);
      }
      if (pctValue >= 90) {
        text += '\n⚠️ Context ' + pct + '% — consider /clear';
      } else if (pctValue >= 70) {
        text += '\n⚡ Context ' + pct + '% — getting full';
      }
      writeTextBlock(text);
    }

    // Inject intercept modification summary
    if (ctx.bodyModified && ctx.originalBody) {
      const orig = ctx.originalBody;
      const mod = parsedBody;
      const origMessages = orig.messages || [];
      const modMessages = mod.messages || [];
      const asText = (value) => (typeof value === 'string' ? value : JSON.stringify(value));
      const diffs = [];

      if (orig.model !== mod.model) diffs.push('Model: ' + orig.model + ' → ' + mod.model);
      if (origMessages.length !== modMessages.length) {
        diffs.push('Messages: ' + origMessages.length + ' → ' + modMessages.length);
      }
      const msgEdits = modMessages.reduce((cnt, m, i) => {
        const o = origMessages[i];
        if (!o) return cnt;
        return asText(o.content) !== asText(m.content) ? cnt + 1 : cnt;
      }, 0);
      if (msgEdits > 0) diffs.push(msgEdits + ' message(s) edited');

      const origToolLen = (orig.tools || []).length;
      const modToolLen = (mod.tools || []).length;
      if (origToolLen !== modToolLen) {
        diffs.push('Tools: ' + origToolLen + ' → ' + modToolLen + ' (' + (modToolLen - origToolLen) + ')');
      }
      if (asText(orig.system) !== asText(mod.system)) diffs.push('System prompt: modified');

      if (diffs.length > 0) {
        writeTextBlock('\n\n---\n🔀 Request was modified by dashboard intercept:\n ' + diffs.join('\n '));
      }
    }

    // Forward held events
    for (const held of heldEvents) {
      clientRes.write(held);
    }
    clientRes.end();

    releaseActiveRequest();
    if (reqSessionId && store.sessionMeta[reqSessionId]) {
      store.sessionMeta[reqSessionId].lastStopReason = stopReason || null;
    }

    const sessionId = reqSessionId;
    const title = helpers.extractResponseTitle(events);
    const thinkingDuration = helpers.computeThinkingDuration(events);
    const entry = {
      id, ts: ctx.ts, sessionId, method: ctx.clientReq.method, url: ctx.clientReq.url,
      req: parsedBody, res: events,
      elapsed, status: proxyRes.statusCode, isSSE: true,
      tokens: helpers.tokenizeRequest(parsedBody),
      usage, cost: costInfo,
      maxContext,
      cwd: store.sessionMeta[sessionId]?.cwd || null,
      receivedAt: startTime,
      thinkingDuration,
      duplicateToolCalls: helpers.extractDuplicateToolCalls(parsedBody?.messages),
      model: parsedBody?.model || null,
      msgCount: parsedBody?.messages?.length || 0,
      toolCount: parsedBody?.tools?.length || 0,
      toolCalls: helpers.extractToolCalls(parsedBody?.messages),
      isSubagent: !store.extractCwd(parsedBody),
      title,
      stopReason,
      sysHash: ctx.sysHash || null,
      toolsHash: ctx.toolsHash || null,
    };
    // Track in-flight writes so lazy-load can await them
    entry._writePromise = Promise.all([ctx.reqWritePromise, resWritePromise].filter(Boolean));
    store.entries.push(entry);
    store.trimEntries();
    broadcast(entry);

    // Persist to index (fire-and-forget after broadcast)
    const indexLine = JSON.stringify({
      id, ts: ctx.ts, sessionId,
      model: entry.model, msgCount: entry.msgCount, toolCount: entry.toolCount,
      toolCalls: entry.toolCalls, isSubagent: entry.isSubagent,
      cwd: entry.cwd, isSSE: true,
      usage, cost: costInfo, maxContext,
      stopReason, title, thinkingDuration,
      elapsed, status: proxyRes.statusCode,
      receivedAt: startTime,
      sysHash: ctx.sysHash || null, toolsHash: ctx.toolsHash || null,
    });
    config.storage.appendIndex(indexLine + '\n').catch((e) => console.error('Write index failed:', e.message));

    // Release req/res from memory — data is on disk (or being written), lazy-load on demand
    entry.req = null;
    entry.res = null;
    entry._loaded = false;

    // Terminal summary
    console.log(`\x1b[32m📥 RESPONSE [${helpers.taipeiTime()}] (${elapsed}s) status=${proxyRes.statusCode}\x1b[0m`);
    if (usage) helpers.printContextBar(usage, parsedBody?.model, parsedBody?.system);
    if (costInfo?.cost != null) {
      store.sessionCosts.set(sessionId, (store.sessionCosts.get(sessionId) || 0) + costInfo.cost);
      console.log(` 💰 $${costInfo.cost.toFixed(4)} this turn | $${store.sessionCosts.get(sessionId).toFixed(4)} session`);
    }
    helpers.printSeparator();
    console.log();
  });
}
593
+
594
/**
 * Pass a non-streaming upstream response through to the client unchanged and
 * record it.
 *
 * Chunks are written to the client as they arrive and buffered. When the
 * upstream ends, the buffered body is parsed as JSON (falling back to the raw
 * text), written to storage, and an entry is pushed to the store, broadcast and
 * appended to the index. Usage and cost are recorded as null for these
 * responses.
 *
 * @param {object} ctx Per-request context (reads id, ts, startTime, parsedBody,
 *   reqSessionId, skipEntry, clientReq, reqWritePromise, sysHash, toolsHash).
 * @param {object} proxyRes Upstream response; 'error', 'data' and 'end'
 *   listeners are attached to it.
 * @param {object} clientRes Client response the body is relayed to.
 * @returns {void}
 */
function handleNonSSEResponse(ctx, proxyRes, clientRes) {
  const { id, startTime, parsedBody, reqSessionId } = ctx;
  const bufferedChunks = [];

  // Give back this request's slot in the per-session in-flight counter.
  function releaseActiveRequest() {
    const inFlight = store.activeRequests[reqSessionId] || 1;
    store.activeRequests[reqSessionId] = Math.max(0, inFlight - 1);
    broadcastSessionStatus(reqSessionId);
  }

  function onUpstreamError(err) {
    console.error(`\x1b[31m❌ UPSTREAM STREAM ERROR: ${err.message}\x1b[0m`);
    if (reqSessionId) releaseActiveRequest();
    if (clientRes.writableEnded) return;
    clientRes.end();
  }

  function onUpstreamData(chunk) {
    clientRes.write(chunk);
    bufferedChunks.push(chunk);
  }

  function onUpstreamEnd() {
    clientRes.end();

    // Quota-check: no logging, no entry
    if (ctx.skipEntry) return;

    if (reqSessionId) {
      releaseActiveRequest();
      const meta = store.sessionMeta[reqSessionId];
      if (meta) meta.lastStopReason = null;
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    const bodyText = Buffer.concat(bufferedChunks).toString();

    // Keep the raw text when the body is not valid JSON.
    let resData;
    try {
      resData = JSON.parse(bodyText);
    } catch {
      resData = bodyText;
    }
    const serialized = typeof resData === 'string' ? resData : JSON.stringify(resData);
    const resWritePromise = config.storage
      .write(id, '_res.json', serialized)
      .catch((e) => console.error('Write res.json failed:', e.message));

    const sessionId = reqSessionId;
    const maxContext = config.getMaxContext(parsedBody?.model, parsedBody?.system);
    const title = helpers.extractResponseTitle(resData);
    const stopReason = resData?.stop_reason || '';

    const entry = {
      id,
      ts: ctx.ts,
      sessionId,
      method: ctx.clientReq.method,
      url: ctx.clientReq.url,
      req: parsedBody,
      res: resData,
      elapsed,
      status: proxyRes.statusCode,
      isSSE: false,
      tokens: helpers.tokenizeRequest(parsedBody),
      usage: null,
      cost: null,
      maxContext,
      cwd: store.sessionMeta[sessionId]?.cwd || null,
      receivedAt: startTime,
      duplicateToolCalls: helpers.extractDuplicateToolCalls(parsedBody?.messages),
      model: parsedBody?.model || null,
      msgCount: parsedBody?.messages?.length || 0,
      toolCount: parsedBody?.tools?.length || 0,
      toolCalls: helpers.extractToolCalls(parsedBody?.messages),
      isSubagent: !store.extractCwd(parsedBody),
      title,
      stopReason,
      sysHash: ctx.sysHash || null,
      toolsHash: ctx.toolsHash || null,
    };
    // Pending writes are kept on the entry so they can be awaited later.
    const pendingWrites = [ctx.reqWritePromise, resWritePromise].filter(Boolean);
    entry._writePromise = Promise.all(pendingWrites);
    store.entries.push(entry);
    store.trimEntries();
    broadcast(entry);

    const indexRecord = {
      id,
      ts: ctx.ts,
      sessionId,
      model: entry.model,
      msgCount: entry.msgCount,
      toolCount: entry.toolCount,
      toolCalls: entry.toolCalls,
      isSubagent: entry.isSubagent,
      cwd: entry.cwd,
      isSSE: false,
      usage: null,
      cost: null,
      maxContext,
      stopReason,
      title,
      thinkingDuration: null,
      elapsed,
      status: proxyRes.statusCode,
      receivedAt: startTime,
      sysHash: ctx.sysHash || null,
      toolsHash: ctx.toolsHash || null,
    };
    const indexLine = JSON.stringify(indexRecord);
    config.storage
      .appendIndex(indexLine + '\n')
      .catch((e) => console.error('Write index failed:', e.message));

    // Release req/res from memory — data is on disk (or being written), lazy-load on demand
    entry.req = null;
    entry.res = null;
    entry._loaded = false;

    console.log(`\x1b[32m📥 RESPONSE [${helpers.taipeiTime()}] (${elapsed}s) status=${proxyRes.statusCode}\x1b[0m`);
    helpers.printSeparator();
    console.log();
  }

  proxyRes.on('error', onUpstreamError);
  proxyRes.on('data', onUpstreamData);
  proxyRes.on('end', onUpstreamEnd);
}
682
+
683
+ module.exports = { forwardRequest, forwardBedrockRequest };