polydev-ai 1.9.40 → 1.9.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cliManager.js +129 -9
- package/lib/tunnelClient.js +321 -1
- package/package.json +1 -1
package/lib/cliManager.js
CHANGED
|
@@ -578,6 +578,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
578
578
|
success: true,
|
|
579
579
|
content,
|
|
580
580
|
tokens_used: this.estimateTokens(prompt + content),
|
|
581
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
582
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
581
583
|
latency_ms: Date.now() - startTime,
|
|
582
584
|
provider: providerId,
|
|
583
585
|
mode: 'args',
|
|
@@ -689,6 +691,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
689
691
|
success: true,
|
|
690
692
|
content: jsonResult.content,
|
|
691
693
|
tokens_used: jsonResult.tokens_used,
|
|
694
|
+
input_tokens: jsonResult.input_tokens,
|
|
695
|
+
output_tokens: jsonResult.output_tokens,
|
|
692
696
|
latency_ms: Date.now() - startTime,
|
|
693
697
|
provider: providerId,
|
|
694
698
|
mode: 'args',
|
|
@@ -715,6 +719,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
715
719
|
success: true,
|
|
716
720
|
content,
|
|
717
721
|
tokens_used: this.estimateTokens(prompt + content),
|
|
722
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
723
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
718
724
|
latency_ms: Date.now() - startTime,
|
|
719
725
|
provider: providerId,
|
|
720
726
|
mode: 'args',
|
|
@@ -771,6 +777,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
771
777
|
success: true,
|
|
772
778
|
content,
|
|
773
779
|
tokens_used: this.estimateTokens(prompt + content),
|
|
780
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
781
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
774
782
|
latency_ms: Date.now() - startTime,
|
|
775
783
|
provider: providerId,
|
|
776
784
|
mode: 'args',
|
|
@@ -1038,17 +1046,13 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1038
1046
|
const content = json.result;
|
|
1039
1047
|
|
|
1040
1048
|
// Extract primary model from modelUsage
|
|
1041
|
-
// The primary model is the one with highest cost - that's the user's configured main model
|
|
1042
|
-
// (Haiku is used internally for quick tasks, but the expensive model is what the user chose)
|
|
1043
1049
|
let primaryModel = CLI_DEFAULT_MODELS['claude_code'] || 'cli_default';
|
|
1044
1050
|
const modelUsage = json.modelUsage || {};
|
|
1045
1051
|
const modelNames = Object.keys(modelUsage);
|
|
1046
1052
|
|
|
1047
1053
|
if (modelNames.length === 1) {
|
|
1048
|
-
// Only one model used - that's the primary
|
|
1049
1054
|
primaryModel = modelNames[0];
|
|
1050
1055
|
} else if (modelNames.length > 1) {
|
|
1051
|
-
// Multiple models - the one with highest cost is the user's configured main model
|
|
1052
1056
|
let highestCost = -1;
|
|
1053
1057
|
for (const [modelName, usage] of Object.entries(modelUsage)) {
|
|
1054
1058
|
const cost = usage.costUSD || 0;
|
|
@@ -1059,13 +1063,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1059
1063
|
}
|
|
1060
1064
|
}
|
|
1061
1065
|
|
|
1062
|
-
// Calculate
|
|
1066
|
+
// Calculate separate input and output tokens
|
|
1067
|
+
let totalInputTokens = 0;
|
|
1068
|
+
let totalOutputTokens = 0;
|
|
1063
1069
|
let totalTokens = 0;
|
|
1064
1070
|
let cacheTokens = 0;
|
|
1065
1071
|
for (const usage of Object.values(modelUsage)) {
|
|
1066
|
-
|
|
1072
|
+
totalInputTokens += (usage.inputTokens || 0);
|
|
1073
|
+
totalOutputTokens += (usage.outputTokens || 0);
|
|
1067
1074
|
totalTokens += (usage.inputTokens || 0) + (usage.outputTokens || 0);
|
|
1068
|
-
// Track cache tokens separately (for cost calculations, but not displayed as "tokens used")
|
|
1069
1075
|
cacheTokens += (usage.cacheReadInputTokens || 0) + (usage.cacheCreationInputTokens || 0);
|
|
1070
1076
|
}
|
|
1071
1077
|
|
|
@@ -1073,14 +1079,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1073
1079
|
content,
|
|
1074
1080
|
model_used: primaryModel,
|
|
1075
1081
|
tokens_used: totalTokens || json.usage?.input_tokens + json.usage?.output_tokens || 0,
|
|
1076
|
-
|
|
1082
|
+
input_tokens: totalInputTokens,
|
|
1083
|
+
output_tokens: totalOutputTokens,
|
|
1084
|
+
cache_tokens: cacheTokens,
|
|
1077
1085
|
cost_usd: json.total_cost_usd || 0,
|
|
1078
1086
|
model_usage: modelUsage,
|
|
1079
1087
|
session_id: json.session_id,
|
|
1080
1088
|
duration_ms: json.duration_ms
|
|
1081
1089
|
};
|
|
1082
1090
|
} catch (e) {
|
|
1083
|
-
// Not valid JSON, return null to fall back to text parsing
|
|
1084
1091
|
return null;
|
|
1085
1092
|
}
|
|
1086
1093
|
}
|
|
@@ -1374,6 +1381,119 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1374
1381
|
return this.providers.get(providerId);
|
|
1375
1382
|
}
|
|
1376
1383
|
|
|
1384
|
+
// ============================================
|
|
1385
|
+
// Streaming CLI Methods (CLI-as-API real streaming)
|
|
1386
|
+
// ============================================
|
|
1387
|
+
|
|
1388
|
+
/**
|
|
1389
|
+
* Check if a provider supports streaming output
|
|
1390
|
+
*/
|
|
1391
|
+
supportsStreaming(providerId) {
|
|
1392
|
+
return providerId === 'claude_code' || providerId === 'gemini_cli';
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
/**
|
|
1396
|
+
* Build CLI args for streaming mode (NDJSON output)
|
|
1397
|
+
* @param {string} providerId - 'claude_code' or 'gemini_cli'
|
|
1398
|
+
* @param {string} prompt - The user prompt
|
|
1399
|
+
* @param {string|null} model - Optional model name
|
|
1400
|
+
* @returns {string[]} CLI arguments array
|
|
1401
|
+
*/
|
|
1402
|
+
buildStreamingArgs(providerId, prompt, model = null) {
|
|
1403
|
+
if (providerId === 'claude_code') {
|
|
1404
|
+
const args = ['-p', '--output-format', 'stream-json', '--verbose', '--include-partial-messages'];
|
|
1405
|
+
|
|
1406
|
+
// Add model flag if specified
|
|
1407
|
+
if (model) {
|
|
1408
|
+
// Normalize model names to Claude CLI aliases
|
|
1409
|
+
const claudeModelMap = {
|
|
1410
|
+
'claude-opus-4-5': 'opus',
|
|
1411
|
+
'claude-opus-4.5': 'opus',
|
|
1412
|
+
'claude-4.5-opus': 'opus',
|
|
1413
|
+
'claude-opus-4-5-20250514': 'opus',
|
|
1414
|
+
'claude-sonnet-4-5': 'sonnet',
|
|
1415
|
+
'claude-sonnet-4.5': 'sonnet',
|
|
1416
|
+
'claude-4.5-sonnet': 'sonnet',
|
|
1417
|
+
'claude-sonnet-4-5-20250514': 'sonnet',
|
|
1418
|
+
'claude-3-5-sonnet': 'sonnet',
|
|
1419
|
+
'claude-3-5-haiku': 'haiku',
|
|
1420
|
+
'claude-haiku-3-5': 'haiku',
|
|
1421
|
+
};
|
|
1422
|
+
const cliModel = claudeModelMap[model.toLowerCase()] || model;
|
|
1423
|
+
args.unshift('--model', cliModel);
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
args.push(prompt);
|
|
1427
|
+
return args;
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
if (providerId === 'gemini_cli') {
|
|
1431
|
+
const args = ['-o', 'stream-json'];
|
|
1432
|
+
|
|
1433
|
+
// Add model flag if specified
|
|
1434
|
+
if (model) {
|
|
1435
|
+
args.push('-m', model);
|
|
1436
|
+
}
|
|
1437
|
+
|
|
1438
|
+
// Add prompt prefix to prevent tool planning in non-interactive mode
|
|
1439
|
+
const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
|
|
1440
|
+
args.push('-p', geminiPrompt);
|
|
1441
|
+
return args;
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1444
|
+
throw new Error(`Provider ${providerId} does not support streaming`);
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
/**
|
|
1448
|
+
* Spawn a CLI process in streaming mode.
|
|
1449
|
+
* Returns the raw child process so the caller can read stdout line-by-line.
|
|
1450
|
+
*
|
|
1451
|
+
* @param {string} providerId - 'claude_code' or 'gemini_cli'
|
|
1452
|
+
* @param {string} prompt - The user prompt
|
|
1453
|
+
* @param {string|null} model - Optional model name
|
|
1454
|
+
* @returns {import('child_process').ChildProcess} The spawned process
|
|
1455
|
+
*/
|
|
1456
|
+
spawnStreamingCli(providerId, prompt, model = null) {
|
|
1457
|
+
const provider = this.providers.get(providerId);
|
|
1458
|
+
if (!provider) {
|
|
1459
|
+
throw new Error(`Unknown provider: ${providerId}`);
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
const args = this.buildStreamingArgs(providerId, prompt, model);
|
|
1463
|
+
|
|
1464
|
+
console.log(`[Polydev CLI] Spawning streaming ${providerId}: ${provider.command} ${args.slice(0, 4).join(' ')}...`);
|
|
1465
|
+
|
|
1466
|
+
const child = spawn(provider.command, args, {
|
|
1467
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
1468
|
+
shell: process.platform === 'win32',
|
|
1469
|
+
env: {
|
|
1470
|
+
...process.env,
|
|
1471
|
+
HOME: process.env.HOME || os.homedir(),
|
|
1472
|
+
CI: '1',
|
|
1473
|
+
NO_COLOR: '1',
|
|
1474
|
+
TERM: 'dumb',
|
|
1475
|
+
NONINTERACTIVE: '1',
|
|
1476
|
+
NO_BROWSER: '1',
|
|
1477
|
+
BROWSER: 'echo',
|
|
1478
|
+
DISPLAY: '',
|
|
1479
|
+
HEADLESS: '1',
|
|
1480
|
+
CODEX_DISABLE_UPDATE_CHECK: '1',
|
|
1481
|
+
CLAUDE_CODE_DISABLE_UPDATE_CHECK: '1',
|
|
1482
|
+
GEMINI_NO_BROWSER: '1',
|
|
1483
|
+
GOOGLE_NO_BROWSER: '1',
|
|
1484
|
+
npm_config_update_notifier: 'false',
|
|
1485
|
+
NO_UPDATE_NOTIFIER: '1'
|
|
1486
|
+
}
|
|
1487
|
+
});
|
|
1488
|
+
|
|
1489
|
+
// Close stdin immediately to prevent hanging (critical for Gemini CLI)
|
|
1490
|
+
if (child.stdin) {
|
|
1491
|
+
child.stdin.end();
|
|
1492
|
+
}
|
|
1493
|
+
|
|
1494
|
+
return child;
|
|
1495
|
+
}
|
|
1496
|
+
|
|
1377
1497
|
// ============================================
|
|
1378
1498
|
// Status Reporting Methods
|
|
1379
1499
|
// ============================================
|
package/lib/tunnelClient.js
CHANGED
|
@@ -205,8 +205,13 @@ class TunnelClient {
|
|
|
205
205
|
* Handle a single tunnel request by routing to CLI
|
|
206
206
|
*/
|
|
207
207
|
async handleRequest(request) {
|
|
208
|
+
// Use streaming path for providers that support it
|
|
209
|
+
if (this.cliManager.supportsStreaming(request.provider)) {
|
|
210
|
+
return this.handleStreamingRequest(request);
|
|
211
|
+
}
|
|
212
|
+
|
|
208
213
|
const startTime = Date.now();
|
|
209
|
-
console.error(`[Tunnel] Processing request ${request.id} → ${request.provider}`);
|
|
214
|
+
console.error(`[Tunnel] Processing request ${request.id} → ${request.provider} (non-streaming)`);
|
|
210
215
|
|
|
211
216
|
try {
|
|
212
217
|
const result = await this.cliManager.sendCliPrompt(
|
|
@@ -226,6 +231,8 @@ class TunnelClient {
|
|
|
226
231
|
content: result.content || '',
|
|
227
232
|
model_used: result.model || result.detectedModel || request.provider,
|
|
228
233
|
tokens_used: result.tokens_used || null,
|
|
234
|
+
input_tokens: result.input_tokens || null,
|
|
235
|
+
output_tokens: result.output_tokens || null,
|
|
229
236
|
latency_ms: latencyMs,
|
|
230
237
|
});
|
|
231
238
|
} else {
|
|
@@ -247,6 +254,319 @@ class TunnelClient {
|
|
|
247
254
|
}
|
|
248
255
|
}
|
|
249
256
|
|
|
257
|
+
/**
|
|
258
|
+
* Handle a streaming-capable tunnel request (Claude Code or Gemini CLI).
|
|
259
|
+
* Spawns CLI with stream-json flags, reads NDJSON stdout line-by-line,
|
|
260
|
+
* extracts text deltas, batches them, and POSTs chunks to /api/tunnel/stream-chunk.
|
|
261
|
+
* On completion, POSTs final response to /api/tunnel/respond for backward compat.
|
|
262
|
+
*/
|
|
263
|
+
async handleStreamingRequest(request) {
|
|
264
|
+
const startTime = Date.now();
|
|
265
|
+
const requestId = request.id;
|
|
266
|
+
const provider = request.provider;
|
|
267
|
+
console.error(`[Tunnel] Processing request ${requestId} → ${provider} (streaming)`);
|
|
268
|
+
|
|
269
|
+
let child;
|
|
270
|
+
let fullContent = '';
|
|
271
|
+
let chunkIndex = 0;
|
|
272
|
+
let pendingText = '';
|
|
273
|
+
let lastFlushTime = Date.now();
|
|
274
|
+
let modelUsed = null;
|
|
275
|
+
let inputTokens = null;
|
|
276
|
+
let outputTokens = null;
|
|
277
|
+
let costUsd = null;
|
|
278
|
+
|
|
279
|
+
const BATCH_INTERVAL_MS = 300;
|
|
280
|
+
const BATCH_MIN_CHARS = 10;
|
|
281
|
+
|
|
282
|
+
// Flush accumulated text as a stream chunk
|
|
283
|
+
const flushChunk = async (force = false) => {
|
|
284
|
+
if (!pendingText) return;
|
|
285
|
+
if (!force && pendingText.length < BATCH_MIN_CHARS && (Date.now() - lastFlushTime) < BATCH_INTERVAL_MS) {
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
const text = pendingText;
|
|
290
|
+
pendingText = '';
|
|
291
|
+
lastFlushTime = Date.now();
|
|
292
|
+
|
|
293
|
+
try {
|
|
294
|
+
await this.sendStreamChunk({
|
|
295
|
+
request_id: requestId,
|
|
296
|
+
chunk_index: chunkIndex++,
|
|
297
|
+
content: text,
|
|
298
|
+
});
|
|
299
|
+
} catch (err) {
|
|
300
|
+
console.error(`[Tunnel] Failed to send stream chunk for ${requestId}:`, err.message);
|
|
301
|
+
}
|
|
302
|
+
};
|
|
303
|
+
|
|
304
|
+
// Set up periodic flushing
|
|
305
|
+
const flushInterval = setInterval(async () => {
|
|
306
|
+
if (pendingText.length > 0 && (Date.now() - lastFlushTime) >= BATCH_INTERVAL_MS) {
|
|
307
|
+
await flushChunk(true);
|
|
308
|
+
}
|
|
309
|
+
}, BATCH_INTERVAL_MS);
|
|
310
|
+
|
|
311
|
+
try {
|
|
312
|
+
child = this.cliManager.spawnStreamingCli(
|
|
313
|
+
provider,
|
|
314
|
+
request.prompt,
|
|
315
|
+
request.model_requested || null
|
|
316
|
+
);
|
|
317
|
+
|
|
318
|
+
// Set up CLI timeout
|
|
319
|
+
const timeoutHandle = setTimeout(() => {
|
|
320
|
+
if (child && !child.killed) {
|
|
321
|
+
console.error(`[Tunnel] Streaming request ${requestId} timed out after ${this.CLI_TIMEOUT_MS}ms`);
|
|
322
|
+
child.kill('SIGTERM');
|
|
323
|
+
setTimeout(() => {
|
|
324
|
+
if (!child.killed) child.kill('SIGKILL');
|
|
325
|
+
}, 2000);
|
|
326
|
+
}
|
|
327
|
+
}, this.CLI_TIMEOUT_MS);
|
|
328
|
+
|
|
329
|
+
// Read stdout line-by-line (NDJSON)
|
|
330
|
+
let lineBuf = '';
|
|
331
|
+
|
|
332
|
+
child.stdout.on('data', (data) => {
|
|
333
|
+
lineBuf += data.toString();
|
|
334
|
+
|
|
335
|
+
// Process complete lines
|
|
336
|
+
let newlineIdx;
|
|
337
|
+
while ((newlineIdx = lineBuf.indexOf('\n')) !== -1) {
|
|
338
|
+
const line = lineBuf.slice(0, newlineIdx).trim();
|
|
339
|
+
lineBuf = lineBuf.slice(newlineIdx + 1);
|
|
340
|
+
|
|
341
|
+
if (!line) continue;
|
|
342
|
+
|
|
343
|
+
try {
|
|
344
|
+
const event = JSON.parse(line);
|
|
345
|
+
const delta = this.extractTextDelta(provider, event);
|
|
346
|
+
|
|
347
|
+
if (delta) {
|
|
348
|
+
fullContent += delta;
|
|
349
|
+
pendingText += delta;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// Extract metadata from events
|
|
353
|
+
if (provider === 'claude_code') {
|
|
354
|
+
this._extractClaudeMetadata(event, (meta) => {
|
|
355
|
+
if (meta.model) modelUsed = meta.model;
|
|
356
|
+
if (meta.inputTokens != null) inputTokens = meta.inputTokens;
|
|
357
|
+
if (meta.outputTokens != null) outputTokens = meta.outputTokens;
|
|
358
|
+
if (meta.costUsd != null) costUsd = meta.costUsd;
|
|
359
|
+
});
|
|
360
|
+
} else if (provider === 'gemini_cli') {
|
|
361
|
+
this._extractGeminiMetadata(event, (meta) => {
|
|
362
|
+
if (meta.model) modelUsed = meta.model;
|
|
363
|
+
if (meta.inputTokens != null) inputTokens = meta.inputTokens;
|
|
364
|
+
if (meta.outputTokens != null) outputTokens = meta.outputTokens;
|
|
365
|
+
});
|
|
366
|
+
}
|
|
367
|
+
} catch {
|
|
368
|
+
// Skip non-JSON lines (stderr leaking into stdout, etc.)
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
// Capture stderr for debugging
|
|
374
|
+
let stderrBuf = '';
|
|
375
|
+
child.stderr.on('data', (data) => {
|
|
376
|
+
stderrBuf += data.toString();
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
// Wait for process to exit
|
|
380
|
+
await new Promise((resolve, reject) => {
|
|
381
|
+
child.on('close', (code) => {
|
|
382
|
+
clearTimeout(timeoutHandle);
|
|
383
|
+
resolve(code);
|
|
384
|
+
});
|
|
385
|
+
child.on('error', (err) => {
|
|
386
|
+
clearTimeout(timeoutHandle);
|
|
387
|
+
reject(err);
|
|
388
|
+
});
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
// Flush any remaining text
|
|
392
|
+
await flushChunk(true);
|
|
393
|
+
clearInterval(flushInterval);
|
|
394
|
+
|
|
395
|
+
// Send final is_final chunk marker
|
|
396
|
+
if (chunkIndex > 0) {
|
|
397
|
+
try {
|
|
398
|
+
await this.sendStreamChunk({
|
|
399
|
+
request_id: requestId,
|
|
400
|
+
chunk_index: chunkIndex++,
|
|
401
|
+
content: '',
|
|
402
|
+
is_final: true,
|
|
403
|
+
});
|
|
404
|
+
} catch (err) {
|
|
405
|
+
console.error(`[Tunnel] Failed to send final chunk for ${requestId}:`, err.message);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
const latencyMs = Date.now() - startTime;
|
|
410
|
+
|
|
411
|
+
if (fullContent) {
|
|
412
|
+
console.error(`[Tunnel] Streaming request ${requestId} completed (${latencyMs}ms, ${chunkIndex} chunks, ${fullContent.length} chars)`);
|
|
413
|
+
|
|
414
|
+
// Estimate tokens if not extracted from metadata
|
|
415
|
+
const estInputTokens = inputTokens || Math.ceil(request.prompt.length / 4);
|
|
416
|
+
const estOutputTokens = outputTokens || Math.ceil(fullContent.length / 4);
|
|
417
|
+
|
|
418
|
+
await this.sendResponse({
|
|
419
|
+
request_id: requestId,
|
|
420
|
+
content: fullContent,
|
|
421
|
+
model_used: modelUsed || request.provider,
|
|
422
|
+
tokens_used: (estInputTokens + estOutputTokens) || null,
|
|
423
|
+
input_tokens: estInputTokens,
|
|
424
|
+
output_tokens: estOutputTokens,
|
|
425
|
+
latency_ms: latencyMs,
|
|
426
|
+
});
|
|
427
|
+
} else {
|
|
428
|
+
// No content extracted from streaming — fall back to error
|
|
429
|
+
const errorMsg = stderrBuf.trim().slice(0, 500) || 'Streaming CLI produced no output';
|
|
430
|
+
console.error(`[Tunnel] Streaming request ${requestId} failed: ${errorMsg}`);
|
|
431
|
+
await this.sendResponse({
|
|
432
|
+
request_id: requestId,
|
|
433
|
+
error: errorMsg,
|
|
434
|
+
latency_ms: latencyMs,
|
|
435
|
+
});
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
} catch (err) {
|
|
439
|
+
clearInterval(flushInterval);
|
|
440
|
+
const latencyMs = Date.now() - startTime;
|
|
441
|
+
console.error(`[Tunnel] Streaming request ${requestId} exception: ${err.message}`);
|
|
442
|
+
|
|
443
|
+
// If we got partial content before the error, still send it
|
|
444
|
+
if (fullContent) {
|
|
445
|
+
await flushChunk(true);
|
|
446
|
+
await this.sendResponse({
|
|
447
|
+
request_id: requestId,
|
|
448
|
+
content: fullContent,
|
|
449
|
+
model_used: modelUsed || request.provider,
|
|
450
|
+
latency_ms: latencyMs,
|
|
451
|
+
});
|
|
452
|
+
} else {
|
|
453
|
+
await this.sendResponse({
|
|
454
|
+
request_id: requestId,
|
|
455
|
+
error: err.message || 'Streaming execution failed',
|
|
456
|
+
latency_ms: latencyMs,
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
/**
|
|
463
|
+
* Extract text delta from a parsed NDJSON event based on provider type.
|
|
464
|
+
* Returns the incremental text string, or null if this event isn't a text delta.
|
|
465
|
+
*/
|
|
466
|
+
extractTextDelta(provider, event) {
|
|
467
|
+
if (provider === 'claude_code') {
|
|
468
|
+
// Claude stream_event wrapping raw API events
|
|
469
|
+
// Filter: type === 'stream_event' && event.event.type === 'content_block_delta'
|
|
470
|
+
// && event.event.delta.type === 'text_delta'
|
|
471
|
+
if (event.type === 'stream_event' &&
|
|
472
|
+
event.event?.type === 'content_block_delta' &&
|
|
473
|
+
event.event?.delta?.type === 'text_delta') {
|
|
474
|
+
return event.event.delta.text || null;
|
|
475
|
+
}
|
|
476
|
+
return null;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
if (provider === 'gemini_cli') {
|
|
480
|
+
// Gemini: type === 'message' && role === 'assistant' && delta === true
|
|
481
|
+
if (event.type === 'message' &&
|
|
482
|
+
event.role === 'assistant' &&
|
|
483
|
+
event.delta === true) {
|
|
484
|
+
return event.content || null;
|
|
485
|
+
}
|
|
486
|
+
return null;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
return null;
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
/**
|
|
493
|
+
* Extract metadata (model, tokens, cost) from Claude Code stream events
|
|
494
|
+
*/
|
|
495
|
+
_extractClaudeMetadata(event, callback) {
|
|
496
|
+
// From 'system' init event: model info
|
|
497
|
+
if (event.type === 'system' && event.subtype === 'init' && event.model) {
|
|
498
|
+
callback({ model: event.model });
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// From 'result' event: usage stats
|
|
502
|
+
if (event.type === 'result') {
|
|
503
|
+
const meta = {};
|
|
504
|
+
if (event.usage) {
|
|
505
|
+
meta.inputTokens = event.usage.input_tokens || 0;
|
|
506
|
+
meta.outputTokens = event.usage.output_tokens || 0;
|
|
507
|
+
}
|
|
508
|
+
if (event.total_cost_usd != null) {
|
|
509
|
+
meta.costUsd = event.total_cost_usd;
|
|
510
|
+
}
|
|
511
|
+
// Extract model from modelUsage keys
|
|
512
|
+
if (event.modelUsage) {
|
|
513
|
+
const models = Object.keys(event.modelUsage);
|
|
514
|
+
if (models.length > 0) {
|
|
515
|
+
// Pick the model with highest cost as the primary model
|
|
516
|
+
let primary = models[0];
|
|
517
|
+
let highestCost = -1;
|
|
518
|
+
for (const [m, usage] of Object.entries(event.modelUsage)) {
|
|
519
|
+
const cost = usage.costUSD || 0;
|
|
520
|
+
if (cost > highestCost) {
|
|
521
|
+
highestCost = cost;
|
|
522
|
+
primary = m;
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
meta.model = primary;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
callback(meta);
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
/**
|
|
533
|
+
* Extract metadata from Gemini CLI stream events
|
|
534
|
+
*/
|
|
535
|
+
_extractGeminiMetadata(event, callback) {
|
|
536
|
+
// From 'init' event: model info
|
|
537
|
+
if (event.type === 'init' && event.model) {
|
|
538
|
+
callback({ model: event.model });
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
// From 'result' event: usage stats
|
|
542
|
+
if (event.type === 'result' && event.stats) {
|
|
543
|
+
callback({
|
|
544
|
+
inputTokens: event.stats.input_tokens || 0,
|
|
545
|
+
outputTokens: event.stats.output_tokens || 0,
|
|
546
|
+
});
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
/**
|
|
551
|
+
* Send a stream chunk to the server
|
|
552
|
+
*/
|
|
553
|
+
async sendStreamChunk(chunkData) {
|
|
554
|
+
const url = `${this.serverBaseUrl}/api/tunnel/stream-chunk`;
|
|
555
|
+
const res = await fetch(url, {
|
|
556
|
+
method: 'POST',
|
|
557
|
+
headers: {
|
|
558
|
+
'Authorization': `Bearer ${this.authToken}`,
|
|
559
|
+
'Content-Type': 'application/json',
|
|
560
|
+
},
|
|
561
|
+
body: JSON.stringify(chunkData),
|
|
562
|
+
});
|
|
563
|
+
|
|
564
|
+
if (!res.ok) {
|
|
565
|
+
const text = await res.text().catch(() => '');
|
|
566
|
+
throw new Error(`Stream chunk POST failed (${res.status}): ${text}`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
250
570
|
/**
|
|
251
571
|
* Send response back to server
|
|
252
572
|
*/
|