polydev-ai 1.9.39 → 1.9.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cliManager.js +129 -9
- package/lib/tunnelClient.js +381 -3
- package/mcp/stdio-wrapper.js +5 -1
- package/package.json +1 -1
package/lib/cliManager.js
CHANGED
|
@@ -578,6 +578,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
578
578
|
success: true,
|
|
579
579
|
content,
|
|
580
580
|
tokens_used: this.estimateTokens(prompt + content),
|
|
581
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
582
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
581
583
|
latency_ms: Date.now() - startTime,
|
|
582
584
|
provider: providerId,
|
|
583
585
|
mode: 'args',
|
|
@@ -689,6 +691,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
689
691
|
success: true,
|
|
690
692
|
content: jsonResult.content,
|
|
691
693
|
tokens_used: jsonResult.tokens_used,
|
|
694
|
+
input_tokens: jsonResult.input_tokens,
|
|
695
|
+
output_tokens: jsonResult.output_tokens,
|
|
692
696
|
latency_ms: Date.now() - startTime,
|
|
693
697
|
provider: providerId,
|
|
694
698
|
mode: 'args',
|
|
@@ -715,6 +719,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
715
719
|
success: true,
|
|
716
720
|
content,
|
|
717
721
|
tokens_used: this.estimateTokens(prompt + content),
|
|
722
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
723
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
718
724
|
latency_ms: Date.now() - startTime,
|
|
719
725
|
provider: providerId,
|
|
720
726
|
mode: 'args',
|
|
@@ -771,6 +777,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
771
777
|
success: true,
|
|
772
778
|
content,
|
|
773
779
|
tokens_used: this.estimateTokens(prompt + content),
|
|
780
|
+
input_tokens: Math.ceil(prompt.length / 4),
|
|
781
|
+
output_tokens: Math.ceil(content.length / 4),
|
|
774
782
|
latency_ms: Date.now() - startTime,
|
|
775
783
|
provider: providerId,
|
|
776
784
|
mode: 'args',
|
|
@@ -1038,17 +1046,13 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1038
1046
|
const content = json.result;
|
|
1039
1047
|
|
|
1040
1048
|
// Extract primary model from modelUsage
|
|
1041
|
-
// The primary model is the one with highest cost - that's the user's configured main model
|
|
1042
|
-
// (Haiku is used internally for quick tasks, but the expensive model is what the user chose)
|
|
1043
1049
|
let primaryModel = CLI_DEFAULT_MODELS['claude_code'] || 'cli_default';
|
|
1044
1050
|
const modelUsage = json.modelUsage || {};
|
|
1045
1051
|
const modelNames = Object.keys(modelUsage);
|
|
1046
1052
|
|
|
1047
1053
|
if (modelNames.length === 1) {
|
|
1048
|
-
// Only one model used - that's the primary
|
|
1049
1054
|
primaryModel = modelNames[0];
|
|
1050
1055
|
} else if (modelNames.length > 1) {
|
|
1051
|
-
// Multiple models - the one with highest cost is the user's configured main model
|
|
1052
1056
|
let highestCost = -1;
|
|
1053
1057
|
for (const [modelName, usage] of Object.entries(modelUsage)) {
|
|
1054
1058
|
const cost = usage.costUSD || 0;
|
|
@@ -1059,13 +1063,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1059
1063
|
}
|
|
1060
1064
|
}
|
|
1061
1065
|
|
|
1062
|
-
// Calculate
|
|
1066
|
+
// Calculate separate input and output tokens
|
|
1067
|
+
let totalInputTokens = 0;
|
|
1068
|
+
let totalOutputTokens = 0;
|
|
1063
1069
|
let totalTokens = 0;
|
|
1064
1070
|
let cacheTokens = 0;
|
|
1065
1071
|
for (const usage of Object.values(modelUsage)) {
|
|
1066
|
-
|
|
1072
|
+
totalInputTokens += (usage.inputTokens || 0);
|
|
1073
|
+
totalOutputTokens += (usage.outputTokens || 0);
|
|
1067
1074
|
totalTokens += (usage.inputTokens || 0) + (usage.outputTokens || 0);
|
|
1068
|
-
// Track cache tokens separately (for cost calculations, but not displayed as "tokens used")
|
|
1069
1075
|
cacheTokens += (usage.cacheReadInputTokens || 0) + (usage.cacheCreationInputTokens || 0);
|
|
1070
1076
|
}
|
|
1071
1077
|
|
|
@@ -1073,14 +1079,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1073
1079
|
content,
|
|
1074
1080
|
model_used: primaryModel,
|
|
1075
1081
|
tokens_used: totalTokens || json.usage?.input_tokens + json.usage?.output_tokens || 0,
|
|
1076
|
-
|
|
1082
|
+
input_tokens: totalInputTokens,
|
|
1083
|
+
output_tokens: totalOutputTokens,
|
|
1084
|
+
cache_tokens: cacheTokens,
|
|
1077
1085
|
cost_usd: json.total_cost_usd || 0,
|
|
1078
1086
|
model_usage: modelUsage,
|
|
1079
1087
|
session_id: json.session_id,
|
|
1080
1088
|
duration_ms: json.duration_ms
|
|
1081
1089
|
};
|
|
1082
1090
|
} catch (e) {
|
|
1083
|
-
// Not valid JSON, return null to fall back to text parsing
|
|
1084
1091
|
return null;
|
|
1085
1092
|
}
|
|
1086
1093
|
}
|
|
@@ -1374,6 +1381,119 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
|
|
|
1374
1381
|
return this.providers.get(providerId);
|
|
1375
1382
|
}
|
|
1376
1383
|
|
|
1384
|
+
// ============================================
|
|
1385
|
+
// Streaming CLI Methods (CLI-as-API real streaming)
|
|
1386
|
+
// ============================================
|
|
1387
|
+
|
|
1388
|
+
/**
|
|
1389
|
+
* Check if a provider supports streaming output
|
|
1390
|
+
*/
|
|
1391
|
+
supportsStreaming(providerId) {
|
|
1392
|
+
return providerId === 'claude_code' || providerId === 'gemini_cli';
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
/**
|
|
1396
|
+
* Build CLI args for streaming mode (NDJSON output)
|
|
1397
|
+
* @param {string} providerId - 'claude_code' or 'gemini_cli'
|
|
1398
|
+
* @param {string} prompt - The user prompt
|
|
1399
|
+
* @param {string|null} model - Optional model name
|
|
1400
|
+
* @returns {string[]} CLI arguments array
|
|
1401
|
+
*/
|
|
1402
|
+
buildStreamingArgs(providerId, prompt, model = null) {
|
|
1403
|
+
if (providerId === 'claude_code') {
|
|
1404
|
+
const args = ['-p', '--output-format', 'stream-json', '--verbose', '--include-partial-messages'];
|
|
1405
|
+
|
|
1406
|
+
// Add model flag if specified
|
|
1407
|
+
if (model) {
|
|
1408
|
+
// Normalize model names to Claude CLI aliases
|
|
1409
|
+
const claudeModelMap = {
|
|
1410
|
+
'claude-opus-4-5': 'opus',
|
|
1411
|
+
'claude-opus-4.5': 'opus',
|
|
1412
|
+
'claude-4.5-opus': 'opus',
|
|
1413
|
+
'claude-opus-4-5-20250514': 'opus',
|
|
1414
|
+
'claude-sonnet-4-5': 'sonnet',
|
|
1415
|
+
'claude-sonnet-4.5': 'sonnet',
|
|
1416
|
+
'claude-4.5-sonnet': 'sonnet',
|
|
1417
|
+
'claude-sonnet-4-5-20250514': 'sonnet',
|
|
1418
|
+
'claude-3-5-sonnet': 'sonnet',
|
|
1419
|
+
'claude-3-5-haiku': 'haiku',
|
|
1420
|
+
'claude-haiku-3-5': 'haiku',
|
|
1421
|
+
};
|
|
1422
|
+
const cliModel = claudeModelMap[model.toLowerCase()] || model;
|
|
1423
|
+
args.unshift('--model', cliModel);
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
args.push(prompt);
|
|
1427
|
+
return args;
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
if (providerId === 'gemini_cli') {
|
|
1431
|
+
const args = ['-o', 'stream-json'];
|
|
1432
|
+
|
|
1433
|
+
// Add model flag if specified
|
|
1434
|
+
if (model) {
|
|
1435
|
+
args.push('-m', model);
|
|
1436
|
+
}
|
|
1437
|
+
|
|
1438
|
+
// Add prompt prefix to prevent tool planning in non-interactive mode
|
|
1439
|
+
const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
|
|
1440
|
+
args.push('-p', geminiPrompt);
|
|
1441
|
+
return args;
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1444
|
+
throw new Error(`Provider ${providerId} does not support streaming`);
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
/**
|
|
1448
|
+
* Spawn a CLI process in streaming mode.
|
|
1449
|
+
* Returns the raw child process so the caller can read stdout line-by-line.
|
|
1450
|
+
*
|
|
1451
|
+
* @param {string} providerId - 'claude_code' or 'gemini_cli'
|
|
1452
|
+
* @param {string} prompt - The user prompt
|
|
1453
|
+
* @param {string|null} model - Optional model name
|
|
1454
|
+
* @returns {import('child_process').ChildProcess} The spawned process
|
|
1455
|
+
*/
|
|
1456
|
+
spawnStreamingCli(providerId, prompt, model = null) {
|
|
1457
|
+
const provider = this.providers.get(providerId);
|
|
1458
|
+
if (!provider) {
|
|
1459
|
+
throw new Error(`Unknown provider: ${providerId}`);
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
const args = this.buildStreamingArgs(providerId, prompt, model);
|
|
1463
|
+
|
|
1464
|
+
console.log(`[Polydev CLI] Spawning streaming ${providerId}: ${provider.command} ${args.slice(0, 4).join(' ')}...`);
|
|
1465
|
+
|
|
1466
|
+
const child = spawn(provider.command, args, {
|
|
1467
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
1468
|
+
shell: process.platform === 'win32',
|
|
1469
|
+
env: {
|
|
1470
|
+
...process.env,
|
|
1471
|
+
HOME: process.env.HOME || os.homedir(),
|
|
1472
|
+
CI: '1',
|
|
1473
|
+
NO_COLOR: '1',
|
|
1474
|
+
TERM: 'dumb',
|
|
1475
|
+
NONINTERACTIVE: '1',
|
|
1476
|
+
NO_BROWSER: '1',
|
|
1477
|
+
BROWSER: 'echo',
|
|
1478
|
+
DISPLAY: '',
|
|
1479
|
+
HEADLESS: '1',
|
|
1480
|
+
CODEX_DISABLE_UPDATE_CHECK: '1',
|
|
1481
|
+
CLAUDE_CODE_DISABLE_UPDATE_CHECK: '1',
|
|
1482
|
+
GEMINI_NO_BROWSER: '1',
|
|
1483
|
+
GOOGLE_NO_BROWSER: '1',
|
|
1484
|
+
npm_config_update_notifier: 'false',
|
|
1485
|
+
NO_UPDATE_NOTIFIER: '1'
|
|
1486
|
+
}
|
|
1487
|
+
});
|
|
1488
|
+
|
|
1489
|
+
// Close stdin immediately to prevent hanging (critical for Gemini CLI)
|
|
1490
|
+
if (child.stdin) {
|
|
1491
|
+
child.stdin.end();
|
|
1492
|
+
}
|
|
1493
|
+
|
|
1494
|
+
return child;
|
|
1495
|
+
}
|
|
1496
|
+
|
|
1377
1497
|
// ============================================
|
|
1378
1498
|
// Status Reporting Methods
|
|
1379
1499
|
// ============================================
|
package/lib/tunnelClient.js
CHANGED
|
@@ -21,11 +21,60 @@ class TunnelClient {
|
|
|
21
21
|
this.pollInterval = null;
|
|
22
22
|
this._processing = new Set(); // track in-flight request IDs
|
|
23
23
|
this._started = false;
|
|
24
|
+
this._consecutive401s = 0; // track auth failures for token reload
|
|
24
25
|
|
|
25
26
|
// Configurable intervals
|
|
26
27
|
this.HEARTBEAT_INTERVAL_MS = 30_000; // 30s
|
|
27
28
|
this.POLL_INTERVAL_MS = 3_000; // 3s
|
|
28
29
|
this.CLI_TIMEOUT_MS = 120_000; // 2 min per request
|
|
30
|
+
|
|
31
|
+
// Try to load freshest token from file on construction
|
|
32
|
+
// (env var may be stale if process was started long ago)
|
|
33
|
+
this._reloadTokenFromFile();
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Reload token from ~/.polydev.env file.
|
|
38
|
+
* The env var POLYDEV_USER_TOKEN may be stale (set when the IDE started the process).
|
|
39
|
+
* The file is always updated by the latest login.
|
|
40
|
+
*/
|
|
41
|
+
_reloadTokenFromFile() {
|
|
42
|
+
try {
|
|
43
|
+
const fs = require('fs');
|
|
44
|
+
const path = require('path');
|
|
45
|
+
const os = require('os');
|
|
46
|
+
const envFile = path.join(os.homedir(), '.polydev.env');
|
|
47
|
+
|
|
48
|
+
if (!fs.existsSync(envFile)) return false;
|
|
49
|
+
|
|
50
|
+
const content = fs.readFileSync(envFile, 'utf8');
|
|
51
|
+
const match = content.match(/POLYDEV_USER_TOKEN[=\s]["']?([^"'\n]+)["']?/);
|
|
52
|
+
if (match && match[1] && (match[1].startsWith('pd_') || match[1].startsWith('polydev_'))) {
|
|
53
|
+
if (match[1] !== this.authToken) {
|
|
54
|
+
console.error(`[Tunnel] Token reloaded from ${envFile} (was stale)`);
|
|
55
|
+
this.authToken = match[1];
|
|
56
|
+
this._consecutive401s = 0;
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
} catch {
|
|
61
|
+
// ignore file read errors
|
|
62
|
+
}
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Handle a 401 response — try reloading token from file
|
|
68
|
+
*/
|
|
69
|
+
_handle401() {
|
|
70
|
+
this._consecutive401s++;
|
|
71
|
+
// Try reload every 5 consecutive 401s (every ~15s at 3s poll interval)
|
|
72
|
+
if (this._consecutive401s % 5 === 1) {
|
|
73
|
+
const reloaded = this._reloadTokenFromFile();
|
|
74
|
+
if (reloaded) {
|
|
75
|
+
console.error('[Tunnel] Token refreshed after 401 — retrying');
|
|
76
|
+
}
|
|
77
|
+
}
|
|
29
78
|
}
|
|
30
79
|
|
|
31
80
|
/**
|
|
@@ -36,6 +85,7 @@ class TunnelClient {
|
|
|
36
85
|
this._started = true;
|
|
37
86
|
|
|
38
87
|
console.error('[Tunnel] Starting CLI-as-API tunnel client');
|
|
88
|
+
console.error(`[Tunnel] Auth token prefix: ${this.authToken ? this.authToken.substring(0, 8) + '...' : 'NONE'}`);
|
|
39
89
|
|
|
40
90
|
// Send initial heartbeat immediately
|
|
41
91
|
try {
|
|
@@ -102,9 +152,14 @@ class TunnelClient {
|
|
|
102
152
|
});
|
|
103
153
|
|
|
104
154
|
if (!res.ok) {
|
|
155
|
+
if (res.status === 401) {
|
|
156
|
+
this._handle401();
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
105
159
|
const text = await res.text().catch(() => '');
|
|
106
160
|
throw new Error(`Heartbeat failed (${res.status}): ${text}`);
|
|
107
161
|
}
|
|
162
|
+
this._consecutive401s = 0; // reset on success
|
|
108
163
|
}
|
|
109
164
|
|
|
110
165
|
/**
|
|
@@ -120,11 +175,14 @@ class TunnelClient {
|
|
|
120
175
|
});
|
|
121
176
|
|
|
122
177
|
if (!res.ok) {
|
|
123
|
-
|
|
124
|
-
|
|
178
|
+
if (res.status === 401) {
|
|
179
|
+
this._handle401();
|
|
180
|
+
return;
|
|
181
|
+
}
|
|
125
182
|
const text = await res.text().catch(() => '');
|
|
126
183
|
throw new Error(`Poll failed (${res.status}): ${text}`);
|
|
127
184
|
}
|
|
185
|
+
this._consecutive401s = 0; // reset on success
|
|
128
186
|
|
|
129
187
|
const data = await res.json();
|
|
130
188
|
const requests = data.requests || [];
|
|
@@ -147,8 +205,13 @@ class TunnelClient {
|
|
|
147
205
|
* Handle a single tunnel request by routing to CLI
|
|
148
206
|
*/
|
|
149
207
|
async handleRequest(request) {
|
|
208
|
+
// Use streaming path for providers that support it
|
|
209
|
+
if (this.cliManager.supportsStreaming(request.provider)) {
|
|
210
|
+
return this.handleStreamingRequest(request);
|
|
211
|
+
}
|
|
212
|
+
|
|
150
213
|
const startTime = Date.now();
|
|
151
|
-
console.error(`[Tunnel] Processing request ${request.id} → ${request.provider}`);
|
|
214
|
+
console.error(`[Tunnel] Processing request ${request.id} → ${request.provider} (non-streaming)`);
|
|
152
215
|
|
|
153
216
|
try {
|
|
154
217
|
const result = await this.cliManager.sendCliPrompt(
|
|
@@ -168,6 +231,8 @@ class TunnelClient {
|
|
|
168
231
|
content: result.content || '',
|
|
169
232
|
model_used: result.model || result.detectedModel || request.provider,
|
|
170
233
|
tokens_used: result.tokens_used || null,
|
|
234
|
+
input_tokens: result.input_tokens || null,
|
|
235
|
+
output_tokens: result.output_tokens || null,
|
|
171
236
|
latency_ms: latencyMs,
|
|
172
237
|
});
|
|
173
238
|
} else {
|
|
@@ -189,6 +254,319 @@ class TunnelClient {
|
|
|
189
254
|
}
|
|
190
255
|
}
|
|
191
256
|
|
|
257
|
+
/**
|
|
258
|
+
* Handle a streaming-capable tunnel request (Claude Code or Gemini CLI).
|
|
259
|
+
* Spawns CLI with stream-json flags, reads NDJSON stdout line-by-line,
|
|
260
|
+
* extracts text deltas, batches them, and POSTs chunks to /api/tunnel/stream-chunk.
|
|
261
|
+
* On completion, POSTs final response to /api/tunnel/respond for backward compat.
|
|
262
|
+
*/
|
|
263
|
+
async handleStreamingRequest(request) {
|
|
264
|
+
const startTime = Date.now();
|
|
265
|
+
const requestId = request.id;
|
|
266
|
+
const provider = request.provider;
|
|
267
|
+
console.error(`[Tunnel] Processing request ${requestId} → ${provider} (streaming)`);
|
|
268
|
+
|
|
269
|
+
let child;
|
|
270
|
+
let fullContent = '';
|
|
271
|
+
let chunkIndex = 0;
|
|
272
|
+
let pendingText = '';
|
|
273
|
+
let lastFlushTime = Date.now();
|
|
274
|
+
let modelUsed = null;
|
|
275
|
+
let inputTokens = null;
|
|
276
|
+
let outputTokens = null;
|
|
277
|
+
let costUsd = null;
|
|
278
|
+
|
|
279
|
+
const BATCH_INTERVAL_MS = 300;
|
|
280
|
+
const BATCH_MIN_CHARS = 10;
|
|
281
|
+
|
|
282
|
+
// Flush accumulated text as a stream chunk
|
|
283
|
+
const flushChunk = async (force = false) => {
|
|
284
|
+
if (!pendingText) return;
|
|
285
|
+
if (!force && pendingText.length < BATCH_MIN_CHARS && (Date.now() - lastFlushTime) < BATCH_INTERVAL_MS) {
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
const text = pendingText;
|
|
290
|
+
pendingText = '';
|
|
291
|
+
lastFlushTime = Date.now();
|
|
292
|
+
|
|
293
|
+
try {
|
|
294
|
+
await this.sendStreamChunk({
|
|
295
|
+
request_id: requestId,
|
|
296
|
+
chunk_index: chunkIndex++,
|
|
297
|
+
content: text,
|
|
298
|
+
});
|
|
299
|
+
} catch (err) {
|
|
300
|
+
console.error(`[Tunnel] Failed to send stream chunk for ${requestId}:`, err.message);
|
|
301
|
+
}
|
|
302
|
+
};
|
|
303
|
+
|
|
304
|
+
// Set up periodic flushing
|
|
305
|
+
const flushInterval = setInterval(async () => {
|
|
306
|
+
if (pendingText.length > 0 && (Date.now() - lastFlushTime) >= BATCH_INTERVAL_MS) {
|
|
307
|
+
await flushChunk(true);
|
|
308
|
+
}
|
|
309
|
+
}, BATCH_INTERVAL_MS);
|
|
310
|
+
|
|
311
|
+
try {
|
|
312
|
+
child = this.cliManager.spawnStreamingCli(
|
|
313
|
+
provider,
|
|
314
|
+
request.prompt,
|
|
315
|
+
request.model_requested || null
|
|
316
|
+
);
|
|
317
|
+
|
|
318
|
+
// Set up CLI timeout
|
|
319
|
+
const timeoutHandle = setTimeout(() => {
|
|
320
|
+
if (child && !child.killed) {
|
|
321
|
+
console.error(`[Tunnel] Streaming request ${requestId} timed out after ${this.CLI_TIMEOUT_MS}ms`);
|
|
322
|
+
child.kill('SIGTERM');
|
|
323
|
+
setTimeout(() => {
|
|
324
|
+
if (!child.killed) child.kill('SIGKILL');
|
|
325
|
+
}, 2000);
|
|
326
|
+
}
|
|
327
|
+
}, this.CLI_TIMEOUT_MS);
|
|
328
|
+
|
|
329
|
+
// Read stdout line-by-line (NDJSON)
|
|
330
|
+
let lineBuf = '';
|
|
331
|
+
|
|
332
|
+
child.stdout.on('data', (data) => {
|
|
333
|
+
lineBuf += data.toString();
|
|
334
|
+
|
|
335
|
+
// Process complete lines
|
|
336
|
+
let newlineIdx;
|
|
337
|
+
while ((newlineIdx = lineBuf.indexOf('\n')) !== -1) {
|
|
338
|
+
const line = lineBuf.slice(0, newlineIdx).trim();
|
|
339
|
+
lineBuf = lineBuf.slice(newlineIdx + 1);
|
|
340
|
+
|
|
341
|
+
if (!line) continue;
|
|
342
|
+
|
|
343
|
+
try {
|
|
344
|
+
const event = JSON.parse(line);
|
|
345
|
+
const delta = this.extractTextDelta(provider, event);
|
|
346
|
+
|
|
347
|
+
if (delta) {
|
|
348
|
+
fullContent += delta;
|
|
349
|
+
pendingText += delta;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// Extract metadata from events
|
|
353
|
+
if (provider === 'claude_code') {
|
|
354
|
+
this._extractClaudeMetadata(event, (meta) => {
|
|
355
|
+
if (meta.model) modelUsed = meta.model;
|
|
356
|
+
if (meta.inputTokens != null) inputTokens = meta.inputTokens;
|
|
357
|
+
if (meta.outputTokens != null) outputTokens = meta.outputTokens;
|
|
358
|
+
if (meta.costUsd != null) costUsd = meta.costUsd;
|
|
359
|
+
});
|
|
360
|
+
} else if (provider === 'gemini_cli') {
|
|
361
|
+
this._extractGeminiMetadata(event, (meta) => {
|
|
362
|
+
if (meta.model) modelUsed = meta.model;
|
|
363
|
+
if (meta.inputTokens != null) inputTokens = meta.inputTokens;
|
|
364
|
+
if (meta.outputTokens != null) outputTokens = meta.outputTokens;
|
|
365
|
+
});
|
|
366
|
+
}
|
|
367
|
+
} catch {
|
|
368
|
+
// Skip non-JSON lines (stderr leaking into stdout, etc.)
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
// Capture stderr for debugging
|
|
374
|
+
let stderrBuf = '';
|
|
375
|
+
child.stderr.on('data', (data) => {
|
|
376
|
+
stderrBuf += data.toString();
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
// Wait for process to exit
|
|
380
|
+
await new Promise((resolve, reject) => {
|
|
381
|
+
child.on('close', (code) => {
|
|
382
|
+
clearTimeout(timeoutHandle);
|
|
383
|
+
resolve(code);
|
|
384
|
+
});
|
|
385
|
+
child.on('error', (err) => {
|
|
386
|
+
clearTimeout(timeoutHandle);
|
|
387
|
+
reject(err);
|
|
388
|
+
});
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
// Flush any remaining text
|
|
392
|
+
await flushChunk(true);
|
|
393
|
+
clearInterval(flushInterval);
|
|
394
|
+
|
|
395
|
+
// Send final is_final chunk marker
|
|
396
|
+
if (chunkIndex > 0) {
|
|
397
|
+
try {
|
|
398
|
+
await this.sendStreamChunk({
|
|
399
|
+
request_id: requestId,
|
|
400
|
+
chunk_index: chunkIndex++,
|
|
401
|
+
content: '',
|
|
402
|
+
is_final: true,
|
|
403
|
+
});
|
|
404
|
+
} catch (err) {
|
|
405
|
+
console.error(`[Tunnel] Failed to send final chunk for ${requestId}:`, err.message);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
const latencyMs = Date.now() - startTime;
|
|
410
|
+
|
|
411
|
+
if (fullContent) {
|
|
412
|
+
console.error(`[Tunnel] Streaming request ${requestId} completed (${latencyMs}ms, ${chunkIndex} chunks, ${fullContent.length} chars)`);
|
|
413
|
+
|
|
414
|
+
// Estimate tokens if not extracted from metadata
|
|
415
|
+
const estInputTokens = inputTokens || Math.ceil(request.prompt.length / 4);
|
|
416
|
+
const estOutputTokens = outputTokens || Math.ceil(fullContent.length / 4);
|
|
417
|
+
|
|
418
|
+
await this.sendResponse({
|
|
419
|
+
request_id: requestId,
|
|
420
|
+
content: fullContent,
|
|
421
|
+
model_used: modelUsed || request.provider,
|
|
422
|
+
tokens_used: (estInputTokens + estOutputTokens) || null,
|
|
423
|
+
input_tokens: estInputTokens,
|
|
424
|
+
output_tokens: estOutputTokens,
|
|
425
|
+
latency_ms: latencyMs,
|
|
426
|
+
});
|
|
427
|
+
} else {
|
|
428
|
+
// No content extracted from streaming — fall back to error
|
|
429
|
+
const errorMsg = stderrBuf.trim().slice(0, 500) || 'Streaming CLI produced no output';
|
|
430
|
+
console.error(`[Tunnel] Streaming request ${requestId} failed: ${errorMsg}`);
|
|
431
|
+
await this.sendResponse({
|
|
432
|
+
request_id: requestId,
|
|
433
|
+
error: errorMsg,
|
|
434
|
+
latency_ms: latencyMs,
|
|
435
|
+
});
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
} catch (err) {
|
|
439
|
+
clearInterval(flushInterval);
|
|
440
|
+
const latencyMs = Date.now() - startTime;
|
|
441
|
+
console.error(`[Tunnel] Streaming request ${requestId} exception: ${err.message}`);
|
|
442
|
+
|
|
443
|
+
// If we got partial content before the error, still send it
|
|
444
|
+
if (fullContent) {
|
|
445
|
+
await flushChunk(true);
|
|
446
|
+
await this.sendResponse({
|
|
447
|
+
request_id: requestId,
|
|
448
|
+
content: fullContent,
|
|
449
|
+
model_used: modelUsed || request.provider,
|
|
450
|
+
latency_ms: latencyMs,
|
|
451
|
+
});
|
|
452
|
+
} else {
|
|
453
|
+
await this.sendResponse({
|
|
454
|
+
request_id: requestId,
|
|
455
|
+
error: err.message || 'Streaming execution failed',
|
|
456
|
+
latency_ms: latencyMs,
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
/**
|
|
463
|
+
* Extract text delta from a parsed NDJSON event based on provider type.
|
|
464
|
+
* Returns the incremental text string, or null if this event isn't a text delta.
|
|
465
|
+
*/
|
|
466
|
+
extractTextDelta(provider, event) {
|
|
467
|
+
if (provider === 'claude_code') {
|
|
468
|
+
// Claude stream_event wrapping raw API events
|
|
469
|
+
// Filter: type === 'stream_event' && event.event.type === 'content_block_delta'
|
|
470
|
+
// && event.event.delta.type === 'text_delta'
|
|
471
|
+
if (event.type === 'stream_event' &&
|
|
472
|
+
event.event?.type === 'content_block_delta' &&
|
|
473
|
+
event.event?.delta?.type === 'text_delta') {
|
|
474
|
+
return event.event.delta.text || null;
|
|
475
|
+
}
|
|
476
|
+
return null;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
if (provider === 'gemini_cli') {
|
|
480
|
+
// Gemini: type === 'message' && role === 'assistant' && delta === true
|
|
481
|
+
if (event.type === 'message' &&
|
|
482
|
+
event.role === 'assistant' &&
|
|
483
|
+
event.delta === true) {
|
|
484
|
+
return event.content || null;
|
|
485
|
+
}
|
|
486
|
+
return null;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
return null;
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
/**
|
|
493
|
+
* Extract metadata (model, tokens, cost) from Claude Code stream events
|
|
494
|
+
*/
|
|
495
|
+
_extractClaudeMetadata(event, callback) {
|
|
496
|
+
// From 'system' init event: model info
|
|
497
|
+
if (event.type === 'system' && event.subtype === 'init' && event.model) {
|
|
498
|
+
callback({ model: event.model });
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// From 'result' event: usage stats
|
|
502
|
+
if (event.type === 'result') {
|
|
503
|
+
const meta = {};
|
|
504
|
+
if (event.usage) {
|
|
505
|
+
meta.inputTokens = event.usage.input_tokens || 0;
|
|
506
|
+
meta.outputTokens = event.usage.output_tokens || 0;
|
|
507
|
+
}
|
|
508
|
+
if (event.total_cost_usd != null) {
|
|
509
|
+
meta.costUsd = event.total_cost_usd;
|
|
510
|
+
}
|
|
511
|
+
// Extract model from modelUsage keys
|
|
512
|
+
if (event.modelUsage) {
|
|
513
|
+
const models = Object.keys(event.modelUsage);
|
|
514
|
+
if (models.length > 0) {
|
|
515
|
+
// Pick the model with highest cost as the primary model
|
|
516
|
+
let primary = models[0];
|
|
517
|
+
let highestCost = -1;
|
|
518
|
+
for (const [m, usage] of Object.entries(event.modelUsage)) {
|
|
519
|
+
const cost = usage.costUSD || 0;
|
|
520
|
+
if (cost > highestCost) {
|
|
521
|
+
highestCost = cost;
|
|
522
|
+
primary = m;
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
meta.model = primary;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
callback(meta);
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
/**
|
|
533
|
+
* Extract metadata from Gemini CLI stream events
|
|
534
|
+
*/
|
|
535
|
+
_extractGeminiMetadata(event, callback) {
|
|
536
|
+
// From 'init' event: model info
|
|
537
|
+
if (event.type === 'init' && event.model) {
|
|
538
|
+
callback({ model: event.model });
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
// From 'result' event: usage stats
|
|
542
|
+
if (event.type === 'result' && event.stats) {
|
|
543
|
+
callback({
|
|
544
|
+
inputTokens: event.stats.input_tokens || 0,
|
|
545
|
+
outputTokens: event.stats.output_tokens || 0,
|
|
546
|
+
});
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
/**
|
|
551
|
+
* Send a stream chunk to the server
|
|
552
|
+
*/
|
|
553
|
+
async sendStreamChunk(chunkData) {
|
|
554
|
+
const url = `${this.serverBaseUrl}/api/tunnel/stream-chunk`;
|
|
555
|
+
const res = await fetch(url, {
|
|
556
|
+
method: 'POST',
|
|
557
|
+
headers: {
|
|
558
|
+
'Authorization': `Bearer ${this.authToken}`,
|
|
559
|
+
'Content-Type': 'application/json',
|
|
560
|
+
},
|
|
561
|
+
body: JSON.stringify(chunkData),
|
|
562
|
+
});
|
|
563
|
+
|
|
564
|
+
if (!res.ok) {
|
|
565
|
+
const text = await res.text().catch(() => '');
|
|
566
|
+
throw new Error(`Stream chunk POST failed (${res.status}): ${text}`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
192
570
|
/**
|
|
193
571
|
* Send response back to server
|
|
194
572
|
*/
|
package/mcp/stdio-wrapper.js
CHANGED
|
@@ -2770,7 +2770,7 @@ To re-login: /polydev:login`
|
|
|
2770
2770
|
const statusFile = path.join(polydevevDir, 'cli-status.json');
|
|
2771
2771
|
|
|
2772
2772
|
// Ensure directory exists
|
|
2773
|
-
if (!fs.existsSync(
|
|
2773
|
+
if (!fs.existsSync(polydevevDir)) {
|
|
2774
2774
|
fs.mkdirSync(polydeveevDir, { recursive: true });
|
|
2775
2775
|
}
|
|
2776
2776
|
|
|
@@ -2962,6 +2962,10 @@ To re-login: /polydev:login`
|
|
|
2962
2962
|
return; // No auth, skip tunnel
|
|
2963
2963
|
}
|
|
2964
2964
|
|
|
2965
|
+
// Reload token from file to get the freshest one
|
|
2966
|
+
// (env var may be stale if IDE started this process long ago)
|
|
2967
|
+
this.reloadTokenFromFiles();
|
|
2968
|
+
|
|
2965
2969
|
try {
|
|
2966
2970
|
this.tunnelClient = new TunnelClient(
|
|
2967
2971
|
this.serverUrl, // https://www.polydev.ai/api/mcp
|