polydev-ai 1.9.39 → 1.9.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cliManager.js CHANGED
@@ -578,6 +578,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
578
578
  success: true,
579
579
  content,
580
580
  tokens_used: this.estimateTokens(prompt + content),
581
+ input_tokens: Math.ceil(prompt.length / 4),
582
+ output_tokens: Math.ceil(content.length / 4),
581
583
  latency_ms: Date.now() - startTime,
582
584
  provider: providerId,
583
585
  mode: 'args',
@@ -689,6 +691,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
689
691
  success: true,
690
692
  content: jsonResult.content,
691
693
  tokens_used: jsonResult.tokens_used,
694
+ input_tokens: jsonResult.input_tokens,
695
+ output_tokens: jsonResult.output_tokens,
692
696
  latency_ms: Date.now() - startTime,
693
697
  provider: providerId,
694
698
  mode: 'args',
@@ -715,6 +719,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
715
719
  success: true,
716
720
  content,
717
721
  tokens_used: this.estimateTokens(prompt + content),
722
+ input_tokens: Math.ceil(prompt.length / 4),
723
+ output_tokens: Math.ceil(content.length / 4),
718
724
  latency_ms: Date.now() - startTime,
719
725
  provider: providerId,
720
726
  mode: 'args',
@@ -771,6 +777,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
771
777
  success: true,
772
778
  content,
773
779
  tokens_used: this.estimateTokens(prompt + content),
780
+ input_tokens: Math.ceil(prompt.length / 4),
781
+ output_tokens: Math.ceil(content.length / 4),
774
782
  latency_ms: Date.now() - startTime,
775
783
  provider: providerId,
776
784
  mode: 'args',
@@ -1038,17 +1046,13 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1038
1046
  const content = json.result;
1039
1047
 
1040
1048
  // Extract primary model from modelUsage
1041
- // The primary model is the one with highest cost - that's the user's configured main model
1042
- // (Haiku is used internally for quick tasks, but the expensive model is what the user chose)
1043
1049
  let primaryModel = CLI_DEFAULT_MODELS['claude_code'] || 'cli_default';
1044
1050
  const modelUsage = json.modelUsage || {};
1045
1051
  const modelNames = Object.keys(modelUsage);
1046
1052
 
1047
1053
  if (modelNames.length === 1) {
1048
- // Only one model used - that's the primary
1049
1054
  primaryModel = modelNames[0];
1050
1055
  } else if (modelNames.length > 1) {
1051
- // Multiple models - the one with highest cost is the user's configured main model
1052
1056
  let highestCost = -1;
1053
1057
  for (const [modelName, usage] of Object.entries(modelUsage)) {
1054
1058
  const cost = usage.costUSD || 0;
@@ -1059,13 +1063,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1059
1063
  }
1060
1064
  }
1061
1065
 
1062
- // Calculate total tokens (excluding cache tokens which are just infrastructure overhead)
1066
+ // Calculate separate input and output tokens
1067
+ let totalInputTokens = 0;
1068
+ let totalOutputTokens = 0;
1063
1069
  let totalTokens = 0;
1064
1070
  let cacheTokens = 0;
1065
1071
  for (const usage of Object.values(modelUsage)) {
1066
- // Count actual input/output tokens
1072
+ totalInputTokens += (usage.inputTokens || 0);
1073
+ totalOutputTokens += (usage.outputTokens || 0);
1067
1074
  totalTokens += (usage.inputTokens || 0) + (usage.outputTokens || 0);
1068
- // Track cache tokens separately (for cost calculations, but not displayed as "tokens used")
1069
1075
  cacheTokens += (usage.cacheReadInputTokens || 0) + (usage.cacheCreationInputTokens || 0);
1070
1076
  }
1071
1077
 
@@ -1073,14 +1079,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1073
1079
  content,
1074
1080
  model_used: primaryModel,
1075
1081
  tokens_used: totalTokens || json.usage?.input_tokens + json.usage?.output_tokens || 0,
1076
- cache_tokens: cacheTokens, // Separate field for cache tokens
1082
+ input_tokens: totalInputTokens,
1083
+ output_tokens: totalOutputTokens,
1084
+ cache_tokens: cacheTokens,
1077
1085
  cost_usd: json.total_cost_usd || 0,
1078
1086
  model_usage: modelUsage,
1079
1087
  session_id: json.session_id,
1080
1088
  duration_ms: json.duration_ms
1081
1089
  };
1082
1090
  } catch (e) {
1083
- // Not valid JSON, return null to fall back to text parsing
1084
1091
  return null;
1085
1092
  }
1086
1093
  }
@@ -1374,6 +1381,119 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1374
1381
  return this.providers.get(providerId);
1375
1382
  }
1376
1383
 
1384
+ // ============================================
1385
+ // Streaming CLI Methods (CLI-as-API real streaming)
1386
+ // ============================================
1387
+
1388
+ /**
1389
+ * Check if a provider supports streaming output
1390
+ */
1391
+ supportsStreaming(providerId) {
1392
+ return providerId === 'claude_code' || providerId === 'gemini_cli';
1393
+ }
1394
+
1395
+ /**
1396
+ * Build CLI args for streaming mode (NDJSON output)
1397
+ * @param {string} providerId - 'claude_code' or 'gemini_cli'
1398
+ * @param {string} prompt - The user prompt
1399
+ * @param {string|null} model - Optional model name
1400
+ * @returns {string[]} CLI arguments array
1401
+ */
1402
+ buildStreamingArgs(providerId, prompt, model = null) {
1403
+ if (providerId === 'claude_code') {
1404
+ const args = ['-p', '--output-format', 'stream-json', '--verbose', '--include-partial-messages'];
1405
+
1406
+ // Add model flag if specified
1407
+ if (model) {
1408
+ // Normalize model names to Claude CLI aliases
1409
+ const claudeModelMap = {
1410
+ 'claude-opus-4-5': 'opus',
1411
+ 'claude-opus-4.5': 'opus',
1412
+ 'claude-4.5-opus': 'opus',
1413
+ 'claude-opus-4-5-20250514': 'opus',
1414
+ 'claude-sonnet-4-5': 'sonnet',
1415
+ 'claude-sonnet-4.5': 'sonnet',
1416
+ 'claude-4.5-sonnet': 'sonnet',
1417
+ 'claude-sonnet-4-5-20250514': 'sonnet',
1418
+ 'claude-3-5-sonnet': 'sonnet',
1419
+ 'claude-3-5-haiku': 'haiku',
1420
+ 'claude-haiku-3-5': 'haiku',
1421
+ };
1422
+ const cliModel = claudeModelMap[model.toLowerCase()] || model;
1423
+ args.unshift('--model', cliModel);
1424
+ }
1425
+
1426
+ args.push(prompt);
1427
+ return args;
1428
+ }
1429
+
1430
+ if (providerId === 'gemini_cli') {
1431
+ const args = ['-o', 'stream-json'];
1432
+
1433
+ // Add model flag if specified
1434
+ if (model) {
1435
+ args.push('-m', model);
1436
+ }
1437
+
1438
+ // Add prompt prefix to prevent tool planning in non-interactive mode
1439
+ const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
1440
+ args.push('-p', geminiPrompt);
1441
+ return args;
1442
+ }
1443
+
1444
+ throw new Error(`Provider ${providerId} does not support streaming`);
1445
+ }
1446
+
1447
+ /**
1448
+ * Spawn a CLI process in streaming mode.
1449
+ * Returns the raw child process so the caller can read stdout line-by-line.
1450
+ *
1451
+ * @param {string} providerId - 'claude_code' or 'gemini_cli'
1452
+ * @param {string} prompt - The user prompt
1453
+ * @param {string|null} model - Optional model name
1454
+ * @returns {import('child_process').ChildProcess} The spawned process
1455
+ */
1456
+ spawnStreamingCli(providerId, prompt, model = null) {
1457
+ const provider = this.providers.get(providerId);
1458
+ if (!provider) {
1459
+ throw new Error(`Unknown provider: ${providerId}`);
1460
+ }
1461
+
1462
+ const args = this.buildStreamingArgs(providerId, prompt, model);
1463
+
1464
+ console.log(`[Polydev CLI] Spawning streaming ${providerId}: ${provider.command} ${args.slice(0, 4).join(' ')}...`);
1465
+
1466
+ const child = spawn(provider.command, args, {
1467
+ stdio: ['pipe', 'pipe', 'pipe'],
1468
+ shell: process.platform === 'win32',
1469
+ env: {
1470
+ ...process.env,
1471
+ HOME: process.env.HOME || os.homedir(),
1472
+ CI: '1',
1473
+ NO_COLOR: '1',
1474
+ TERM: 'dumb',
1475
+ NONINTERACTIVE: '1',
1476
+ NO_BROWSER: '1',
1477
+ BROWSER: 'echo',
1478
+ DISPLAY: '',
1479
+ HEADLESS: '1',
1480
+ CODEX_DISABLE_UPDATE_CHECK: '1',
1481
+ CLAUDE_CODE_DISABLE_UPDATE_CHECK: '1',
1482
+ GEMINI_NO_BROWSER: '1',
1483
+ GOOGLE_NO_BROWSER: '1',
1484
+ npm_config_update_notifier: 'false',
1485
+ NO_UPDATE_NOTIFIER: '1'
1486
+ }
1487
+ });
1488
+
1489
+ // Close stdin immediately to prevent hanging (critical for Gemini CLI)
1490
+ if (child.stdin) {
1491
+ child.stdin.end();
1492
+ }
1493
+
1494
+ return child;
1495
+ }
1496
+
1377
1497
  // ============================================
1378
1498
  // Status Reporting Methods
1379
1499
  // ============================================
@@ -21,11 +21,60 @@ class TunnelClient {
21
21
  this.pollInterval = null;
22
22
  this._processing = new Set(); // track in-flight request IDs
23
23
  this._started = false;
24
+ this._consecutive401s = 0; // track auth failures for token reload
24
25
 
25
26
  // Configurable intervals
26
27
  this.HEARTBEAT_INTERVAL_MS = 30_000; // 30s
27
28
  this.POLL_INTERVAL_MS = 3_000; // 3s
28
29
  this.CLI_TIMEOUT_MS = 120_000; // 2 min per request
30
+
31
+ // Try to load freshest token from file on construction
32
+ // (env var may be stale if process was started long ago)
33
+ this._reloadTokenFromFile();
34
+ }
35
+
36
+ /**
37
+ * Reload token from ~/.polydev.env file.
38
+ * The env var POLYDEV_USER_TOKEN may be stale (set when the IDE started the process).
39
+ * The file is always updated by the latest login.
40
+ */
41
+ _reloadTokenFromFile() {
42
+ try {
43
+ const fs = require('fs');
44
+ const path = require('path');
45
+ const os = require('os');
46
+ const envFile = path.join(os.homedir(), '.polydev.env');
47
+
48
+ if (!fs.existsSync(envFile)) return false;
49
+
50
+ const content = fs.readFileSync(envFile, 'utf8');
51
+ const match = content.match(/POLYDEV_USER_TOKEN[=\s]["']?([^"'\n]+)["']?/);
52
+ if (match && match[1] && (match[1].startsWith('pd_') || match[1].startsWith('polydev_'))) {
53
+ if (match[1] !== this.authToken) {
54
+ console.error(`[Tunnel] Token reloaded from ${envFile} (was stale)`);
55
+ this.authToken = match[1];
56
+ this._consecutive401s = 0;
57
+ return true;
58
+ }
59
+ }
60
+ } catch {
61
+ // ignore file read errors
62
+ }
63
+ return false;
64
+ }
65
+
66
+ /**
67
+ * Handle a 401 response — try reloading token from file
68
+ */
69
+ _handle401() {
70
+ this._consecutive401s++;
71
+ // Try reload every 5 consecutive 401s (every ~15s at 3s poll interval)
72
+ if (this._consecutive401s % 5 === 1) {
73
+ const reloaded = this._reloadTokenFromFile();
74
+ if (reloaded) {
75
+ console.error('[Tunnel] Token refreshed after 401 — retrying');
76
+ }
77
+ }
29
78
  }
30
79
 
31
80
  /**
@@ -36,6 +85,7 @@ class TunnelClient {
36
85
  this._started = true;
37
86
 
38
87
  console.error('[Tunnel] Starting CLI-as-API tunnel client');
88
+ console.error(`[Tunnel] Auth token prefix: ${this.authToken ? this.authToken.substring(0, 8) + '...' : 'NONE'}`);
39
89
 
40
90
  // Send initial heartbeat immediately
41
91
  try {
@@ -102,9 +152,14 @@ class TunnelClient {
102
152
  });
103
153
 
104
154
  if (!res.ok) {
155
+ if (res.status === 401) {
156
+ this._handle401();
157
+ return;
158
+ }
105
159
  const text = await res.text().catch(() => '');
106
160
  throw new Error(`Heartbeat failed (${res.status}): ${text}`);
107
161
  }
162
+ this._consecutive401s = 0; // reset on success
108
163
  }
109
164
 
110
165
  /**
@@ -120,11 +175,14 @@ class TunnelClient {
120
175
  });
121
176
 
122
177
  if (!res.ok) {
123
- // 401 is expected if token expired don't spam logs
124
- if (res.status === 401) return;
178
+ if (res.status === 401) {
179
+ this._handle401();
180
+ return;
181
+ }
125
182
  const text = await res.text().catch(() => '');
126
183
  throw new Error(`Poll failed (${res.status}): ${text}`);
127
184
  }
185
+ this._consecutive401s = 0; // reset on success
128
186
 
129
187
  const data = await res.json();
130
188
  const requests = data.requests || [];
@@ -147,8 +205,13 @@ class TunnelClient {
147
205
  * Handle a single tunnel request by routing to CLI
148
206
  */
149
207
  async handleRequest(request) {
208
+ // Use streaming path for providers that support it
209
+ if (this.cliManager.supportsStreaming(request.provider)) {
210
+ return this.handleStreamingRequest(request);
211
+ }
212
+
150
213
  const startTime = Date.now();
151
- console.error(`[Tunnel] Processing request ${request.id} → ${request.provider}`);
214
+ console.error(`[Tunnel] Processing request ${request.id} → ${request.provider} (non-streaming)`);
152
215
 
153
216
  try {
154
217
  const result = await this.cliManager.sendCliPrompt(
@@ -168,6 +231,8 @@ class TunnelClient {
168
231
  content: result.content || '',
169
232
  model_used: result.model || result.detectedModel || request.provider,
170
233
  tokens_used: result.tokens_used || null,
234
+ input_tokens: result.input_tokens || null,
235
+ output_tokens: result.output_tokens || null,
171
236
  latency_ms: latencyMs,
172
237
  });
173
238
  } else {
@@ -189,6 +254,319 @@ class TunnelClient {
189
254
  }
190
255
  }
191
256
 
257
+ /**
258
+ * Handle a streaming-capable tunnel request (Claude Code or Gemini CLI).
259
+ * Spawns CLI with stream-json flags, reads NDJSON stdout line-by-line,
260
+ * extracts text deltas, batches them, and POSTs chunks to /api/tunnel/stream-chunk.
261
+ * On completion, POSTs final response to /api/tunnel/respond for backward compat.
262
+ */
263
+ async handleStreamingRequest(request) {
264
+ const startTime = Date.now();
265
+ const requestId = request.id;
266
+ const provider = request.provider;
267
+ console.error(`[Tunnel] Processing request ${requestId} → ${provider} (streaming)`);
268
+
269
+ let child;
270
+ let fullContent = '';
271
+ let chunkIndex = 0;
272
+ let pendingText = '';
273
+ let lastFlushTime = Date.now();
274
+ let modelUsed = null;
275
+ let inputTokens = null;
276
+ let outputTokens = null;
277
+ let costUsd = null;
278
+
279
+ const BATCH_INTERVAL_MS = 300;
280
+ const BATCH_MIN_CHARS = 10;
281
+
282
+ // Flush accumulated text as a stream chunk
283
+ const flushChunk = async (force = false) => {
284
+ if (!pendingText) return;
285
+ if (!force && pendingText.length < BATCH_MIN_CHARS && (Date.now() - lastFlushTime) < BATCH_INTERVAL_MS) {
286
+ return;
287
+ }
288
+
289
+ const text = pendingText;
290
+ pendingText = '';
291
+ lastFlushTime = Date.now();
292
+
293
+ try {
294
+ await this.sendStreamChunk({
295
+ request_id: requestId,
296
+ chunk_index: chunkIndex++,
297
+ content: text,
298
+ });
299
+ } catch (err) {
300
+ console.error(`[Tunnel] Failed to send stream chunk for ${requestId}:`, err.message);
301
+ }
302
+ };
303
+
304
+ // Set up periodic flushing
305
+ const flushInterval = setInterval(async () => {
306
+ if (pendingText.length > 0 && (Date.now() - lastFlushTime) >= BATCH_INTERVAL_MS) {
307
+ await flushChunk(true);
308
+ }
309
+ }, BATCH_INTERVAL_MS);
310
+
311
+ try {
312
+ child = this.cliManager.spawnStreamingCli(
313
+ provider,
314
+ request.prompt,
315
+ request.model_requested || null
316
+ );
317
+
318
+ // Set up CLI timeout
319
+ const timeoutHandle = setTimeout(() => {
320
+ if (child && !child.killed) {
321
+ console.error(`[Tunnel] Streaming request ${requestId} timed out after ${this.CLI_TIMEOUT_MS}ms`);
322
+ child.kill('SIGTERM');
323
+ setTimeout(() => {
324
+ if (!child.killed) child.kill('SIGKILL');
325
+ }, 2000);
326
+ }
327
+ }, this.CLI_TIMEOUT_MS);
328
+
329
+ // Read stdout line-by-line (NDJSON)
330
+ let lineBuf = '';
331
+
332
+ child.stdout.on('data', (data) => {
333
+ lineBuf += data.toString();
334
+
335
+ // Process complete lines
336
+ let newlineIdx;
337
+ while ((newlineIdx = lineBuf.indexOf('\n')) !== -1) {
338
+ const line = lineBuf.slice(0, newlineIdx).trim();
339
+ lineBuf = lineBuf.slice(newlineIdx + 1);
340
+
341
+ if (!line) continue;
342
+
343
+ try {
344
+ const event = JSON.parse(line);
345
+ const delta = this.extractTextDelta(provider, event);
346
+
347
+ if (delta) {
348
+ fullContent += delta;
349
+ pendingText += delta;
350
+ }
351
+
352
+ // Extract metadata from events
353
+ if (provider === 'claude_code') {
354
+ this._extractClaudeMetadata(event, (meta) => {
355
+ if (meta.model) modelUsed = meta.model;
356
+ if (meta.inputTokens != null) inputTokens = meta.inputTokens;
357
+ if (meta.outputTokens != null) outputTokens = meta.outputTokens;
358
+ if (meta.costUsd != null) costUsd = meta.costUsd;
359
+ });
360
+ } else if (provider === 'gemini_cli') {
361
+ this._extractGeminiMetadata(event, (meta) => {
362
+ if (meta.model) modelUsed = meta.model;
363
+ if (meta.inputTokens != null) inputTokens = meta.inputTokens;
364
+ if (meta.outputTokens != null) outputTokens = meta.outputTokens;
365
+ });
366
+ }
367
+ } catch {
368
+ // Skip non-JSON lines (stderr leaking into stdout, etc.)
369
+ }
370
+ }
371
+ });
372
+
373
+ // Capture stderr for debugging
374
+ let stderrBuf = '';
375
+ child.stderr.on('data', (data) => {
376
+ stderrBuf += data.toString();
377
+ });
378
+
379
+ // Wait for process to exit
380
+ await new Promise((resolve, reject) => {
381
+ child.on('close', (code) => {
382
+ clearTimeout(timeoutHandle);
383
+ resolve(code);
384
+ });
385
+ child.on('error', (err) => {
386
+ clearTimeout(timeoutHandle);
387
+ reject(err);
388
+ });
389
+ });
390
+
391
+ // Flush any remaining text
392
+ await flushChunk(true);
393
+ clearInterval(flushInterval);
394
+
395
+ // Send final is_final chunk marker
396
+ if (chunkIndex > 0) {
397
+ try {
398
+ await this.sendStreamChunk({
399
+ request_id: requestId,
400
+ chunk_index: chunkIndex++,
401
+ content: '',
402
+ is_final: true,
403
+ });
404
+ } catch (err) {
405
+ console.error(`[Tunnel] Failed to send final chunk for ${requestId}:`, err.message);
406
+ }
407
+ }
408
+
409
+ const latencyMs = Date.now() - startTime;
410
+
411
+ if (fullContent) {
412
+ console.error(`[Tunnel] Streaming request ${requestId} completed (${latencyMs}ms, ${chunkIndex} chunks, ${fullContent.length} chars)`);
413
+
414
+ // Estimate tokens if not extracted from metadata
415
+ const estInputTokens = inputTokens || Math.ceil(request.prompt.length / 4);
416
+ const estOutputTokens = outputTokens || Math.ceil(fullContent.length / 4);
417
+
418
+ await this.sendResponse({
419
+ request_id: requestId,
420
+ content: fullContent,
421
+ model_used: modelUsed || request.provider,
422
+ tokens_used: (estInputTokens + estOutputTokens) || null,
423
+ input_tokens: estInputTokens,
424
+ output_tokens: estOutputTokens,
425
+ latency_ms: latencyMs,
426
+ });
427
+ } else {
428
+ // No content extracted from streaming — fall back to error
429
+ const errorMsg = stderrBuf.trim().slice(0, 500) || 'Streaming CLI produced no output';
430
+ console.error(`[Tunnel] Streaming request ${requestId} failed: ${errorMsg}`);
431
+ await this.sendResponse({
432
+ request_id: requestId,
433
+ error: errorMsg,
434
+ latency_ms: latencyMs,
435
+ });
436
+ }
437
+
438
+ } catch (err) {
439
+ clearInterval(flushInterval);
440
+ const latencyMs = Date.now() - startTime;
441
+ console.error(`[Tunnel] Streaming request ${requestId} exception: ${err.message}`);
442
+
443
+ // If we got partial content before the error, still send it
444
+ if (fullContent) {
445
+ await flushChunk(true);
446
+ await this.sendResponse({
447
+ request_id: requestId,
448
+ content: fullContent,
449
+ model_used: modelUsed || request.provider,
450
+ latency_ms: latencyMs,
451
+ });
452
+ } else {
453
+ await this.sendResponse({
454
+ request_id: requestId,
455
+ error: err.message || 'Streaming execution failed',
456
+ latency_ms: latencyMs,
457
+ });
458
+ }
459
+ }
460
+ }
461
+
462
+ /**
463
+ * Extract text delta from a parsed NDJSON event based on provider type.
464
+ * Returns the incremental text string, or null if this event isn't a text delta.
465
+ */
466
+ extractTextDelta(provider, event) {
467
+ if (provider === 'claude_code') {
468
+ // Claude stream_event wrapping raw API events
469
+ // Filter: type === 'stream_event' && event.event.type === 'content_block_delta'
470
+ // && event.event.delta.type === 'text_delta'
471
+ if (event.type === 'stream_event' &&
472
+ event.event?.type === 'content_block_delta' &&
473
+ event.event?.delta?.type === 'text_delta') {
474
+ return event.event.delta.text || null;
475
+ }
476
+ return null;
477
+ }
478
+
479
+ if (provider === 'gemini_cli') {
480
+ // Gemini: type === 'message' && role === 'assistant' && delta === true
481
+ if (event.type === 'message' &&
482
+ event.role === 'assistant' &&
483
+ event.delta === true) {
484
+ return event.content || null;
485
+ }
486
+ return null;
487
+ }
488
+
489
+ return null;
490
+ }
491
+
492
+ /**
493
+ * Extract metadata (model, tokens, cost) from Claude Code stream events
494
+ */
495
+ _extractClaudeMetadata(event, callback) {
496
+ // From 'system' init event: model info
497
+ if (event.type === 'system' && event.subtype === 'init' && event.model) {
498
+ callback({ model: event.model });
499
+ }
500
+
501
+ // From 'result' event: usage stats
502
+ if (event.type === 'result') {
503
+ const meta = {};
504
+ if (event.usage) {
505
+ meta.inputTokens = event.usage.input_tokens || 0;
506
+ meta.outputTokens = event.usage.output_tokens || 0;
507
+ }
508
+ if (event.total_cost_usd != null) {
509
+ meta.costUsd = event.total_cost_usd;
510
+ }
511
+ // Extract model from modelUsage keys
512
+ if (event.modelUsage) {
513
+ const models = Object.keys(event.modelUsage);
514
+ if (models.length > 0) {
515
+ // Pick the model with highest cost as the primary model
516
+ let primary = models[0];
517
+ let highestCost = -1;
518
+ for (const [m, usage] of Object.entries(event.modelUsage)) {
519
+ const cost = usage.costUSD || 0;
520
+ if (cost > highestCost) {
521
+ highestCost = cost;
522
+ primary = m;
523
+ }
524
+ }
525
+ meta.model = primary;
526
+ }
527
+ }
528
+ callback(meta);
529
+ }
530
+ }
531
+
532
+ /**
533
+ * Extract metadata from Gemini CLI stream events
534
+ */
535
+ _extractGeminiMetadata(event, callback) {
536
+ // From 'init' event: model info
537
+ if (event.type === 'init' && event.model) {
538
+ callback({ model: event.model });
539
+ }
540
+
541
+ // From 'result' event: usage stats
542
+ if (event.type === 'result' && event.stats) {
543
+ callback({
544
+ inputTokens: event.stats.input_tokens || 0,
545
+ outputTokens: event.stats.output_tokens || 0,
546
+ });
547
+ }
548
+ }
549
+
550
+ /**
551
+ * Send a stream chunk to the server
552
+ */
553
+ async sendStreamChunk(chunkData) {
554
+ const url = `${this.serverBaseUrl}/api/tunnel/stream-chunk`;
555
+ const res = await fetch(url, {
556
+ method: 'POST',
557
+ headers: {
558
+ 'Authorization': `Bearer ${this.authToken}`,
559
+ 'Content-Type': 'application/json',
560
+ },
561
+ body: JSON.stringify(chunkData),
562
+ });
563
+
564
+ if (!res.ok) {
565
+ const text = await res.text().catch(() => '');
566
+ throw new Error(`Stream chunk POST failed (${res.status}): ${text}`);
567
+ }
568
+ }
569
+
192
570
  /**
193
571
  * Send response back to server
194
572
  */
@@ -2770,7 +2770,7 @@ To re-login: /polydev:login`
2770
2770
  const statusFile = path.join(polydevevDir, 'cli-status.json');
2771
2771
 
2772
2772
  // Ensure directory exists
2773
- if (!fs.existsSync(polydeveevDir)) {
2773
+ if (!fs.existsSync(polydevevDir)) {
2774
2774
  fs.mkdirSync(polydeveevDir, { recursive: true });
2775
2775
  }
2776
2776
 
@@ -2962,6 +2962,10 @@ To re-login: /polydev:login`
2962
2962
  return; // No auth, skip tunnel
2963
2963
  }
2964
2964
 
2965
+ // Reload token from file to get the freshest one
2966
+ // (env var may be stale if IDE started this process long ago)
2967
+ this.reloadTokenFromFiles();
2968
+
2965
2969
  try {
2966
2970
  this.tunnelClient = new TunnelClient(
2967
2971
  this.serverUrl, // https://www.polydev.ai/api/mcp
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polydev-ai",
3
- "version": "1.9.39",
3
+ "version": "1.9.41",
4
4
  "engines": {
5
5
  "node": ">=20.x <=22.x"
6
6
  },