polydev-ai 1.9.40 → 1.9.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cliManager.js CHANGED
@@ -578,6 +578,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
578
578
  success: true,
579
579
  content,
580
580
  tokens_used: this.estimateTokens(prompt + content),
581
+ input_tokens: Math.ceil(prompt.length / 4),
582
+ output_tokens: Math.ceil(content.length / 4),
581
583
  latency_ms: Date.now() - startTime,
582
584
  provider: providerId,
583
585
  mode: 'args',
@@ -651,8 +653,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
651
653
  args = ['--model', cliModel, ...args, prompt];
652
654
  } else if (providerId === 'gemini_cli') {
653
655
  // Gemini CLI: -m for model, -p for prompt (headless mode)
654
- // Add prompt prefix to prevent tool planning in non-interactive mode
655
- const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
656
+ // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
657
+ const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
656
658
  args = ['-m', cliModel, '-p', geminiPrompt];
657
659
  } else {
658
660
  // Default: just append prompt
@@ -662,8 +664,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
662
664
  // No model specified
663
665
  if (providerId === 'gemini_cli') {
664
666
  // Gemini CLI: -p for headless mode
665
- // Add prompt prefix to prevent tool planning in non-interactive mode
666
- const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
667
+ // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
668
+ const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
667
669
  args = ['-p', geminiPrompt];
668
670
  } else {
669
671
  args = [...args, prompt];
@@ -689,6 +691,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
689
691
  success: true,
690
692
  content: jsonResult.content,
691
693
  tokens_used: jsonResult.tokens_used,
694
+ input_tokens: jsonResult.input_tokens,
695
+ output_tokens: jsonResult.output_tokens,
692
696
  latency_ms: Date.now() - startTime,
693
697
  provider: providerId,
694
698
  mode: 'args',
@@ -715,6 +719,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
715
719
  success: true,
716
720
  content,
717
721
  tokens_used: this.estimateTokens(prompt + content),
722
+ input_tokens: Math.ceil(prompt.length / 4),
723
+ output_tokens: Math.ceil(content.length / 4),
718
724
  latency_ms: Date.now() - startTime,
719
725
  provider: providerId,
720
726
  mode: 'args',
@@ -771,6 +777,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
771
777
  success: true,
772
778
  content,
773
779
  tokens_used: this.estimateTokens(prompt + content),
780
+ input_tokens: Math.ceil(prompt.length / 4),
781
+ output_tokens: Math.ceil(content.length / 4),
774
782
  latency_ms: Date.now() - startTime,
775
783
  provider: providerId,
776
784
  mode: 'args',
@@ -1038,17 +1046,13 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1038
1046
  const content = json.result;
1039
1047
 
1040
1048
  // Extract primary model from modelUsage
1041
- // The primary model is the one with highest cost - that's the user's configured main model
1042
- // (Haiku is used internally for quick tasks, but the expensive model is what the user chose)
1043
1049
  let primaryModel = CLI_DEFAULT_MODELS['claude_code'] || 'cli_default';
1044
1050
  const modelUsage = json.modelUsage || {};
1045
1051
  const modelNames = Object.keys(modelUsage);
1046
1052
 
1047
1053
  if (modelNames.length === 1) {
1048
- // Only one model used - that's the primary
1049
1054
  primaryModel = modelNames[0];
1050
1055
  } else if (modelNames.length > 1) {
1051
- // Multiple models - the one with highest cost is the user's configured main model
1052
1056
  let highestCost = -1;
1053
1057
  for (const [modelName, usage] of Object.entries(modelUsage)) {
1054
1058
  const cost = usage.costUSD || 0;
@@ -1059,13 +1063,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1059
1063
  }
1060
1064
  }
1061
1065
 
1062
- // Calculate total tokens (excluding cache tokens which are just infrastructure overhead)
1066
+ // Calculate separate input and output tokens
1067
+ let totalInputTokens = 0;
1068
+ let totalOutputTokens = 0;
1063
1069
  let totalTokens = 0;
1064
1070
  let cacheTokens = 0;
1065
1071
  for (const usage of Object.values(modelUsage)) {
1066
- // Count actual input/output tokens
1072
+ totalInputTokens += (usage.inputTokens || 0);
1073
+ totalOutputTokens += (usage.outputTokens || 0);
1067
1074
  totalTokens += (usage.inputTokens || 0) + (usage.outputTokens || 0);
1068
- // Track cache tokens separately (for cost calculations, but not displayed as "tokens used")
1069
1075
  cacheTokens += (usage.cacheReadInputTokens || 0) + (usage.cacheCreationInputTokens || 0);
1070
1076
  }
1071
1077
 
@@ -1073,14 +1079,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1073
1079
  content,
1074
1080
  model_used: primaryModel,
1075
1081
  tokens_used: totalTokens || json.usage?.input_tokens + json.usage?.output_tokens || 0,
1076
- cache_tokens: cacheTokens, // Separate field for cache tokens
1082
+ input_tokens: totalInputTokens,
1083
+ output_tokens: totalOutputTokens,
1084
+ cache_tokens: cacheTokens,
1077
1085
  cost_usd: json.total_cost_usd || 0,
1078
1086
  model_usage: modelUsage,
1079
1087
  session_id: json.session_id,
1080
1088
  duration_ms: json.duration_ms
1081
1089
  };
1082
1090
  } catch (e) {
1083
- // Not valid JSON, return null to fall back to text parsing
1084
1091
  return null;
1085
1092
  }
1086
1093
  }
@@ -1374,6 +1381,119 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
1374
1381
  return this.providers.get(providerId);
1375
1382
  }
1376
1383
 
1384
+ // ============================================
1385
+ // Streaming CLI Methods (CLI-as-API real streaming)
1386
+ // ============================================
1387
+
1388
+ /**
1389
+ * Check if a provider supports streaming output
1390
+ */
1391
+ supportsStreaming(providerId) {
1392
+ return providerId === 'claude_code' || providerId === 'gemini_cli';
1393
+ }
1394
+
1395
+ /**
1396
+ * Build CLI args for streaming mode (NDJSON output)
1397
+ * @param {string} providerId - 'claude_code' or 'gemini_cli'
1398
+ * @param {string} prompt - The user prompt
1399
+ * @param {string|null} model - Optional model name
1400
+ * @returns {string[]} CLI arguments array
1401
+ */
1402
+ buildStreamingArgs(providerId, prompt, model = null) {
1403
+ if (providerId === 'claude_code') {
1404
+ const args = ['-p', '--output-format', 'stream-json', '--verbose', '--include-partial-messages'];
1405
+
1406
+ // Add model flag if specified
1407
+ if (model) {
1408
+ // Normalize model names to Claude CLI aliases
1409
+ const claudeModelMap = {
1410
+ 'claude-opus-4-5': 'opus',
1411
+ 'claude-opus-4.5': 'opus',
1412
+ 'claude-4.5-opus': 'opus',
1413
+ 'claude-opus-4-5-20250514': 'opus',
1414
+ 'claude-sonnet-4-5': 'sonnet',
1415
+ 'claude-sonnet-4.5': 'sonnet',
1416
+ 'claude-4.5-sonnet': 'sonnet',
1417
+ 'claude-sonnet-4-5-20250514': 'sonnet',
1418
+ 'claude-3-5-sonnet': 'sonnet',
1419
+ 'claude-3-5-haiku': 'haiku',
1420
+ 'claude-haiku-3-5': 'haiku',
1421
+ };
1422
+ const cliModel = claudeModelMap[model.toLowerCase()] || model;
1423
+ args.unshift('--model', cliModel);
1424
+ }
1425
+
1426
+ args.push(prompt);
1427
+ return args;
1428
+ }
1429
+
1430
+ if (providerId === 'gemini_cli') {
1431
+ const args = ['-o', 'stream-json'];
1432
+
1433
+ // Add model flag if specified
1434
+ if (model) {
1435
+ args.push('-m', model);
1436
+ }
1437
+
1438
+ // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
1439
+ const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
1440
+ args.push('-p', geminiPrompt);
1441
+ return args;
1442
+ }
1443
+
1444
+ throw new Error(`Provider ${providerId} does not support streaming`);
1445
+ }
1446
+
1447
+ /**
1448
+ * Spawn a CLI process in streaming mode.
1449
+ * Returns the raw child process so the caller can read stdout line-by-line.
1450
+ *
1451
+ * @param {string} providerId - 'claude_code' or 'gemini_cli'
1452
+ * @param {string} prompt - The user prompt
1453
+ * @param {string|null} model - Optional model name
1454
+ * @returns {import('child_process').ChildProcess} The spawned process
1455
+ */
1456
+ spawnStreamingCli(providerId, prompt, model = null) {
1457
+ const provider = this.providers.get(providerId);
1458
+ if (!provider) {
1459
+ throw new Error(`Unknown provider: ${providerId}`);
1460
+ }
1461
+
1462
+ const args = this.buildStreamingArgs(providerId, prompt, model);
1463
+
1464
+ console.log(`[Polydev CLI] Spawning streaming ${providerId}: ${provider.command} ${args.slice(0, 4).join(' ')}...`);
1465
+
1466
+ const child = spawn(provider.command, args, {
1467
+ stdio: ['pipe', 'pipe', 'pipe'],
1468
+ shell: process.platform === 'win32',
1469
+ env: {
1470
+ ...process.env,
1471
+ HOME: process.env.HOME || os.homedir(),
1472
+ CI: '1',
1473
+ NO_COLOR: '1',
1474
+ TERM: 'dumb',
1475
+ NONINTERACTIVE: '1',
1476
+ NO_BROWSER: '1',
1477
+ BROWSER: 'echo',
1478
+ DISPLAY: '',
1479
+ HEADLESS: '1',
1480
+ CODEX_DISABLE_UPDATE_CHECK: '1',
1481
+ CLAUDE_CODE_DISABLE_UPDATE_CHECK: '1',
1482
+ GEMINI_NO_BROWSER: '1',
1483
+ GOOGLE_NO_BROWSER: '1',
1484
+ npm_config_update_notifier: 'false',
1485
+ NO_UPDATE_NOTIFIER: '1'
1486
+ }
1487
+ });
1488
+
1489
+ // Close stdin immediately to prevent hanging (critical for Gemini CLI)
1490
+ if (child.stdin) {
1491
+ child.stdin.end();
1492
+ }
1493
+
1494
+ return child;
1495
+ }
1496
+
1377
1497
  // ============================================
1378
1498
  // Status Reporting Methods
1379
1499
  // ============================================
@@ -205,8 +205,13 @@ class TunnelClient {
205
205
  * Handle a single tunnel request by routing to CLI
206
206
  */
207
207
  async handleRequest(request) {
208
+ // Use streaming path for providers that support it
209
+ if (this.cliManager.supportsStreaming(request.provider)) {
210
+ return this.handleStreamingRequest(request);
211
+ }
212
+
208
213
  const startTime = Date.now();
209
- console.error(`[Tunnel] Processing request ${request.id} → ${request.provider}`);
214
+ console.error(`[Tunnel] Processing request ${request.id} → ${request.provider} (non-streaming)`);
210
215
 
211
216
  try {
212
217
  const result = await this.cliManager.sendCliPrompt(
@@ -226,6 +231,8 @@ class TunnelClient {
226
231
  content: result.content || '',
227
232
  model_used: result.model || result.detectedModel || request.provider,
228
233
  tokens_used: result.tokens_used || null,
234
+ input_tokens: result.input_tokens || null,
235
+ output_tokens: result.output_tokens || null,
229
236
  latency_ms: latencyMs,
230
237
  });
231
238
  } else {
@@ -247,6 +254,319 @@ class TunnelClient {
247
254
  }
248
255
  }
249
256
 
257
+ /**
258
+ * Handle a streaming-capable tunnel request (Claude Code or Gemini CLI).
259
+ * Spawns CLI with stream-json flags, reads NDJSON stdout line-by-line,
260
+ * extracts text deltas, batches them, and POSTs chunks to /api/tunnel/stream-chunk.
261
+ * On completion, POSTs final response to /api/tunnel/respond for backward compat.
262
+ */
263
+ async handleStreamingRequest(request) {
264
+ const startTime = Date.now();
265
+ const requestId = request.id;
266
+ const provider = request.provider;
267
+ console.error(`[Tunnel] Processing request ${requestId} → ${provider} (streaming)`);
268
+
269
+ let child;
270
+ let fullContent = '';
271
+ let chunkIndex = 0;
272
+ let pendingText = '';
273
+ let lastFlushTime = Date.now();
274
+ let modelUsed = null;
275
+ let inputTokens = null;
276
+ let outputTokens = null;
277
+ let costUsd = null;
278
+
279
+ const BATCH_INTERVAL_MS = 150;
280
+ const BATCH_MIN_CHARS = 1;
281
+
282
+ // Flush accumulated text as a stream chunk
283
+ const flushChunk = async (force = false) => {
284
+ if (!pendingText) return;
285
+ if (!force && pendingText.length < BATCH_MIN_CHARS && (Date.now() - lastFlushTime) < BATCH_INTERVAL_MS) {
286
+ return;
287
+ }
288
+
289
+ const text = pendingText;
290
+ pendingText = '';
291
+ lastFlushTime = Date.now();
292
+
293
+ try {
294
+ await this.sendStreamChunk({
295
+ request_id: requestId,
296
+ chunk_index: chunkIndex++,
297
+ content: text,
298
+ });
299
+ } catch (err) {
300
+ console.error(`[Tunnel] Failed to send stream chunk for ${requestId}:`, err.message);
301
+ }
302
+ };
303
+
304
+ // Set up periodic flushing
305
+ const flushInterval = setInterval(async () => {
306
+ if (pendingText.length > 0 && (Date.now() - lastFlushTime) >= BATCH_INTERVAL_MS) {
307
+ await flushChunk(true);
308
+ }
309
+ }, BATCH_INTERVAL_MS);
310
+
311
+ try {
312
+ child = this.cliManager.spawnStreamingCli(
313
+ provider,
314
+ request.prompt,
315
+ request.model_requested || null
316
+ );
317
+
318
+ // Set up CLI timeout
319
+ const timeoutHandle = setTimeout(() => {
320
+ if (child && !child.killed) {
321
+ console.error(`[Tunnel] Streaming request ${requestId} timed out after ${this.CLI_TIMEOUT_MS}ms`);
322
+ child.kill('SIGTERM');
323
+ setTimeout(() => {
324
+ if (!child.killed) child.kill('SIGKILL');
325
+ }, 2000);
326
+ }
327
+ }, this.CLI_TIMEOUT_MS);
328
+
329
+ // Read stdout line-by-line (NDJSON)
330
+ let lineBuf = '';
331
+
332
+ child.stdout.on('data', (data) => {
333
+ lineBuf += data.toString();
334
+
335
+ // Process complete lines
336
+ let newlineIdx;
337
+ while ((newlineIdx = lineBuf.indexOf('\n')) !== -1) {
338
+ const line = lineBuf.slice(0, newlineIdx).trim();
339
+ lineBuf = lineBuf.slice(newlineIdx + 1);
340
+
341
+ if (!line) continue;
342
+
343
+ try {
344
+ const event = JSON.parse(line);
345
+ const delta = this.extractTextDelta(provider, event);
346
+
347
+ if (delta) {
348
+ fullContent += delta;
349
+ pendingText += delta;
350
+ }
351
+
352
+ // Extract metadata from events
353
+ if (provider === 'claude_code') {
354
+ this._extractClaudeMetadata(event, (meta) => {
355
+ if (meta.model) modelUsed = meta.model;
356
+ if (meta.inputTokens != null) inputTokens = meta.inputTokens;
357
+ if (meta.outputTokens != null) outputTokens = meta.outputTokens;
358
+ if (meta.costUsd != null) costUsd = meta.costUsd;
359
+ });
360
+ } else if (provider === 'gemini_cli') {
361
+ this._extractGeminiMetadata(event, (meta) => {
362
+ if (meta.model) modelUsed = meta.model;
363
+ if (meta.inputTokens != null) inputTokens = meta.inputTokens;
364
+ if (meta.outputTokens != null) outputTokens = meta.outputTokens;
365
+ });
366
+ }
367
+ } catch {
368
+ // Skip non-JSON lines (stderr leaking into stdout, etc.)
369
+ }
370
+ }
371
+ });
372
+
373
+ // Capture stderr for debugging
374
+ let stderrBuf = '';
375
+ child.stderr.on('data', (data) => {
376
+ stderrBuf += data.toString();
377
+ });
378
+
379
+ // Wait for process to exit
380
+ await new Promise((resolve, reject) => {
381
+ child.on('close', (code) => {
382
+ clearTimeout(timeoutHandle);
383
+ resolve(code);
384
+ });
385
+ child.on('error', (err) => {
386
+ clearTimeout(timeoutHandle);
387
+ reject(err);
388
+ });
389
+ });
390
+
391
+ // Flush any remaining text
392
+ await flushChunk(true);
393
+ clearInterval(flushInterval);
394
+
395
+ // Send final is_final chunk marker
396
+ if (chunkIndex > 0) {
397
+ try {
398
+ await this.sendStreamChunk({
399
+ request_id: requestId,
400
+ chunk_index: chunkIndex++,
401
+ content: '',
402
+ is_final: true,
403
+ });
404
+ } catch (err) {
405
+ console.error(`[Tunnel] Failed to send final chunk for ${requestId}:`, err.message);
406
+ }
407
+ }
408
+
409
+ const latencyMs = Date.now() - startTime;
410
+
411
+ if (fullContent) {
412
+ console.error(`[Tunnel] Streaming request ${requestId} completed (${latencyMs}ms, ${chunkIndex} chunks, ${fullContent.length} chars)`);
413
+
414
+ // Estimate tokens if not extracted from metadata
415
+ const estInputTokens = inputTokens || Math.ceil(request.prompt.length / 4);
416
+ const estOutputTokens = outputTokens || Math.ceil(fullContent.length / 4);
417
+
418
+ await this.sendResponse({
419
+ request_id: requestId,
420
+ content: fullContent,
421
+ model_used: modelUsed || request.provider,
422
+ tokens_used: (estInputTokens + estOutputTokens) || null,
423
+ input_tokens: estInputTokens,
424
+ output_tokens: estOutputTokens,
425
+ latency_ms: latencyMs,
426
+ });
427
+ } else {
428
+ // No content extracted from streaming — fall back to error
429
+ const errorMsg = stderrBuf.trim().slice(0, 500) || 'Streaming CLI produced no output';
430
+ console.error(`[Tunnel] Streaming request ${requestId} failed: ${errorMsg}`);
431
+ await this.sendResponse({
432
+ request_id: requestId,
433
+ error: errorMsg,
434
+ latency_ms: latencyMs,
435
+ });
436
+ }
437
+
438
+ } catch (err) {
439
+ clearInterval(flushInterval);
440
+ const latencyMs = Date.now() - startTime;
441
+ console.error(`[Tunnel] Streaming request ${requestId} exception: ${err.message}`);
442
+
443
+ // If we got partial content before the error, still send it
444
+ if (fullContent) {
445
+ await flushChunk(true);
446
+ await this.sendResponse({
447
+ request_id: requestId,
448
+ content: fullContent,
449
+ model_used: modelUsed || request.provider,
450
+ latency_ms: latencyMs,
451
+ });
452
+ } else {
453
+ await this.sendResponse({
454
+ request_id: requestId,
455
+ error: err.message || 'Streaming execution failed',
456
+ latency_ms: latencyMs,
457
+ });
458
+ }
459
+ }
460
+ }
461
+
462
+ /**
463
+ * Extract text delta from a parsed NDJSON event based on provider type.
464
+ * Returns the incremental text string, or null if this event isn't a text delta.
465
+ */
466
+ extractTextDelta(provider, event) {
467
+ if (provider === 'claude_code') {
468
+ // Claude stream_event wrapping raw API events
469
+ // Filter: type === 'stream_event' && event.event.type === 'content_block_delta'
470
+ // && event.event.delta.type === 'text_delta'
471
+ if (event.type === 'stream_event' &&
472
+ event.event?.type === 'content_block_delta' &&
473
+ event.event?.delta?.type === 'text_delta') {
474
+ return event.event.delta.text || null;
475
+ }
476
+ return null;
477
+ }
478
+
479
+ if (provider === 'gemini_cli') {
480
+ // Gemini: type === 'message' && role === 'assistant' && delta === true
481
+ if (event.type === 'message' &&
482
+ event.role === 'assistant' &&
483
+ event.delta === true) {
484
+ return event.content || null;
485
+ }
486
+ return null;
487
+ }
488
+
489
+ return null;
490
+ }
491
+
492
+ /**
493
+ * Extract metadata (model, tokens, cost) from Claude Code stream events
494
+ */
495
+ _extractClaudeMetadata(event, callback) {
496
+ // From 'system' init event: model info
497
+ if (event.type === 'system' && event.subtype === 'init' && event.model) {
498
+ callback({ model: event.model });
499
+ }
500
+
501
+ // From 'result' event: usage stats
502
+ if (event.type === 'result') {
503
+ const meta = {};
504
+ if (event.usage) {
505
+ meta.inputTokens = event.usage.input_tokens || 0;
506
+ meta.outputTokens = event.usage.output_tokens || 0;
507
+ }
508
+ if (event.total_cost_usd != null) {
509
+ meta.costUsd = event.total_cost_usd;
510
+ }
511
+ // Extract model from modelUsage keys
512
+ if (event.modelUsage) {
513
+ const models = Object.keys(event.modelUsage);
514
+ if (models.length > 0) {
515
+ // Pick the model with highest cost as the primary model
516
+ let primary = models[0];
517
+ let highestCost = -1;
518
+ for (const [m, usage] of Object.entries(event.modelUsage)) {
519
+ const cost = usage.costUSD || 0;
520
+ if (cost > highestCost) {
521
+ highestCost = cost;
522
+ primary = m;
523
+ }
524
+ }
525
+ meta.model = primary;
526
+ }
527
+ }
528
+ callback(meta);
529
+ }
530
+ }
531
+
532
+ /**
533
+ * Extract metadata from Gemini CLI stream events
534
+ */
535
+ _extractGeminiMetadata(event, callback) {
536
+ // From 'init' event: model info
537
+ if (event.type === 'init' && event.model) {
538
+ callback({ model: event.model });
539
+ }
540
+
541
+ // From 'result' event: usage stats
542
+ if (event.type === 'result' && event.stats) {
543
+ callback({
544
+ inputTokens: event.stats.input_tokens || 0,
545
+ outputTokens: event.stats.output_tokens || 0,
546
+ });
547
+ }
548
+ }
549
+
550
+ /**
551
+ * Send a stream chunk to the server
552
+ */
553
+ async sendStreamChunk(chunkData) {
554
+ const url = `${this.serverBaseUrl}/api/tunnel/stream-chunk`;
555
+ const res = await fetch(url, {
556
+ method: 'POST',
557
+ headers: {
558
+ 'Authorization': `Bearer ${this.authToken}`,
559
+ 'Content-Type': 'application/json',
560
+ },
561
+ body: JSON.stringify(chunkData),
562
+ });
563
+
564
+ if (!res.ok) {
565
+ const text = await res.text().catch(() => '');
566
+ throw new Error(`Stream chunk POST failed (${res.status}): ${text}`);
567
+ }
568
+ }
569
+
250
570
  /**
251
571
  * Send response back to server
252
572
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polydev-ai",
3
- "version": "1.9.40",
3
+ "version": "1.9.42",
4
4
  "engines": {
5
5
  "node": ">=20.x <=22.x"
6
6
  },