npm - polydev-ai - Versions diffs - 1.9.40 → 1.9.42 - Mend

polydev-ai 1.9.40 → 1.9.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/lib/cliManager.js CHANGED Viewed

@@ -578,6 +578,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
               success: true,
               content,
               tokens_used: this.estimateTokens(prompt + content),
+              input_tokens: Math.ceil(prompt.length / 4),
+              output_tokens: Math.ceil(content.length / 4),
               latency_ms: Date.now() - startTime,
               provider: providerId,
               mode: 'args',
@@ -651,8 +653,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
             args = ['--model', cliModel, ...args, prompt];
           } else if (providerId === 'gemini_cli') {
             // Gemini CLI: -m for model, -p for prompt (headless mode)
-            // Add prompt prefix to prevent tool planning in non-interactive mode
-            const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
+            // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
+            const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
             args = ['-m', cliModel, '-p', geminiPrompt];
           } else {
             // Default: just append prompt
@@ -662,8 +664,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
           // No model specified
           if (providerId === 'gemini_cli') {
             // Gemini CLI: -p for headless mode
-            // Add prompt prefix to prevent tool planning in non-interactive mode
-            const geminiPrompt = `Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
+            // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
+            const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
             args = ['-p', geminiPrompt];
           } else {
             args = [...args, prompt];
@@ -689,6 +691,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
                   success: true,
                   content: jsonResult.content,
                   tokens_used: jsonResult.tokens_used,
+                  input_tokens: jsonResult.input_tokens,
+                  output_tokens: jsonResult.output_tokens,
                   latency_ms: Date.now() - startTime,
                   provider: providerId,
                   mode: 'args',
@@ -715,6 +719,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
               success: true,
               content,
               tokens_used: this.estimateTokens(prompt + content),
+              input_tokens: Math.ceil(prompt.length / 4),
+              output_tokens: Math.ceil(content.length / 4),
               latency_ms: Date.now() - startTime,
               provider: providerId,
               mode: 'args',
@@ -771,6 +777,8 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
                   success: true,
                   content,
                   tokens_used: this.estimateTokens(prompt + content),
+                  input_tokens: Math.ceil(prompt.length / 4),
+                  output_tokens: Math.ceil(content.length / 4),
                   latency_ms: Date.now() - startTime,
                   provider: providerId,
                   mode: 'args',
@@ -1038,17 +1046,13 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
       const content = json.result;
       // Extract primary model from modelUsage
-      // The primary model is the one with highest cost - that's the user's configured main model
-      // (Haiku is used internally for quick tasks, but the expensive model is what the user chose)
       let primaryModel = CLI_DEFAULT_MODELS['claude_code'] || 'cli_default';
       const modelUsage = json.modelUsage || {};
       const modelNames = Object.keys(modelUsage);
       if (modelNames.length === 1) {
-        // Only one model used - that's the primary
         primaryModel = modelNames[0];
       } else if (modelNames.length > 1) {
-        // Multiple models - the one with highest cost is the user's configured main model
         let highestCost = -1;
         for (const [modelName, usage] of Object.entries(modelUsage)) {
           const cost = usage.costUSD || 0;
@@ -1059,13 +1063,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
         }
       }
-      // Calculate total tokens (excluding cache tokens which are just infrastructure overhead)
+      // Calculate separate input and output tokens
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
       let totalTokens = 0;
       let cacheTokens = 0;
       for (const usage of Object.values(modelUsage)) {
-        // Count actual input/output tokens
+        totalInputTokens += (usage.inputTokens || 0);
+        totalOutputTokens += (usage.outputTokens || 0);
         totalTokens += (usage.inputTokens || 0) + (usage.outputTokens || 0);
-        // Track cache tokens separately (for cost calculations, but not displayed as "tokens used")
         cacheTokens += (usage.cacheReadInputTokens || 0) + (usage.cacheCreationInputTokens || 0);
       }
@@ -1073,14 +1079,15 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
         content,
         model_used: primaryModel,
         tokens_used: totalTokens || json.usage?.input_tokens + json.usage?.output_tokens || 0,
-        cache_tokens: cacheTokens, // Separate field for cache tokens
+        input_tokens: totalInputTokens,
+        output_tokens: totalOutputTokens,
+        cache_tokens: cacheTokens,
         cost_usd: json.total_cost_usd || 0,
         model_usage: modelUsage,
         session_id: json.session_id,
         duration_ms: json.duration_ms
       };
     } catch (e) {
-      // Not valid JSON, return null to fall back to text parsing
       return null;
     }
   }
@@ -1374,6 +1381,119 @@ This is a known issue with @google/gemini-cli@0.3.4 and older Node.js versions.`
     return this.providers.get(providerId);
   }
+  // ============================================
+  // Streaming CLI Methods (CLI-as-API real streaming)
+  // ============================================
+  /**
+   * Check if a provider supports streaming output
+   */
+  supportsStreaming(providerId) {
+    return providerId === 'claude_code' || providerId === 'gemini_cli';
+  }
+  /**
+   * Build CLI args for streaming mode (NDJSON output)
+   * @param {string} providerId - 'claude_code' or 'gemini_cli'
+   * @param {string} prompt - The user prompt
+   * @param {string|null} model - Optional model name
+   * @returns {string[]} CLI arguments array
+   */
+  buildStreamingArgs(providerId, prompt, model = null) {
+    if (providerId === 'claude_code') {
+      const args = ['-p', '--output-format', 'stream-json', '--verbose', '--include-partial-messages'];
+      // Add model flag if specified
+      if (model) {
+        // Normalize model names to Claude CLI aliases
+        const claudeModelMap = {
+          'claude-opus-4-5': 'opus',
+          'claude-opus-4.5': 'opus',
+          'claude-4.5-opus': 'opus',
+          'claude-opus-4-5-20250514': 'opus',
+          'claude-sonnet-4-5': 'sonnet',
+          'claude-sonnet-4.5': 'sonnet',
+          'claude-4.5-sonnet': 'sonnet',
+          'claude-sonnet-4-5-20250514': 'sonnet',
+          'claude-3-5-sonnet': 'sonnet',
+          'claude-3-5-haiku': 'haiku',
+          'claude-haiku-3-5': 'haiku',
+        };
+        const cliModel = claudeModelMap[model.toLowerCase()] || model;
+        args.unshift('--model', cliModel);
+      }
+      args.push(prompt);
+      return args;
+    }
+    if (providerId === 'gemini_cli') {
+      const args = ['-o', 'stream-json'];
+      // Add model flag if specified
+      if (model) {
+        args.push('-m', model);
+      }
+      // Add prompt prefix to prevent tool planning in non-interactive mode and enforce English
+      const geminiPrompt = `IMPORTANT: Always respond in English. Answer directly without using any tools, file operations, or searches. Do not say "I will search" or "I will look up". Provide your analysis immediately.\n\n${prompt}`;
+      args.push('-p', geminiPrompt);
+      return args;
+    }
+    throw new Error(`Provider ${providerId} does not support streaming`);
+  }
+  /**
+   * Spawn a CLI process in streaming mode.
+   * Returns the raw child process so the caller can read stdout line-by-line.
+   *
+   * @param {string} providerId - 'claude_code' or 'gemini_cli'
+   * @param {string} prompt - The user prompt
+   * @param {string|null} model - Optional model name
+   * @returns {import('child_process').ChildProcess} The spawned process
+   */
+  spawnStreamingCli(providerId, prompt, model = null) {
+    const provider = this.providers.get(providerId);
+    if (!provider) {
+      throw new Error(`Unknown provider: ${providerId}`);
+    }
+    const args = this.buildStreamingArgs(providerId, prompt, model);
+    console.log(`[Polydev CLI] Spawning streaming ${providerId}: ${provider.command} ${args.slice(0, 4).join(' ')}...`);
+    const child = spawn(provider.command, args, {
+      stdio: ['pipe', 'pipe', 'pipe'],
+      shell: process.platform === 'win32',
+      env: {
+        ...process.env,
+        HOME: process.env.HOME || os.homedir(),
+        CI: '1',
+        NO_COLOR: '1',
+        TERM: 'dumb',
+        NONINTERACTIVE: '1',
+        NO_BROWSER: '1',
+        BROWSER: 'echo',
+        DISPLAY: '',
+        HEADLESS: '1',
+        CODEX_DISABLE_UPDATE_CHECK: '1',
+        CLAUDE_CODE_DISABLE_UPDATE_CHECK: '1',
+        GEMINI_NO_BROWSER: '1',
+        GOOGLE_NO_BROWSER: '1',
+        npm_config_update_notifier: 'false',
+        NO_UPDATE_NOTIFIER: '1'
+      }
+    });
+    // Close stdin immediately to prevent hanging (critical for Gemini CLI)
+    if (child.stdin) {
+      child.stdin.end();
+    }
+    return child;
+  }
   // ============================================
   // Status Reporting Methods
   // ============================================

package/lib/tunnelClient.js CHANGED Viewed

@@ -205,8 +205,13 @@ class TunnelClient {
    * Handle a single tunnel request by routing to CLI
    */
   async handleRequest(request) {
+    // Use streaming path for providers that support it
+    if (this.cliManager.supportsStreaming(request.provider)) {
+      return this.handleStreamingRequest(request);
+    }
     const startTime = Date.now();
-    console.error(`[Tunnel] Processing request ${request.id} → ${request.provider}`);
+    console.error(`[Tunnel] Processing request ${request.id} → ${request.provider} (non-streaming)`);
     try {
       const result = await this.cliManager.sendCliPrompt(
@@ -226,6 +231,8 @@ class TunnelClient {
           content: result.content || '',
           model_used: result.model || result.detectedModel || request.provider,
           tokens_used: result.tokens_used || null,
+          input_tokens: result.input_tokens || null,
+          output_tokens: result.output_tokens || null,
           latency_ms: latencyMs,
         });
       } else {
@@ -247,6 +254,319 @@ class TunnelClient {
     }
   }
+  /**
+   * Handle a streaming-capable tunnel request (Claude Code or Gemini CLI).
+   * Spawns CLI with stream-json flags, reads NDJSON stdout line-by-line,
+   * extracts text deltas, batches them, and POSTs chunks to /api/tunnel/stream-chunk.
+   * On completion, POSTs final response to /api/tunnel/respond for backward compat.
+   */
+  async handleStreamingRequest(request) {
+    const startTime = Date.now();
+    const requestId = request.id;
+    const provider = request.provider;
+    console.error(`[Tunnel] Processing request ${requestId} → ${provider} (streaming)`);
+    let child;
+    let fullContent = '';
+    let chunkIndex = 0;
+    let pendingText = '';
+    let lastFlushTime = Date.now();
+    let modelUsed = null;
+    let inputTokens = null;
+    let outputTokens = null;
+    let costUsd = null;
+    const BATCH_INTERVAL_MS = 150;
+    const BATCH_MIN_CHARS = 1;
+    // Flush accumulated text as a stream chunk
+    const flushChunk = async (force = false) => {
+      if (!pendingText) return;
+      if (!force && pendingText.length < BATCH_MIN_CHARS && (Date.now() - lastFlushTime) < BATCH_INTERVAL_MS) {
+        return;
+      }
+      const text = pendingText;
+      pendingText = '';
+      lastFlushTime = Date.now();
+      try {
+        await this.sendStreamChunk({
+          request_id: requestId,
+          chunk_index: chunkIndex++,
+          content: text,
+        });
+      } catch (err) {
+        console.error(`[Tunnel] Failed to send stream chunk for ${requestId}:`, err.message);
+      }
+    };
+    // Set up periodic flushing
+    const flushInterval = setInterval(async () => {
+      if (pendingText.length > 0 && (Date.now() - lastFlushTime) >= BATCH_INTERVAL_MS) {
+        await flushChunk(true);
+      }
+    }, BATCH_INTERVAL_MS);
+    try {
+      child = this.cliManager.spawnStreamingCli(
+        provider,
+        request.prompt,
+        request.model_requested || null
+      );
+      // Set up CLI timeout
+      const timeoutHandle = setTimeout(() => {
+        if (child && !child.killed) {
+          console.error(`[Tunnel] Streaming request ${requestId} timed out after ${this.CLI_TIMEOUT_MS}ms`);
+          child.kill('SIGTERM');
+          setTimeout(() => {
+            if (!child.killed) child.kill('SIGKILL');
+          }, 2000);
+        }
+      }, this.CLI_TIMEOUT_MS);
+      // Read stdout line-by-line (NDJSON)
+      let lineBuf = '';
+      child.stdout.on('data', (data) => {
+        lineBuf += data.toString();
+        // Process complete lines
+        let newlineIdx;
+        while ((newlineIdx = lineBuf.indexOf('\n')) !== -1) {
+          const line = lineBuf.slice(0, newlineIdx).trim();
+          lineBuf = lineBuf.slice(newlineIdx + 1);
+          if (!line) continue;
+          try {
+            const event = JSON.parse(line);
+            const delta = this.extractTextDelta(provider, event);
+            if (delta) {
+              fullContent += delta;
+              pendingText += delta;
+            }
+            // Extract metadata from events
+            if (provider === 'claude_code') {
+              this._extractClaudeMetadata(event, (meta) => {
+                if (meta.model) modelUsed = meta.model;
+                if (meta.inputTokens != null) inputTokens = meta.inputTokens;
+                if (meta.outputTokens != null) outputTokens = meta.outputTokens;
+                if (meta.costUsd != null) costUsd = meta.costUsd;
+              });
+            } else if (provider === 'gemini_cli') {
+              this._extractGeminiMetadata(event, (meta) => {
+                if (meta.model) modelUsed = meta.model;
+                if (meta.inputTokens != null) inputTokens = meta.inputTokens;
+                if (meta.outputTokens != null) outputTokens = meta.outputTokens;
+              });
+            }
+          } catch {
+            // Skip non-JSON lines (stderr leaking into stdout, etc.)
+          }
+        }
+      });
+      // Capture stderr for debugging
+      let stderrBuf = '';
+      child.stderr.on('data', (data) => {
+        stderrBuf += data.toString();
+      });
+      // Wait for process to exit
+      await new Promise((resolve, reject) => {
+        child.on('close', (code) => {
+          clearTimeout(timeoutHandle);
+          resolve(code);
+        });
+        child.on('error', (err) => {
+          clearTimeout(timeoutHandle);
+          reject(err);
+        });
+      });
+      // Flush any remaining text
+      await flushChunk(true);
+      clearInterval(flushInterval);
+      // Send final is_final chunk marker
+      if (chunkIndex > 0) {
+        try {
+          await this.sendStreamChunk({
+            request_id: requestId,
+            chunk_index: chunkIndex++,
+            content: '',
+            is_final: true,
+          });
+        } catch (err) {
+          console.error(`[Tunnel] Failed to send final chunk for ${requestId}:`, err.message);
+        }
+      }
+      const latencyMs = Date.now() - startTime;
+      if (fullContent) {
+        console.error(`[Tunnel] Streaming request ${requestId} completed (${latencyMs}ms, ${chunkIndex} chunks, ${fullContent.length} chars)`);
+        // Estimate tokens if not extracted from metadata
+        const estInputTokens = inputTokens || Math.ceil(request.prompt.length / 4);
+        const estOutputTokens = outputTokens || Math.ceil(fullContent.length / 4);
+        await this.sendResponse({
+          request_id: requestId,
+          content: fullContent,
+          model_used: modelUsed || request.provider,
+          tokens_used: (estInputTokens + estOutputTokens) || null,
+          input_tokens: estInputTokens,
+          output_tokens: estOutputTokens,
+          latency_ms: latencyMs,
+        });
+      } else {
+        // No content extracted from streaming — fall back to error
+        const errorMsg = stderrBuf.trim().slice(0, 500) || 'Streaming CLI produced no output';
+        console.error(`[Tunnel] Streaming request ${requestId} failed: ${errorMsg}`);
+        await this.sendResponse({
+          request_id: requestId,
+          error: errorMsg,
+          latency_ms: latencyMs,
+        });
+      }
+    } catch (err) {
+      clearInterval(flushInterval);
+      const latencyMs = Date.now() - startTime;
+      console.error(`[Tunnel] Streaming request ${requestId} exception: ${err.message}`);
+      // If we got partial content before the error, still send it
+      if (fullContent) {
+        await flushChunk(true);
+        await this.sendResponse({
+          request_id: requestId,
+          content: fullContent,
+          model_used: modelUsed || request.provider,
+          latency_ms: latencyMs,
+        });
+      } else {
+        await this.sendResponse({
+          request_id: requestId,
+          error: err.message || 'Streaming execution failed',
+          latency_ms: latencyMs,
+        });
+      }
+    }
+  }
+  /**
+   * Extract text delta from a parsed NDJSON event based on provider type.
+   * Returns the incremental text string, or null if this event isn't a text delta.
+   */
+  extractTextDelta(provider, event) {
+    if (provider === 'claude_code') {
+      // Claude stream_event wrapping raw API events
+      // Filter: type === 'stream_event' && event.event.type === 'content_block_delta'
+      //         && event.event.delta.type === 'text_delta'
+      if (event.type === 'stream_event' &&
+          event.event?.type === 'content_block_delta' &&
+          event.event?.delta?.type === 'text_delta') {
+        return event.event.delta.text || null;
+      }
+      return null;
+    }
+    if (provider === 'gemini_cli') {
+      // Gemini: type === 'message' && role === 'assistant' && delta === true
+      if (event.type === 'message' &&
+          event.role === 'assistant' &&
+          event.delta === true) {
+        return event.content || null;
+      }
+      return null;
+    }
+    return null;
+  }
+  /**
+   * Extract metadata (model, tokens, cost) from Claude Code stream events
+   */
+  _extractClaudeMetadata(event, callback) {
+    // From 'system' init event: model info
+    if (event.type === 'system' && event.subtype === 'init' && event.model) {
+      callback({ model: event.model });
+    }
+    // From 'result' event: usage stats
+    if (event.type === 'result') {
+      const meta = {};
+      if (event.usage) {
+        meta.inputTokens = event.usage.input_tokens || 0;
+        meta.outputTokens = event.usage.output_tokens || 0;
+      }
+      if (event.total_cost_usd != null) {
+        meta.costUsd = event.total_cost_usd;
+      }
+      // Extract model from modelUsage keys
+      if (event.modelUsage) {
+        const models = Object.keys(event.modelUsage);
+        if (models.length > 0) {
+          // Pick the model with highest cost as the primary model
+          let primary = models[0];
+          let highestCost = -1;
+          for (const [m, usage] of Object.entries(event.modelUsage)) {
+            const cost = usage.costUSD || 0;
+            if (cost > highestCost) {
+              highestCost = cost;
+              primary = m;
+            }
+          }
+          meta.model = primary;
+        }
+      }
+      callback(meta);
+    }
+  }
+  /**
+   * Extract metadata from Gemini CLI stream events
+   */
+  _extractGeminiMetadata(event, callback) {
+    // From 'init' event: model info
+    if (event.type === 'init' && event.model) {
+      callback({ model: event.model });
+    }
+    // From 'result' event: usage stats
+    if (event.type === 'result' && event.stats) {
+      callback({
+        inputTokens: event.stats.input_tokens || 0,
+        outputTokens: event.stats.output_tokens || 0,
+      });
+    }
+  }
+  /**
+   * Send a stream chunk to the server
+   */
+  async sendStreamChunk(chunkData) {
+    const url = `${this.serverBaseUrl}/api/tunnel/stream-chunk`;
+    const res = await fetch(url, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${this.authToken}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(chunkData),
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => '');
+      throw new Error(`Stream chunk POST failed (${res.status}): ${text}`);
+    }
+  }
   /**
    * Send response back to server
    */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polydev-ai",
-  "version": "1.9.40",
+  "version": "1.9.42",
   "engines": {
     "node": ">=20.x <=22.x"
   },