snow-ai 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/api/anthropic.d.ts +2 -0
  2. package/dist/api/anthropic.js +64 -18
  3. package/dist/api/chat.d.ts +3 -0
  4. package/dist/api/chat.js +5 -4
  5. package/dist/api/gemini.d.ts +3 -0
  6. package/dist/api/gemini.js +168 -101
  7. package/dist/api/responses.d.ts +3 -0
  8. package/dist/api/responses.js +5 -4
  9. package/dist/api/systemPrompt.d.ts +1 -1
  10. package/dist/api/systemPrompt.js +149 -40
  11. package/dist/hooks/useConversation.d.ts +1 -1
  12. package/dist/hooks/useConversation.js +5 -3
  13. package/dist/hooks/useGlobalNavigation.js +2 -0
  14. package/dist/hooks/useToolConfirmation.d.ts +2 -1
  15. package/dist/hooks/useToolConfirmation.js +2 -1
  16. package/dist/mcp/filesystem.d.ts +16 -1
  17. package/dist/mcp/filesystem.js +193 -89
  18. package/dist/mcp/multiLanguageASTParser.d.ts +67 -0
  19. package/dist/mcp/multiLanguageASTParser.js +360 -0
  20. package/dist/mcp/todo.d.ts +1 -1
  21. package/dist/mcp/todo.js +21 -26
  22. package/dist/ui/components/ChatInput.d.ts +4 -1
  23. package/dist/ui/components/ChatInput.js +105 -39
  24. package/dist/ui/components/DiffViewer.d.ts +1 -2
  25. package/dist/ui/components/DiffViewer.js +65 -65
  26. package/dist/ui/components/MCPInfoPanel.js +1 -2
  27. package/dist/ui/components/TodoTree.js +1 -1
  28. package/dist/ui/components/ToolConfirmation.d.ts +11 -1
  29. package/dist/ui/components/ToolConfirmation.js +86 -6
  30. package/dist/ui/pages/ChatScreen.js +223 -108
  31. package/dist/ui/pages/SystemPromptConfigScreen.js +25 -12
  32. package/dist/utils/apiConfig.d.ts +6 -1
  33. package/dist/utils/apiConfig.js +24 -0
  34. package/dist/utils/commands/ide.js +18 -1
  35. package/dist/utils/mcpToolsManager.d.ts +1 -1
  36. package/dist/utils/mcpToolsManager.js +45 -36
  37. package/dist/utils/textBuffer.d.ts +5 -0
  38. package/dist/utils/textBuffer.js +23 -2
  39. package/dist/utils/vscodeConnection.js +10 -1
  40. package/package.json +14 -2
  41. package/readme.md +36 -6
package/dist/api/anthropic.d.ts CHANGED
@@ -12,6 +12,8 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
  }
  export interface AnthropicStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
package/dist/api/anthropic.js CHANGED
@@ -1,6 +1,6 @@
  import Anthropic from '@anthropic-ai/sdk';
  import { createHash, randomUUID } from 'crypto';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  let anthropicClient = null;
  function getAnthropicClient() {
@@ -45,12 +45,13 @@ function generateUserId(sessionId) {
  }
  /**
   * Convert OpenAI-style tools to Anthropic tool format
+  * Adds cache_control to the last tool for prompt caching
   */
  function convertToolsToAnthropic(tools) {
      if (!tools || tools.length === 0) {
          return undefined;
      }
-     return tools
+     const convertedTools = tools
          .filter(tool => tool.type === 'function' && 'function' in tool)
          .map(tool => {
          if (tool.type === 'function' && 'function' in tool) {
@@ -62,6 +63,12 @@ function convertToolsToAnthropic(tools) {
          }
          throw new Error('Invalid tool format');
      });
+     // Add cache_control to the last tool for prompt caching
+     if (convertedTools.length > 0) {
+         const lastTool = convertedTools[convertedTools.length - 1];
+         lastTool.cache_control = { type: 'ephemeral' };
+     }
+     return convertedTools;
  }
  /**
   * Convert our ChatMessage format to Anthropic's message format
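For context, a minimal hedged sketch of the prompt-caching pattern this hunk introduces: the converter marks the last tool definition with an ephemeral cache_control block so the large, stable tool list can be served from Anthropic's prompt cache on later turns. Only the cache_control field comes from the diff; the rest of the tool shape is an assumption for illustration.

    // Hedged TypeScript sketch, not the package's exact code.
    interface AnthropicTool {
        name: string;
        description: string;
        input_schema: Record<string, unknown>;   // JSON schema for the tool's arguments
        cache_control?: { type: 'ephemeral' };   // marks a prompt-cache breakpoint
    }

    // Mark the last tool so everything up to and including the tool list is cacheable.
    function markLastToolForCaching(tools: AnthropicTool[]): AnthropicTool[] {
        if (tools.length > 0) {
            tools[tools.length - 1]!.cache_control = { type: 'ephemeral' };
        }
        return tools;
    }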
@@ -71,8 +78,7 @@ function convertToolsToAnthropic(tools) {
   * 2. If no custom system prompt: use default as system
   */
  function convertToAnthropicMessages(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let systemContent;
      const anthropicMessages = [];
      for (const msg of messages) {
@@ -231,6 +237,7 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
      let toolCallsBuffer = new Map();
      let hasToolCalls = false;
      let usageData;
+     let currentToolUseId = null; // Track current tool use block ID
      for await (const event of stream) {
          if (abortSignal?.aborted) {
              return;
@@ -241,12 +248,13 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
          // Handle tool use blocks
          if (block.type === 'tool_use') {
              hasToolCalls = true;
+             currentToolUseId = block.id; // Store current tool use ID
              toolCallsBuffer.set(block.id, {
                  id: block.id,
                  type: 'function',
                  function: {
                      name: block.name,
-                     arguments: ''
+                     arguments: '{}' // Initialize with empty object instead of empty string
                  }
              });
              // Yield delta for token counting
@@ -270,29 +278,44 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
          // Handle tool input deltas
          if (delta.type === 'input_json_delta') {
              const jsonDelta = delta.partial_json;
-             const toolCall = toolCallsBuffer.get(event.index.toString());
-             if (toolCall) {
-                 toolCall.function.arguments += jsonDelta;
-                 // Yield delta for token counting
-                 yield {
-                     type: 'tool_call_delta',
-                     delta: jsonDelta
-                 };
+             // Use currentToolUseId instead of event.index
+             if (currentToolUseId) {
+                 const toolCall = toolCallsBuffer.get(currentToolUseId);
+                 if (toolCall) {
+                     // If this is the first delta and arguments is still '{}', replace it
+                     if (toolCall.function.arguments === '{}') {
+                         toolCall.function.arguments = jsonDelta;
+                     }
+                     else {
+                         toolCall.function.arguments += jsonDelta;
+                     }
+                     // Yield delta for token counting
+                     yield {
+                         type: 'tool_call_delta',
+                         delta: jsonDelta
+                     };
+                 }
              }
          }
      }
+     else if (event.type === 'content_block_stop') {
+         // Reset current tool use ID when block ends
+         currentToolUseId = null;
+     }
      else if (event.type === 'message_start') {
-         // Capture initial usage data
+         // Capture initial usage data (including cache metrics)
          if (event.message.usage) {
              usageData = {
                  prompt_tokens: event.message.usage.input_tokens || 0,
                  completion_tokens: event.message.usage.output_tokens || 0,
-                 total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0)
+                 total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0),
+                 cache_creation_input_tokens: event.message.usage.cache_creation_input_tokens,
+                 cache_read_input_tokens: event.message.usage.cache_read_input_tokens
              };
          }
      }
      else if (event.type === 'message_delta') {
-         // Update usage data with final token counts
+         // Update usage data with final token counts (including cache metrics)
          if (event.usage) {
              if (!usageData) {
                  usageData = {
@@ -303,14 +326,37 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
              }
              usageData.completion_tokens = event.usage.output_tokens || 0;
              usageData.total_tokens = usageData.prompt_tokens + usageData.completion_tokens;
+             // Update cache metrics if present
+             if (event.usage.cache_creation_input_tokens !== undefined) {
+                 usageData.cache_creation_input_tokens = event.usage.cache_creation_input_tokens;
+             }
+             if (event.usage.cache_read_input_tokens !== undefined) {
+                 usageData.cache_read_input_tokens = event.usage.cache_read_input_tokens;
+             }
          }
      }
  }
- // Yield tool calls if any
+ // Yield tool calls if any (only after stream completes)
  if (hasToolCalls && toolCallsBuffer.size > 0) {
+     // Validate that all tool call arguments are complete valid JSON
+     const toolCalls = Array.from(toolCallsBuffer.values());
+     for (const toolCall of toolCalls) {
+         try {
+             // Validate JSON completeness
+             // Empty string should be treated as empty object
+             const args = toolCall.function.arguments.trim() || '{}';
+             JSON.parse(args);
+             // Update with normalized version
+             toolCall.function.arguments = args;
+         }
+         catch (e) {
+             const errorMsg = e instanceof Error ? e.message : 'Unknown error';
+             throw new Error(`Incomplete tool call JSON for ${toolCall.function.name}: ${toolCall.function.arguments} (${errorMsg})`);
+         }
+     }
      yield {
          type: 'tool_calls',
-         tool_calls: Array.from(toolCallsBuffer.values())
+         tool_calls: toolCalls
      };
  }
  // Yield usage information if available
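In isolation, the validation added above amounts to the following helper (a hedged TypeScript sketch of the same logic, not the package's exact code): normalize an empty streamed argument string to '{}', and fail fast if the accumulated JSON never became parseable.

    function normalizeToolArguments(raw: string, toolName: string): string {
        const args = raw.trim() || '{}'; // an empty argument stream becomes an empty object
        try {
            JSON.parse(args); // throws if the streamed JSON is still incomplete
            return args;
        } catch (e) {
            const msg = e instanceof Error ? e.message : 'Unknown error';
            throw new Error(`Incomplete tool call JSON for ${toolName}: ${raw} (${msg})`);
        }
    }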
package/dist/api/chat.d.ts CHANGED
@@ -69,6 +69,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface StreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
package/dist/api/chat.js CHANGED
@@ -1,5 +1,5 @@
  import OpenAI from 'openai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { executeMCPTool } from '../utils/mcpToolsManager.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  /**
@@ -10,8 +10,7 @@ import { SYSTEM_PROMPT } from './systemPrompt.js';
   * 2. If no custom system prompt: use default as system
   */
  function convertToOpenAIMessages(messages, includeSystemPrompt = true) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let result = messages.map(msg => {
          // If the message contains images, use the content array format
          if (msg.role === 'user' && msg.images && msg.images.length > 0) {
@@ -276,7 +275,9 @@ export async function* createStreamingChatCompletion(options, abortSignal) {
              usageData = {
                  prompt_tokens: usageValue.prompt_tokens || 0,
                  completion_tokens: usageValue.completion_tokens || 0,
-                 total_tokens: usageValue.total_tokens || 0
+                 total_tokens: usageValue.total_tokens || 0,
+                 // OpenAI Chat API: cached_tokens in prompt_tokens_details
+                 cached_tokens: usageValue.prompt_tokens_details?.cached_tokens
              };
          }
          // Skip content processing if no choices (but usage is already captured above)
package/dist/api/gemini.d.ts CHANGED
@@ -10,6 +10,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface GeminiStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
package/dist/api/gemini.js CHANGED
@@ -1,5 +1,5 @@
- import { GoogleGenerativeAI } from '@google/generative-ai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { GoogleGenAI } from '@google/genai';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  let geminiClient = null;
  function getGeminiClient() {
@@ -8,7 +8,20 @@ function getGeminiClient() {
      if (!config.apiKey) {
          throw new Error('Gemini API configuration is incomplete. Please configure API key first.');
      }
-     geminiClient = new GoogleGenerativeAI(config.apiKey);
+     // Create client configuration
+     const clientConfig = {
+         apiKey: config.apiKey
+     };
+     // Support custom baseUrl and headers for proxy servers
+     if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
+         clientConfig.httpOptions = {
+             baseUrl: config.baseUrl,
+             headers: {
+                 'x-goog-api-key': config.apiKey, // Gemini API requires this header
+             }
+         };
+     }
+     geminiClient = new GoogleGenAI(clientConfig);
      }
      return geminiClient;
  }
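A minimal hedged sketch of the new client construction: the httpOptions baseUrl and the x-goog-api-key header come from the hunk above; the proxy URL is a placeholder, and httpOptions is only needed when routing through a proxy.

    import { GoogleGenAI } from '@google/genai';

    const apiKey = process.env.GEMINI_API_KEY ?? '';
    const ai = new GoogleGenAI({
        apiKey,
        httpOptions: {
            baseUrl: 'https://my-gemini-proxy.example.com', // assumed proxy endpoint, for illustration
            headers: { 'x-goog-api-key': apiKey },
        },
    });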
@@ -26,10 +39,16 @@ function convertToolsToGemini(tools) {
      .filter(tool => tool.type === 'function' && 'function' in tool)
      .map(tool => {
      if (tool.type === 'function' && 'function' in tool) {
+         // Convert OpenAI parameters schema to Gemini format
+         const params = tool.function.parameters;
          return {
              name: tool.function.name,
              description: tool.function.description || '',
-             parameters: tool.function.parameters
+             parametersJsonSchema: {
+                 type: 'object',
+                 properties: params.properties || {},
+                 required: params.required || []
+             }
          };
      }
      throw new Error('Invalid tool format');
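As a concrete illustration of the conversion above, an OpenAI-style function tool becomes a Gemini function declaration carrying the raw JSON schema under parametersJsonSchema. The tool definition here is a made-up example; only the output shape follows the hunk.

    // Hypothetical OpenAI-style tool definition, for illustration only.
    const openAiTool = {
        type: 'function',
        function: {
            name: 'read_file',
            description: 'Read a file from disk',
            parameters: {
                type: 'object',
                properties: { path: { type: 'string' } },
                required: ['path'],
            },
        },
    };

    // Shape produced by the converter in the hunk above.
    const geminiDeclaration = {
        name: openAiTool.function.name,
        description: openAiTool.function.description,
        parametersJsonSchema: {
            type: 'object',
            properties: openAiTool.function.parameters.properties,
            required: openAiTool.function.parameters.required,
        },
    };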
@@ -37,40 +56,110 @@ function convertToolsToGemini(tools) {
      return [{ functionDeclarations }];
  }
  /**
-  * Convert our ChatMessage format to Gemini's Content format
-  * Logic:
-  * 1. If custom system prompt exists: use custom as systemInstruction, prepend default as first user message
-  * 2. If no custom system prompt: use default as systemInstruction
+  * Convert our ChatMessage format to Gemini's format
   */
  function convertToGeminiMessages(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let systemInstruction;
      const contents = [];
-     for (const msg of messages) {
+     for (let i = 0; i < messages.length; i++) {
+         const msg = messages[i];
+         if (!msg)
+             continue;
          // Extract system message as systemInstruction
          if (msg.role === 'system') {
              systemInstruction = msg.content;
              continue;
          }
-         // Skip tool messages for now (Gemini handles them differently)
+         // Handle tool results
          if (msg.role === 'tool') {
-             // Tool results in Gemini are represented as function response parts
-             const parts = [{
-                     functionResponse: {
-                         name: 'function_name', // This should be mapped from tool_call_id
-                         response: {
-                             content: msg.content
+             // Find the corresponding function call to get the function name
+             // Look backwards in contents to find the matching tool call
+             let functionName = 'unknown_function';
+             for (let j = contents.length - 1; j >= 0; j--) {
+                 const contentMsg = contents[j];
+                 if (contentMsg.role === 'model' && contentMsg.parts) {
+                     for (const part of contentMsg.parts) {
+                         if (part.functionCall) {
+                             functionName = part.functionCall.name;
+                             break;
                          }
                      }
-                 }];
+                     if (functionName !== 'unknown_function')
+                         break;
+                 }
+             }
+             // Tool response must be a valid object for Gemini API
+             // If content is a JSON string, parse it; otherwise wrap it in an object
+             let responseData;
+             if (!msg.content) {
+                 responseData = {};
+             }
+             else {
+                 let contentToParse = msg.content;
+                 // Sometimes the content is double-encoded as JSON
+                 // First, try to parse it once
+                 try {
+                     const firstParse = JSON.parse(contentToParse);
+                     // If it's a string, it might be double-encoded, try parsing again
+                     if (typeof firstParse === 'string') {
+                         contentToParse = firstParse;
+                     }
+                 }
+                 catch {
+                     // Not JSON, use as-is
+                 }
+                 // Now parse or wrap the final content
+                 try {
+                     const parsed = JSON.parse(contentToParse);
+                     // If parsed result is an object (not array, not null), use it directly
+                     if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+                         responseData = parsed;
+                     }
+                     else {
+                         // If it's a primitive, array, or null, wrap it
+                         responseData = { content: parsed };
+                     }
+                 }
+                 catch {
+                     // Not valid JSON, wrap the raw string
+                     responseData = { content: contentToParse };
+                 }
+             }
              contents.push({
-                 role: 'function',
+                 role: 'user',
+                 parts: [{
+                         functionResponse: {
+                             name: functionName,
+                             response: responseData
+                         }
+                     }]
+             });
+             continue;
+         }
+         // Handle tool calls in assistant messages
+         if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
+             const parts = [];
+             // Add text content if exists
+             if (msg.content) {
+                 parts.push({ text: msg.content });
+             }
+             // Add function calls
+             for (const toolCall of msg.tool_calls) {
+                 parts.push({
+                     functionCall: {
+                         name: toolCall.function.name,
+                         args: JSON.parse(toolCall.function.arguments)
+                     }
+                 });
+             }
+             contents.push({
+                 role: 'model',
                  parts
              });
              continue;
          }
-         // Convert user/assistant messages
+         // Build message parts
          const parts = [];
          // Add text content
          if (msg.content) {
@@ -79,7 +168,6 @@ function convertToGeminiMessages(messages) {
          // Add images for user messages
          if (msg.role === 'user' && msg.images && msg.images.length > 0) {
              for (const image of msg.images) {
-                 // Extract base64 data and mime type
                  const base64Match = image.data.match(/^data:([^;]+);base64,(.+)$/);
                  if (base64Match) {
                      parts.push({
@@ -91,35 +179,20 @@ function convertToGeminiMessages(messages) {
                  }
              }
          }
-         // Handle tool calls in assistant messages
-         if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
-             for (const toolCall of msg.tool_calls) {
-                 parts.push({
-                     functionCall: {
-                         name: toolCall.function.name,
-                         args: JSON.parse(toolCall.function.arguments)
-                     }
-                 });
-             }
-         }
-         // Map role (Gemini uses 'user' and 'model' instead of 'user' and 'assistant')
+         // Add to contents
          const role = msg.role === 'assistant' ? 'model' : 'user';
-         contents.push({
-             role,
-             parts
-         });
+         contents.push({ role, parts });
      }
-     // If a custom system prompt is configured
+     // Handle system instruction
      if (customSystemPrompt) {
-         // Use the custom prompt as systemInstruction and prepend the default prompt as the first user message
          systemInstruction = customSystemPrompt;
+         // Prepend default system prompt as first user message
          contents.unshift({
              role: 'user',
              parts: [{ text: SYSTEM_PROMPT }]
          });
      }
      else if (!systemInstruction) {
-         // No custom system prompt: use the default prompt as systemInstruction
          systemInstruction = SYSTEM_PROMPT;
      }
      return { systemInstruction, contents };
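In isolation, the double-decode fallback added for tool results looks roughly like this (a hedged TypeScript sketch of the logic above, not the package's exact code): tool output may arrive as plain text, a JSON string, or a double-encoded JSON string, and Gemini's functionResponse expects a plain object.

    function toGeminiResponseObject(content: string): Record<string, unknown> {
        if (!content) return {};
        let text = content;
        try {
            const first = JSON.parse(text);
            if (typeof first === 'string') text = first; // was double-encoded
        } catch { /* not JSON, keep the raw string */ }
        try {
            const parsed = JSON.parse(text);
            return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
                ? parsed
                : { content: parsed }; // wrap primitives, arrays, and null
        } catch {
            return { content: text }; // wrap non-JSON text
        }
    }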
@@ -129,83 +202,79 @@ function convertToGeminiMessages(messages) {
   */
  export async function* createStreamingGeminiCompletion(options, abortSignal) {
      const client = getGeminiClient();
-     const config = getOpenAiConfig();
      try {
          const { systemInstruction, contents } = convertToGeminiMessages(options.messages);
-         // Initialize the model with optional custom baseUrl
-         // Note: For Gemini API, baseUrl should be in format: https://your-proxy.com/v1beta
-         // Default is: https://generativelanguage.googleapis.com/v1beta
-         const modelConfig = {
+         // Build request config
+         const requestConfig = {
              model: options.model,
-             systemInstruction,
-             tools: convertToolsToGemini(options.tools),
-             generationConfig: {
+             contents,
+             config: {
+                 systemInstruction,
                  temperature: options.temperature ?? 0.7,
              }
          };
-         // Support custom baseUrl for proxy servers
-         const requestOptions = {};
-         if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
-             // Only set custom baseUrl if it's not the default OpenAI URL
-             requestOptions.baseUrl = config.baseUrl;
-         }
-         const model = client.getGenerativeModel(modelConfig, requestOptions);
-         // Start chat session
-         const chat = model.startChat({
-             history: contents.slice(0, -1), // All messages except the last one
-         });
-         // Get the last user message
-         const lastMessage = contents[contents.length - 1];
-         if (!lastMessage) {
-             throw new Error('No user message found');
+         // Add tools if provided
+         const geminiTools = convertToolsToGemini(options.tools);
+         if (geminiTools) {
+             requestConfig.config.tools = geminiTools;
          }
          // Stream the response
-         const result = await chat.sendMessageStream(lastMessage.parts);
+         const stream = await client.models.generateContentStream(requestConfig);
          let contentBuffer = '';
         let toolCallsBuffer = [];
          let hasToolCalls = false;
          let toolCallIndex = 0;
-         for await (const chunk of result.stream) {
+         let totalTokens = { prompt: 0, completion: 0, total: 0 };
+         // Save original console.warn to suppress SDK warnings
+         const originalWarn = console.warn;
+         console.warn = () => { }; // Suppress "there are non-text parts" warnings
+         for await (const chunk of stream) {
              if (abortSignal?.aborted) {
+                 console.warn = originalWarn; // Restore console.warn
                  return;
              }
-             const candidate = chunk.candidates?.[0];
-             if (!candidate)
-                 continue;
              // Process text content
-             const text = chunk.text();
-             if (text) {
-                 contentBuffer += text;
+             if (chunk.text) {
+                 contentBuffer += chunk.text;
                  yield {
                      type: 'content',
-                     content: text
+                     content: chunk.text
                  };
              }
-             // Process function calls (tool calls)
-             const functionCalls = candidate.content?.parts?.filter(part => 'functionCall' in part);
-             if (functionCalls && functionCalls.length > 0) {
+             // Process function calls using the official API
+             if (chunk.functionCalls && chunk.functionCalls.length > 0) {
                  hasToolCalls = true;
-                 for (const fc of functionCalls) {
-                     if ('functionCall' in fc && fc.functionCall) {
-                         const toolCall = {
-                             id: `call_${toolCallIndex++}`,
-                             type: 'function',
-                             function: {
-                                 name: fc.functionCall.name,
-                                 arguments: JSON.stringify(fc.functionCall.args)
-                             }
-                         };
-                         toolCallsBuffer.push(toolCall);
-                         // Yield delta for token counting
-                         const deltaText = fc.functionCall.name + JSON.stringify(fc.functionCall.args);
-                         yield {
-                             type: 'tool_call_delta',
-                             delta: deltaText
-                         };
-                     }
+                 for (const fc of chunk.functionCalls) {
+                     if (!fc.name)
+                         continue;
+                     const toolCall = {
+                         id: `call_${toolCallIndex++}`,
+                         type: 'function',
+                         function: {
+                             name: fc.name,
+                             arguments: JSON.stringify(fc.args)
+                         }
+                     };
+                     toolCallsBuffer.push(toolCall);
+                     // Yield delta for token counting
+                     const deltaText = fc.name + JSON.stringify(fc.args);
+                     yield {
+                         type: 'tool_call_delta',
+                         delta: deltaText
+                     };
                  }
              }
+             // Track usage info
+             if (chunk.usageMetadata) {
+                 totalTokens = {
+                     prompt: chunk.usageMetadata.promptTokenCount || 0,
+                     completion: chunk.usageMetadata.candidatesTokenCount || 0,
+                     total: chunk.usageMetadata.totalTokenCount || 0
+                 };
+             }
          }
+         // Restore console.warn
+         console.warn = originalWarn;
          // Yield tool calls if any
          if (hasToolCalls && toolCallsBuffer.length > 0) {
              yield {
@@ -213,16 +282,14 @@ export async function* createStreamingGeminiCompletion(options, abortSignal) {
                  tool_calls: toolCallsBuffer
              };
          }
-         // Get final response for usage info
-         const finalResponse = await result.response;
-         const usageMetadata = finalResponse.usageMetadata;
-         if (usageMetadata) {
+         // Yield usage info
+         if (totalTokens.total > 0) {
              yield {
                  type: 'usage',
                  usage: {
-                     prompt_tokens: usageMetadata.promptTokenCount || 0,
-                     completion_tokens: usageMetadata.candidatesTokenCount || 0,
-                     total_tokens: usageMetadata.totalTokenCount || 0
+                     prompt_tokens: totalTokens.prompt,
+                     completion_tokens: totalTokens.completion,
+                     total_tokens: totalTokens.total
                  }
              };
          }
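For orientation, a hedged sketch of the new call path, assuming the @google/genai streaming surface shown above (generateContentStream, chunk.text, chunk.functionCalls, chunk.usageMetadata); the model name is a placeholder, not something the diff specifies.

    import { GoogleGenAI } from '@google/genai';

    async function demo(): Promise<void> {
        const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY ?? '' });
        const stream = await ai.models.generateContentStream({
            model: 'gemini-2.0-flash', // assumed model name, for illustration only
            contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
            config: { temperature: 0.7 },
        });
        for await (const chunk of stream) {
            if (chunk.text) process.stdout.write(chunk.text);                   // streamed text
            if (chunk.functionCalls?.length) console.log(chunk.functionCalls);  // tool calls, if any
            if (chunk.usageMetadata) console.log('tokens:', chunk.usageMetadata.totalTokenCount);
        }
    }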
package/dist/api/responses.d.ts CHANGED
@@ -26,6 +26,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface ResponseStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
package/dist/api/responses.js CHANGED
@@ -1,5 +1,5 @@
  import OpenAI from 'openai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { executeMCPTool } from '../utils/mcpToolsManager.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  /**
@@ -83,8 +83,7 @@ export function resetOpenAIClient() {
   * 2. If no custom system prompt: use default as instructions
   */
  function convertToResponseInput(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      const result = [];
      for (const msg of messages) {
          if (!msg)
@@ -410,7 +409,9 @@ export async function* createStreamingResponse(options, abortSignal) {
              usageData = {
                  prompt_tokens: chunk.response.usage.input_tokens || 0,
                  completion_tokens: chunk.response.usage.output_tokens || 0,
-                 total_tokens: chunk.response.usage.total_tokens || 0
+                 total_tokens: chunk.response.usage.total_tokens || 0,
+                 // OpenAI Responses API: cached_tokens in input_tokens_details (note: tokenS)
+                 cached_tokens: chunk.response.usage.input_tokens_details?.cached_tokens
              };
          }
          break;
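A hedged sketch of how the provider-specific cache fields above can be folded into the shared UsageInfo shape; only the two detail paths (prompt_tokens_details for the Chat Completions API, input_tokens_details for the Responses API) come from the diffs, the surrounding types and helper are illustration.

    // Normalize cached-token reporting from either OpenAI API family into one field.
    function readCachedTokens(usage: {
        prompt_tokens_details?: { cached_tokens?: number }; // Chat Completions API
        input_tokens_details?: { cached_tokens?: number };  // Responses API
    }): number | undefined {
        return usage.prompt_tokens_details?.cached_tokens
            ?? usage.input_tokens_details?.cached_tokens;
    }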