snow-ai 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/api/anthropic.d.ts +2 -0
  2. package/dist/api/anthropic.js +64 -18
  3. package/dist/api/chat.d.ts +3 -0
  4. package/dist/api/chat.js +5 -4
  5. package/dist/api/gemini.d.ts +3 -0
  6. package/dist/api/gemini.js +168 -101
  7. package/dist/api/responses.d.ts +3 -0
  8. package/dist/api/responses.js +5 -4
  9. package/dist/api/systemPrompt.d.ts +1 -1
  10. package/dist/api/systemPrompt.js +149 -40
  11. package/dist/hooks/useConversation.d.ts +1 -1
  12. package/dist/hooks/useConversation.js +5 -3
  13. package/dist/hooks/useGlobalNavigation.js +2 -0
  14. package/dist/hooks/useToolConfirmation.d.ts +2 -1
  15. package/dist/hooks/useToolConfirmation.js +2 -1
  16. package/dist/mcp/filesystem.d.ts +16 -1
  17. package/dist/mcp/filesystem.js +193 -89
  18. package/dist/mcp/multiLanguageASTParser.d.ts +67 -0
  19. package/dist/mcp/multiLanguageASTParser.js +360 -0
  20. package/dist/mcp/todo.d.ts +1 -1
  21. package/dist/mcp/todo.js +21 -26
  22. package/dist/ui/components/ChatInput.d.ts +4 -1
  23. package/dist/ui/components/ChatInput.js +105 -39
  24. package/dist/ui/components/DiffViewer.d.ts +1 -2
  25. package/dist/ui/components/DiffViewer.js +65 -65
  26. package/dist/ui/components/MCPInfoPanel.js +1 -2
  27. package/dist/ui/components/TodoTree.js +1 -1
  28. package/dist/ui/components/ToolConfirmation.d.ts +11 -1
  29. package/dist/ui/components/ToolConfirmation.js +86 -6
  30. package/dist/ui/pages/ChatScreen.js +223 -108
  31. package/dist/ui/pages/SystemPromptConfigScreen.js +25 -12
  32. package/dist/utils/apiConfig.d.ts +6 -1
  33. package/dist/utils/apiConfig.js +24 -0
  34. package/dist/utils/commands/ide.js +18 -1
  35. package/dist/utils/mcpToolsManager.d.ts +1 -1
  36. package/dist/utils/mcpToolsManager.js +45 -36
  37. package/dist/utils/textBuffer.d.ts +5 -0
  38. package/dist/utils/textBuffer.js +23 -2
  39. package/dist/utils/vscodeConnection.js +10 -1
  40. package/package.json +14 -2
  41. package/readme.md +36 -6
package/dist/api/anthropic.d.ts CHANGED
@@ -12,6 +12,8 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
  }
  export interface AnthropicStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
package/dist/api/anthropic.js CHANGED
@@ -1,6 +1,6 @@
  import Anthropic from '@anthropic-ai/sdk';
  import { createHash, randomUUID } from 'crypto';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  let anthropicClient = null;
  function getAnthropicClient() {
@@ -45,12 +45,13 @@ function generateUserId(sessionId) {
  }
  /**
   * Convert OpenAI-style tools to Anthropic tool format
+  * Adds cache_control to the last tool for prompt caching
   */
  function convertToolsToAnthropic(tools) {
      if (!tools || tools.length === 0) {
          return undefined;
      }
-     return tools
+     const convertedTools = tools
          .filter(tool => tool.type === 'function' && 'function' in tool)
          .map(tool => {
          if (tool.type === 'function' && 'function' in tool) {
@@ -62,6 +63,12 @@ function convertToolsToAnthropic(tools) {
          }
          throw new Error('Invalid tool format');
      });
+     // Add cache_control to the last tool for prompt caching
+     if (convertedTools.length > 0) {
+         const lastTool = convertedTools[convertedTools.length - 1];
+         lastTool.cache_control = { type: 'ephemeral' };
+     }
+     return convertedTools;
  }
  /**
   * Convert our ChatMessage format to Anthropic's message format
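For context, a minimal hedged sketch of the prompt-caching pattern this hunk introduces: the converter marks the last tool definition with an ephemeral cache_control block so the large, stable tool list can be served from Anthropic's prompt cache on later turns. Only the cache_control field comes from the diff; the rest of the tool shape is an assumption for illustration.

    // Hedged TypeScript sketch, not the package's exact code.
    interface AnthropicTool {
        name: string;
        description: string;
        input_schema: Record<string, unknown>;   // JSON schema for the tool's arguments
        cache_control?: { type: 'ephemeral' };   // marks a prompt-cache breakpoint
    }

    // Mark the last tool so everything up to and including the tool list is cacheable.
    function markLastToolForCaching(tools: AnthropicTool[]): AnthropicTool[] {
        if (tools.length > 0) {
            tools[tools.length - 1]!.cache_control = { type: 'ephemeral' };
        }
        return tools;
    }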
@@ -71,8 +78,7 @@ function convertToolsToAnthropic(tools) {
   * 2. If no custom system prompt: use default as system
   */
  function convertToAnthropicMessages(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let systemContent;
      const anthropicMessages = [];
      for (const msg of messages) {
@@ -231,6 +237,7 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
      let toolCallsBuffer = new Map();
      let hasToolCalls = false;
      let usageData;
+     let currentToolUseId = null; // Track current tool use block ID
      for await (const event of stream) {
          if (abortSignal?.aborted) {
              return;
@@ -241,12 +248,13 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
          // Handle tool use blocks
          if (block.type === 'tool_use') {
              hasToolCalls = true;
+             currentToolUseId = block.id; // Store current tool use ID
              toolCallsBuffer.set(block.id, {
                  id: block.id,
                  type: 'function',
                  function: {
                      name: block.name,
-                     arguments: ''
+                     arguments: '{}' // Initialize with empty object instead of empty string
                  }
              });
              // Yield delta for token counting
@@ -270,29 +278,44 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
          // Handle tool input deltas
          if (delta.type === 'input_json_delta') {
              const jsonDelta = delta.partial_json;
-             const toolCall = toolCallsBuffer.get(event.index.toString());
-             if (toolCall) {
-                 toolCall.function.arguments += jsonDelta;
-                 // Yield delta for token counting
-                 yield {
-                     type: 'tool_call_delta',
-                     delta: jsonDelta
-                 };
+             // Use currentToolUseId instead of event.index
+             if (currentToolUseId) {
+                 const toolCall = toolCallsBuffer.get(currentToolUseId);
+                 if (toolCall) {
+                     // If this is the first delta and arguments is still '{}', replace it
+                     if (toolCall.function.arguments === '{}') {
+                         toolCall.function.arguments = jsonDelta;
+                     }
+                     else {
+                         toolCall.function.arguments += jsonDelta;
+                     }
+                     // Yield delta for token counting
+                     yield {
+                         type: 'tool_call_delta',
+                         delta: jsonDelta
+                     };
+                 }
              }
          }
      }
+     else if (event.type === 'content_block_stop') {
+         // Reset current tool use ID when block ends
+         currentToolUseId = null;
+     }
      else if (event.type === 'message_start') {
-         // Capture initial usage data
+         // Capture initial usage data (including cache metrics)
          if (event.message.usage) {
              usageData = {
                  prompt_tokens: event.message.usage.input_tokens || 0,
                  completion_tokens: event.message.usage.output_tokens || 0,
-                 total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0)
+                 total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0),
+                 cache_creation_input_tokens: event.message.usage.cache_creation_input_tokens,
+                 cache_read_input_tokens: event.message.usage.cache_read_input_tokens
              };
          }
      }
      else if (event.type === 'message_delta') {
-         // Update usage data with final token counts
+         // Update usage data with final token counts (including cache metrics)
          if (event.usage) {
              if (!usageData) {
                  usageData = {
@@ -303,14 +326,37 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
              }
              usageData.completion_tokens = event.usage.output_tokens || 0;
              usageData.total_tokens = usageData.prompt_tokens + usageData.completion_tokens;
+             // Update cache metrics if present
+             if (event.usage.cache_creation_input_tokens !== undefined) {
+                 usageData.cache_creation_input_tokens = event.usage.cache_creation_input_tokens;
+             }
+             if (event.usage.cache_read_input_tokens !== undefined) {
+                 usageData.cache_read_input_tokens = event.usage.cache_read_input_tokens;
+             }
          }
      }
  }
- // Yield tool calls if any
+ // Yield tool calls if any (only after stream completes)
  if (hasToolCalls && toolCallsBuffer.size > 0) {
+     // Validate that all tool call arguments are complete valid JSON
+     const toolCalls = Array.from(toolCallsBuffer.values());
+     for (const toolCall of toolCalls) {
+         try {
+             // Validate JSON completeness
+             // Empty string should be treated as empty object
+             const args = toolCall.function.arguments.trim() || '{}';
+             JSON.parse(args);
+             // Update with normalized version
+             toolCall.function.arguments = args;
+         }
+         catch (e) {
+             const errorMsg = e instanceof Error ? e.message : 'Unknown error';
+             throw new Error(`Incomplete tool call JSON for ${toolCall.function.name}: ${toolCall.function.arguments} (${errorMsg})`);
+         }
+     }
      yield {
          type: 'tool_calls',
-         tool_calls: Array.from(toolCallsBuffer.values())
+         tool_calls: toolCalls
      };
  }
  // Yield usage information if available
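In isolation, the validation added above amounts to the following helper (a hedged TypeScript sketch of the same logic, not the package's exact code): normalize an empty streamed argument string to '{}', and fail fast if the accumulated JSON never became parseable.

    function normalizeToolArguments(raw: string, toolName: string): string {
        const args = raw.trim() || '{}'; // an empty argument stream becomes an empty object
        try {
            JSON.parse(args); // throws if the streamed JSON is still incomplete
            return args;
        } catch (e) {
            const msg = e instanceof Error ? e.message : 'Unknown error';
            throw new Error(`Incomplete tool call JSON for ${toolName}: ${raw} (${msg})`);
        }
    }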
package/dist/api/chat.d.ts CHANGED
@@ -69,6 +69,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface StreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
package/dist/api/chat.js CHANGED
@@ -1,5 +1,5 @@
  import OpenAI from 'openai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { executeMCPTool } from '../utils/mcpToolsManager.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  /**
@@ -10,8 +10,7 @@ import { SYSTEM_PROMPT } from './systemPrompt.js';
   * 2. If no custom system prompt: use default as system
   */
  function convertToOpenAIMessages(messages, includeSystemPrompt = true) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let result = messages.map(msg => {
          // If the message contains images, use the content array format
          if (msg.role === 'user' && msg.images && msg.images.length > 0) {
@@ -276,7 +275,9 @@ export async function* createStreamingChatCompletion(options, abortSignal) {
              usageData = {
                  prompt_tokens: usageValue.prompt_tokens || 0,
                  completion_tokens: usageValue.completion_tokens || 0,
-                 total_tokens: usageValue.total_tokens || 0
+                 total_tokens: usageValue.total_tokens || 0,
+                 // OpenAI Chat API: cached_tokens in prompt_tokens_details
+                 cached_tokens: usageValue.prompt_tokens_details?.cached_tokens
              };
          }
          // Skip content processing if no choices (but usage is already captured above)
package/dist/api/gemini.d.ts CHANGED
@@ -10,6 +10,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface GeminiStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
package/dist/api/gemini.js CHANGED
@@ -1,5 +1,5 @@
- import { GoogleGenerativeAI } from '@google/generative-ai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { GoogleGenAI } from '@google/genai';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  let geminiClient = null;
  function getGeminiClient() {
@@ -8,7 +8,20 @@ function getGeminiClient() {
      if (!config.apiKey) {
          throw new Error('Gemini API configuration is incomplete. Please configure API key first.');
      }
-     geminiClient = new GoogleGenerativeAI(config.apiKey);
+     // Create client configuration
+     const clientConfig = {
+         apiKey: config.apiKey
+     };
+     // Support custom baseUrl and headers for proxy servers
+     if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
+         clientConfig.httpOptions = {
+             baseUrl: config.baseUrl,
+             headers: {
+                 'x-goog-api-key': config.apiKey, // Gemini API requires this header
+             }
+         };
+     }
+     geminiClient = new GoogleGenAI(clientConfig);
      }
      return geminiClient;
  }
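A minimal hedged sketch of the new client construction: the httpOptions baseUrl and the x-goog-api-key header come from the hunk above; the proxy URL is a placeholder, and httpOptions is only needed when routing through a proxy.

    import { GoogleGenAI } from '@google/genai';

    const apiKey = process.env.GEMINI_API_KEY ?? '';
    const ai = new GoogleGenAI({
        apiKey,
        httpOptions: {
            baseUrl: 'https://my-gemini-proxy.example.com', // assumed proxy endpoint, for illustration
            headers: { 'x-goog-api-key': apiKey },
        },
    });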
@@ -26,10 +39,16 @@ function convertToolsToGemini(tools) {
      .filter(tool => tool.type === 'function' && 'function' in tool)
      .map(tool => {
      if (tool.type === 'function' && 'function' in tool) {
+         // Convert OpenAI parameters schema to Gemini format
+         const params = tool.function.parameters;
          return {
              name: tool.function.name,
              description: tool.function.description || '',
-             parameters: tool.function.parameters
+             parametersJsonSchema: {
+                 type: 'object',
+                 properties: params.properties || {},
+                 required: params.required || []
+             }
          };
      }
      throw new Error('Invalid tool format');
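As a concrete illustration of the conversion above, an OpenAI-style function tool becomes a Gemini function declaration carrying the raw JSON schema under parametersJsonSchema. The tool definition here is a made-up example; only the output shape follows the hunk.

    // Hypothetical OpenAI-style tool definition, for illustration only.
    const openAiTool = {
        type: 'function',
        function: {
            name: 'read_file',
            description: 'Read a file from disk',
            parameters: {
                type: 'object',
                properties: { path: { type: 'string' } },
                required: ['path'],
            },
        },
    };

    // Shape produced by the converter in the hunk above.
    const geminiDeclaration = {
        name: openAiTool.function.name,
        description: openAiTool.function.description,
        parametersJsonSchema: {
            type: 'object',
            properties: openAiTool.function.parameters.properties,
            required: openAiTool.function.parameters.required,
        },
    };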
@@ -37,40 +56,110 @@ function convertToolsToGemini(tools) {
      return [{ functionDeclarations }];
  }
  /**
-  * Convert our ChatMessage format to Gemini's Content format
-  * Logic:
-  * 1. If custom system prompt exists: use custom as systemInstruction, prepend default as first user message
-  * 2. If no custom system prompt: use default as systemInstruction
+  * Convert our ChatMessage format to Gemini's format
   */
  function convertToGeminiMessages(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      let systemInstruction;
      const contents = [];
-     for (const msg of messages) {
+     for (let i = 0; i < messages.length; i++) {
+         const msg = messages[i];
+         if (!msg)
+             continue;
          // Extract system message as systemInstruction
          if (msg.role === 'system') {
              systemInstruction = msg.content;
              continue;
          }
-         // Skip tool messages for now (Gemini handles them differently)
+         // Handle tool results
          if (msg.role === 'tool') {
-             // Tool results in Gemini are represented as function response parts
-             const parts = [{
-                     functionResponse: {
-                         name: 'function_name', // This should be mapped from tool_call_id
-                         response: {
-                             content: msg.content
+             // Find the corresponding function call to get the function name
+             // Look backwards in contents to find the matching tool call
+             let functionName = 'unknown_function';
+             for (let j = contents.length - 1; j >= 0; j--) {
+                 const contentMsg = contents[j];
+                 if (contentMsg.role === 'model' && contentMsg.parts) {
+                     for (const part of contentMsg.parts) {
+                         if (part.functionCall) {
+                             functionName = part.functionCall.name;
+                             break;
                          }
                      }
-                 }];
+                     if (functionName !== 'unknown_function')
+                         break;
+                 }
+             }
+             // Tool response must be a valid object for Gemini API
+             // If content is a JSON string, parse it; otherwise wrap it in an object
+             let responseData;
+             if (!msg.content) {
+                 responseData = {};
+             }
+             else {
+                 let contentToParse = msg.content;
+                 // Sometimes the content is double-encoded as JSON
+                 // First, try to parse it once
+                 try {
+                     const firstParse = JSON.parse(contentToParse);
+                     // If it's a string, it might be double-encoded, try parsing again
+                     if (typeof firstParse === 'string') {
+                         contentToParse = firstParse;
+                     }
+                 }
+                 catch {
+                     // Not JSON, use as-is
+                 }
+                 // Now parse or wrap the final content
+                 try {
+                     const parsed = JSON.parse(contentToParse);
+                     // If parsed result is an object (not array, not null), use it directly
+                     if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+                         responseData = parsed;
+                     }
+                     else {
+                         // If it's a primitive, array, or null, wrap it
+                         responseData = { content: parsed };
+                     }
+                 }
+                 catch {
+                     // Not valid JSON, wrap the raw string
+                     responseData = { content: contentToParse };
+                 }
+             }
              contents.push({
-                 role: 'function',
+                 role: 'user',
+                 parts: [{
+                         functionResponse: {
+                             name: functionName,
+                             response: responseData
+                         }
+                     }]
+             });
+             continue;
+         }
+         // Handle tool calls in assistant messages
+         if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
+             const parts = [];
+             // Add text content if exists
+             if (msg.content) {
+                 parts.push({ text: msg.content });
+             }
+             // Add function calls
+             for (const toolCall of msg.tool_calls) {
+                 parts.push({
+                     functionCall: {
+                         name: toolCall.function.name,
+                         args: JSON.parse(toolCall.function.arguments)
+                     }
+                 });
+             }
+             contents.push({
+                 role: 'model',
                  parts
              });
              continue;
          }
-         // Convert user/assistant messages
+         // Build message parts
          const parts = [];
          // Add text content
          if (msg.content) {
@@ -79,7 +168,6 @@ function convertToGeminiMessages(messages) {
          // Add images for user messages
          if (msg.role === 'user' && msg.images && msg.images.length > 0) {
              for (const image of msg.images) {
-                 // Extract base64 data and mime type
                  const base64Match = image.data.match(/^data:([^;]+);base64,(.+)$/);
                  if (base64Match) {
                      parts.push({
@@ -91,35 +179,20 @@ function convertToGeminiMessages(messages) {
                  }
              }
          }
-         // Handle tool calls in assistant messages
-         if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
-             for (const toolCall of msg.tool_calls) {
-                 parts.push({
-                     functionCall: {
-                         name: toolCall.function.name,
-                         args: JSON.parse(toolCall.function.arguments)
-                     }
-                 });
-             }
-         }
-         // Map role (Gemini uses 'user' and 'model' instead of 'user' and 'assistant')
+         // Add to contents
          const role = msg.role === 'assistant' ? 'model' : 'user';
-         contents.push({
-             role,
-             parts
-         });
+         contents.push({ role, parts });
      }
-     // If a custom system prompt is configured
+     // Handle system instruction
      if (customSystemPrompt) {
-         // Use the custom prompt as systemInstruction and prepend the default prompt as the first user message
          systemInstruction = customSystemPrompt;
+         // Prepend default system prompt as first user message
          contents.unshift({
              role: 'user',
              parts: [{ text: SYSTEM_PROMPT }]
          });
      }
      else if (!systemInstruction) {
-         // No custom system prompt: use the default prompt as systemInstruction
          systemInstruction = SYSTEM_PROMPT;
      }
      return { systemInstruction, contents };
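In isolation, the double-decode fallback added for tool results looks roughly like this (a hedged TypeScript sketch of the logic above, not the package's exact code): tool output may arrive as plain text, a JSON string, or a double-encoded JSON string, and Gemini's functionResponse expects a plain object.

    function toGeminiResponseObject(content: string): Record<string, unknown> {
        if (!content) return {};
        let text = content;
        try {
            const first = JSON.parse(text);
            if (typeof first === 'string') text = first; // was double-encoded
        } catch { /* not JSON, keep the raw string */ }
        try {
            const parsed = JSON.parse(text);
            return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
                ? parsed
                : { content: parsed }; // wrap primitives, arrays, and null
        } catch {
            return { content: text }; // wrap non-JSON text
        }
    }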
@@ -129,83 +202,79 @@ function convertToGeminiMessages(messages) {
   */
  export async function* createStreamingGeminiCompletion(options, abortSignal) {
      const client = getGeminiClient();
-     const config = getOpenAiConfig();
      try {
          const { systemInstruction, contents } = convertToGeminiMessages(options.messages);
-         // Initialize the model with optional custom baseUrl
-         // Note: For Gemini API, baseUrl should be in format: https://your-proxy.com/v1beta
-         // Default is: https://generativelanguage.googleapis.com/v1beta
-         const modelConfig = {
+         // Build request config
+         const requestConfig = {
              model: options.model,
-             systemInstruction,
-             tools: convertToolsToGemini(options.tools),
-             generationConfig: {
+             contents,
+             config: {
+                 systemInstruction,
                  temperature: options.temperature ?? 0.7,
              }
          };
-         // Support custom baseUrl for proxy servers
-         const requestOptions = {};
-         if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
-             // Only set custom baseUrl if it's not the default OpenAI URL
-             requestOptions.baseUrl = config.baseUrl;
-         }
-         const model = client.getGenerativeModel(modelConfig, requestOptions);
-         // Start chat session
-         const chat = model.startChat({
-             history: contents.slice(0, -1), // All messages except the last one
-         });
-         // Get the last user message
-         const lastMessage = contents[contents.length - 1];
-         if (!lastMessage) {
-             throw new Error('No user message found');
+         // Add tools if provided
+         const geminiTools = convertToolsToGemini(options.tools);
+         if (geminiTools) {
+             requestConfig.config.tools = geminiTools;
          }
          // Stream the response
-         const result = await chat.sendMessageStream(lastMessage.parts);
+         const stream = await client.models.generateContentStream(requestConfig);
          let contentBuffer = '';
         let toolCallsBuffer = [];
          let hasToolCalls = false;
          let toolCallIndex = 0;
-         for await (const chunk of result.stream) {
+         let totalTokens = { prompt: 0, completion: 0, total: 0 };
+         // Save original console.warn to suppress SDK warnings
+         const originalWarn = console.warn;
+         console.warn = () => { }; // Suppress "there are non-text parts" warnings
+         for await (const chunk of stream) {
              if (abortSignal?.aborted) {
+                 console.warn = originalWarn; // Restore console.warn
                  return;
              }
-             const candidate = chunk.candidates?.[0];
-             if (!candidate)
-                 continue;
              // Process text content
-             const text = chunk.text();
-             if (text) {
-                 contentBuffer += text;
+             if (chunk.text) {
+                 contentBuffer += chunk.text;
                  yield {
                      type: 'content',
-                     content: text
+                     content: chunk.text
                  };
              }
-             // Process function calls (tool calls)
-             const functionCalls = candidate.content?.parts?.filter(part => 'functionCall' in part);
-             if (functionCalls && functionCalls.length > 0) {
+             // Process function calls using the official API
+             if (chunk.functionCalls && chunk.functionCalls.length > 0) {
                  hasToolCalls = true;
-                 for (const fc of functionCalls) {
-                     if ('functionCall' in fc && fc.functionCall) {
-                         const toolCall = {
-                             id: `call_${toolCallIndex++}`,
-                             type: 'function',
-                             function: {
-                                 name: fc.functionCall.name,
-                                 arguments: JSON.stringify(fc.functionCall.args)
-                             }
-                         };
-                         toolCallsBuffer.push(toolCall);
-                         // Yield delta for token counting
-                         const deltaText = fc.functionCall.name + JSON.stringify(fc.functionCall.args);
-                         yield {
-                             type: 'tool_call_delta',
-                             delta: deltaText
-                         };
-                     }
+                 for (const fc of chunk.functionCalls) {
+                     if (!fc.name)
+                         continue;
+                     const toolCall = {
+                         id: `call_${toolCallIndex++}`,
+                         type: 'function',
+                         function: {
+                             name: fc.name,
+                             arguments: JSON.stringify(fc.args)
+                         }
+                     };
+                     toolCallsBuffer.push(toolCall);
+                     // Yield delta for token counting
+                     const deltaText = fc.name + JSON.stringify(fc.args);
+                     yield {
+                         type: 'tool_call_delta',
+                         delta: deltaText
+                     };
                  }
              }
+             // Track usage info
+             if (chunk.usageMetadata) {
+                 totalTokens = {
+                     prompt: chunk.usageMetadata.promptTokenCount || 0,
+                     completion: chunk.usageMetadata.candidatesTokenCount || 0,
+                     total: chunk.usageMetadata.totalTokenCount || 0
+                 };
+             }
          }
+         // Restore console.warn
+         console.warn = originalWarn;
          // Yield tool calls if any
          if (hasToolCalls && toolCallsBuffer.length > 0) {
              yield {
@@ -213,16 +282,14 @@ export async function* createStreamingGeminiCompletion(options, abortSignal) {
                  tool_calls: toolCallsBuffer
              };
          }
-         // Get final response for usage info
-         const finalResponse = await result.response;
-         const usageMetadata = finalResponse.usageMetadata;
-         if (usageMetadata) {
+         // Yield usage info
+         if (totalTokens.total > 0) {
              yield {
                  type: 'usage',
                  usage: {
-                     prompt_tokens: usageMetadata.promptTokenCount || 0,
-                     completion_tokens: usageMetadata.candidatesTokenCount || 0,
-                     total_tokens: usageMetadata.totalTokenCount || 0
+                     prompt_tokens: totalTokens.prompt,
+                     completion_tokens: totalTokens.completion,
+                     total_tokens: totalTokens.total
                  }
              };
          }
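For orientation, a hedged sketch of the new call path, assuming the @google/genai streaming surface shown above (generateContentStream, chunk.text, chunk.functionCalls, chunk.usageMetadata); the model name is a placeholder, not something the diff specifies.

    import { GoogleGenAI } from '@google/genai';

    async function demo(): Promise<void> {
        const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY ?? '' });
        const stream = await ai.models.generateContentStream({
            model: 'gemini-2.0-flash', // assumed model name, for illustration only
            contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
            config: { temperature: 0.7 },
        });
        for await (const chunk of stream) {
            if (chunk.text) process.stdout.write(chunk.text);                   // streamed text
            if (chunk.functionCalls?.length) console.log(chunk.functionCalls);  // tool calls, if any
            if (chunk.usageMetadata) console.log('tokens:', chunk.usageMetadata.totalTokenCount);
        }
    }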
package/dist/api/responses.d.ts CHANGED
@@ -26,6 +26,9 @@ export interface UsageInfo {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
+     cache_creation_input_tokens?: number;
+     cache_read_input_tokens?: number;
+     cached_tokens?: number;
  }
  export interface ResponseStreamChunk {
      type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
package/dist/api/responses.js CHANGED
@@ -1,5 +1,5 @@
  import OpenAI from 'openai';
- import { getOpenAiConfig } from '../utils/apiConfig.js';
+ import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
  import { executeMCPTool } from '../utils/mcpToolsManager.js';
  import { SYSTEM_PROMPT } from './systemPrompt.js';
  /**
@@ -83,8 +83,7 @@ export function resetOpenAIClient() {
   * 2. If no custom system prompt: use default as instructions
   */
  function convertToResponseInput(messages) {
-     const config = getOpenAiConfig();
-     const customSystemPrompt = config.systemPrompt;
+     const customSystemPrompt = getCustomSystemPrompt();
      const result = [];
      for (const msg of messages) {
          if (!msg)
@@ -410,7 +409,9 @@ export async function* createStreamingResponse(options, abortSignal) {
              usageData = {
                  prompt_tokens: chunk.response.usage.input_tokens || 0,
                  completion_tokens: chunk.response.usage.output_tokens || 0,
-                 total_tokens: chunk.response.usage.total_tokens || 0
+                 total_tokens: chunk.response.usage.total_tokens || 0,
+                 // OpenAI Responses API: cached_tokens in input_tokens_details (note: tokenS)
+                 cached_tokens: chunk.response.usage.input_tokens_details?.cached_tokens
              };
          }
          break;
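A hedged sketch of how the provider-specific cache fields above can be folded into the shared UsageInfo shape; only the two detail paths (prompt_tokens_details for the Chat Completions API, input_tokens_details for the Responses API) come from the diffs, the surrounding types and helper are illustration.

    // Normalize cached-token reporting from either OpenAI API family into one field.
    function readCachedTokens(usage: {
        prompt_tokens_details?: { cached_tokens?: number }; // Chat Completions API
        input_tokens_details?: { cached_tokens?: number };  // Responses API
    }): number | undefined {
        return usage.prompt_tokens_details?.cached_tokens
            ?? usage.input_tokens_details?.cached_tokens;
    }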