snow-ai 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,8 @@ export interface UsageInfo {
     prompt_tokens: number;
     completion_tokens: number;
     total_tokens: number;
+    cache_creation_input_tokens?: number;
+    cache_read_input_tokens?: number;
 }
 export interface AnthropicStreamChunk {
     type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
@@ -1,6 +1,6 @@
 import Anthropic from '@anthropic-ai/sdk';
 import { createHash, randomUUID } from 'crypto';
-import { getOpenAiConfig } from '../utils/apiConfig.js';
+import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
 import { SYSTEM_PROMPT } from './systemPrompt.js';
 let anthropicClient = null;
 function getAnthropicClient() {
@@ -45,12 +45,13 @@ function generateUserId(sessionId) {
 }
 /**
  * Convert OpenAI-style tools to Anthropic tool format
+ * Adds cache_control to the last tool for prompt caching
  */
 function convertToolsToAnthropic(tools) {
     if (!tools || tools.length === 0) {
         return undefined;
     }
-    return tools
+    const convertedTools = tools
         .filter(tool => tool.type === 'function' && 'function' in tool)
         .map(tool => {
         if (tool.type === 'function' && 'function' in tool) {
@@ -62,6 +63,12 @@ function convertToolsToAnthropic(tools) {
         }
         throw new Error('Invalid tool format');
     });
+    // Add cache_control to the last tool for prompt caching
+    if (convertedTools.length > 0) {
+        const lastTool = convertedTools[convertedTools.length - 1];
+        lastTool.cache_control = { type: 'ephemeral' };
+    }
+    return convertedTools;
 }
 /**
  * Convert our ChatMessage format to Anthropic's message format
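For context, a minimal sketch of what the converted tool array is assumed to look like after this change. The tool names and schemas below are hypothetical, and the { name, description, input_schema } mapping is the usual Anthropic tool shape produced by the .map step that this hunk does not show; only the last entry carries cache_control, which marks the end of the cacheable prefix (the tool definitions) for Anthropic prompt caching.

```ts
const anthropicTools = [
  {
    name: 'read_file', // hypothetical tool
    description: 'Read a file from disk',
    input_schema: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] },
  },
  {
    name: 'write_file', // hypothetical tool
    description: 'Write a file to disk',
    input_schema: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] },
    cache_control: { type: 'ephemeral' }, // added by convertToolsToAnthropic to the final tool only
  },
];
```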
@@ -71,8 +78,7 @@ function convertToolsToAnthropic(tools) {
  * 2. If no custom system prompt: use default as system
  */
 function convertToAnthropicMessages(messages) {
-    const config = getOpenAiConfig();
-    const customSystemPrompt = config.systemPrompt;
+    const customSystemPrompt = getCustomSystemPrompt();
     let systemContent;
     const anthropicMessages = [];
     for (const msg of messages) {
@@ -231,6 +237,7 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
     let toolCallsBuffer = new Map();
     let hasToolCalls = false;
     let usageData;
+    let currentToolUseId = null; // Track current tool use block ID
     for await (const event of stream) {
         if (abortSignal?.aborted) {
             return;
@@ -241,6 +248,7 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
             // Handle tool use blocks
             if (block.type === 'tool_use') {
                 hasToolCalls = true;
+                currentToolUseId = block.id; // Store current tool use ID
                 toolCallsBuffer.set(block.id, {
                     id: block.id,
                     type: 'function',
@@ -270,29 +278,38 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
             // Handle tool input deltas
             if (delta.type === 'input_json_delta') {
                 const jsonDelta = delta.partial_json;
-                const toolCall = toolCallsBuffer.get(event.index.toString());
-                if (toolCall) {
-                    toolCall.function.arguments += jsonDelta;
-                    // Yield delta for token counting
-                    yield {
-                        type: 'tool_call_delta',
-                        delta: jsonDelta
-                    };
+                // Use currentToolUseId instead of event.index
+                if (currentToolUseId) {
+                    const toolCall = toolCallsBuffer.get(currentToolUseId);
+                    if (toolCall) {
+                        toolCall.function.arguments += jsonDelta;
+                        // Yield delta for token counting
+                        yield {
+                            type: 'tool_call_delta',
+                            delta: jsonDelta
+                        };
+                    }
                 }
             }
         }
+        else if (event.type === 'content_block_stop') {
+            // Reset current tool use ID when block ends
+            currentToolUseId = null;
+        }
         else if (event.type === 'message_start') {
-            // Capture initial usage data
+            // Capture initial usage data (including cache metrics)
             if (event.message.usage) {
                 usageData = {
                     prompt_tokens: event.message.usage.input_tokens || 0,
                     completion_tokens: event.message.usage.output_tokens || 0,
-                    total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0)
+                    total_tokens: (event.message.usage.input_tokens || 0) + (event.message.usage.output_tokens || 0),
+                    cache_creation_input_tokens: event.message.usage.cache_creation_input_tokens,
+                    cache_read_input_tokens: event.message.usage.cache_read_input_tokens
                 };
             }
         }
         else if (event.type === 'message_delta') {
-            // Update usage data with final token counts
+            // Update usage data with final token counts (including cache metrics)
             if (event.usage) {
                 if (!usageData) {
                     usageData = {
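The fix above addresses a lookup mismatch: toolCallsBuffer is keyed by the tool_use block's id (set in content_block_start), but input_json_delta events were previously looked up with event.index.toString(), which never matches an id, so streamed tool arguments were silently dropped. A minimal, self-contained sketch of the bookkeeping (the id and JSON fragments are made up, and these are not the SDK's event types):

```ts
type ToolCall = { id: string; function: { name: string; arguments: string } };
const toolCallsBuffer = new Map<string, ToolCall>();
let currentToolUseId: string | null = null;

// content_block_start: remember the block id
currentToolUseId = 'toolu_123'; // illustrative id
toolCallsBuffer.set(currentToolUseId, { id: currentToolUseId, function: { name: 'read_file', arguments: '' } });

// content_block_delta events only carry an index, so the id tracked above is the link
for (const partialJson of ['{"path":', '"a.txt"}']) {
  const toolCall = currentToolUseId ? toolCallsBuffer.get(currentToolUseId) : undefined;
  if (toolCall) toolCall.function.arguments += partialJson;
}

// content_block_stop: clear the tracker
currentToolUseId = null;
// toolCallsBuffer.get('toolu_123')?.function.arguments === '{"path":"a.txt"}'
```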
@@ -303,14 +320,32 @@ export async function* createStreamingAnthropicCompletion(options, abortSignal)
                 }
                 usageData.completion_tokens = event.usage.output_tokens || 0;
                 usageData.total_tokens = usageData.prompt_tokens + usageData.completion_tokens;
+                // Update cache metrics if present
+                if (event.usage.cache_creation_input_tokens !== undefined) {
+                    usageData.cache_creation_input_tokens = event.usage.cache_creation_input_tokens;
+                }
+                if (event.usage.cache_read_input_tokens !== undefined) {
+                    usageData.cache_read_input_tokens = event.usage.cache_read_input_tokens;
+                }
             }
         }
     }
-    // Yield tool calls if any
+    // Yield tool calls if any (only after stream completes)
     if (hasToolCalls && toolCallsBuffer.size > 0) {
+        // Validate that all tool call arguments are complete valid JSON
+        const toolCalls = Array.from(toolCallsBuffer.values());
+        for (const toolCall of toolCalls) {
+            try {
+                // Validate JSON completeness
+                JSON.parse(toolCall.function.arguments);
+            }
+            catch (e) {
+                throw new Error(`Incomplete tool call JSON for ${toolCall.function.name}: ${toolCall.function.arguments}`);
+            }
+        }
         yield {
             type: 'tool_calls',
-            tool_calls: Array.from(toolCallsBuffer.values())
+            tool_calls: toolCalls
         };
     }
     // Yield usage information if available
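For reference, a hypothetical 'usage' chunk emitted at the end of an Anthropic stream after this change (the numbers are made up). The cache_* fields are only set when the API reports them, and prompt_tokens remains the provider's input_tokens figure, with cache activity reported alongside it rather than folded in:

```ts
const usageChunk = {
  type: 'usage' as const,
  usage: {
    prompt_tokens: 1200,               // input_tokens as reported by the API
    completion_tokens: 350,
    total_tokens: 1550,
    cache_creation_input_tokens: 8000, // tokens written to the prompt cache this turn
    cache_read_input_tokens: 0,        // tokens served from an existing cache entry
  },
};
```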
@@ -69,6 +69,9 @@ export interface UsageInfo {
     prompt_tokens: number;
     completion_tokens: number;
     total_tokens: number;
+    cache_creation_input_tokens?: number;
+    cache_read_input_tokens?: number;
+    cached_tokens?: number;
 }
 export interface StreamChunk {
     type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
package/dist/api/chat.js CHANGED
@@ -1,5 +1,5 @@
 import OpenAI from 'openai';
-import { getOpenAiConfig } from '../utils/apiConfig.js';
+import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
 import { executeMCPTool } from '../utils/mcpToolsManager.js';
 import { SYSTEM_PROMPT } from './systemPrompt.js';
 /**
@@ -10,8 +10,7 @@ import { SYSTEM_PROMPT } from './systemPrompt.js';
  * 2. If no custom system prompt: use default as system
  */
 function convertToOpenAIMessages(messages, includeSystemPrompt = true) {
-    const config = getOpenAiConfig();
-    const customSystemPrompt = config.systemPrompt;
+    const customSystemPrompt = getCustomSystemPrompt();
     let result = messages.map(msg => {
         // 如果消息包含图片,使用 content 数组格式
         if (msg.role === 'user' && msg.images && msg.images.length > 0) {
@@ -276,7 +275,9 @@ export async function* createStreamingChatCompletion(options, abortSignal) {
             usageData = {
                 prompt_tokens: usageValue.prompt_tokens || 0,
                 completion_tokens: usageValue.completion_tokens || 0,
-                total_tokens: usageValue.total_tokens || 0
+                total_tokens: usageValue.total_tokens || 0,
+                // OpenAI Chat API: cached_tokens in prompt_tokens_details
+                cached_tokens: usageValue.prompt_tokens_details?.cached_tokens
             };
         }
         // Skip content processing if no choices (but usage is already captured above)
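A sketch of the Chat Completions usage payload this code reads (values are illustrative). Note that cached_tokens reports the cached portion of prompt_tokens, so it is a subset rather than an additional amount, which is why the status bar later treats OpenAI-style cached tokens as already included in the input total:

```ts
const usageValue = {
  prompt_tokens: 5230,
  completion_tokens: 410,
  total_tokens: 5640,
  prompt_tokens_details: { cached_tokens: 4096 }, // cached subset of prompt_tokens
};
const cached = usageValue.prompt_tokens_details?.cached_tokens; // stored as cached_tokens above
```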
@@ -10,6 +10,9 @@ export interface UsageInfo {
     prompt_tokens: number;
     completion_tokens: number;
     total_tokens: number;
+    cache_creation_input_tokens?: number;
+    cache_read_input_tokens?: number;
+    cached_tokens?: number;
 }
 export interface GeminiStreamChunk {
     type: 'content' | 'tool_calls' | 'tool_call_delta' | 'done' | 'usage';
@@ -1,5 +1,5 @@
-import { GoogleGenerativeAI } from '@google/generative-ai';
-import { getOpenAiConfig } from '../utils/apiConfig.js';
+import { GoogleGenAI } from '@google/genai';
+import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
 import { SYSTEM_PROMPT } from './systemPrompt.js';
 let geminiClient = null;
 function getGeminiClient() {
@@ -8,7 +8,20 @@ function getGeminiClient() {
     if (!config.apiKey) {
         throw new Error('Gemini API configuration is incomplete. Please configure API key first.');
     }
-    geminiClient = new GoogleGenerativeAI(config.apiKey);
+    // Create client configuration
+    const clientConfig = {
+        apiKey: config.apiKey
+    };
+    // Support custom baseUrl and headers for proxy servers
+    if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
+        clientConfig.httpOptions = {
+            baseUrl: config.baseUrl,
+            headers: {
+                'x-goog-api-key': config.apiKey, // Gemini API requires this header
+            }
+        };
+    }
+    geminiClient = new GoogleGenAI(clientConfig);
 }
 return geminiClient;
 }
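A minimal sketch of the new client construction, assuming a proxy endpoint (the URL below is a placeholder). Without httpOptions the SDK talks to Google's default endpoint using just the API key:

```ts
import { GoogleGenAI } from '@google/genai';

const apiKey = process.env.GEMINI_API_KEY ?? '';
const client = new GoogleGenAI({
  apiKey,
  // Optional: route requests through a proxy (hypothetical URL)
  httpOptions: {
    baseUrl: 'https://my-gemini-proxy.example.com',
    headers: { 'x-goog-api-key': apiKey },
  },
});
```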
@@ -26,10 +39,16 @@ function convertToolsToGemini(tools) {
         .filter(tool => tool.type === 'function' && 'function' in tool)
         .map(tool => {
         if (tool.type === 'function' && 'function' in tool) {
+            // Convert OpenAI parameters schema to Gemini format
+            const params = tool.function.parameters;
             return {
                 name: tool.function.name,
                 description: tool.function.description || '',
-                parameters: tool.function.parameters
+                parametersJsonSchema: {
+                    type: 'object',
+                    properties: params.properties || {},
+                    required: params.required || []
+                }
             };
         }
         throw new Error('Invalid tool format');
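As an illustration of the schema mapping (the tool name and parameters are hypothetical), an OpenAI-style tool and the Gemini function declaration this conversion produces:

```ts
const openAiTool = {
  type: 'function' as const,
  function: {
    name: 'read_file',
    description: 'Read a file from disk',
    parameters: {
      type: 'object',
      properties: { path: { type: 'string' } },
      required: ['path'],
    },
  },
};

// Result of the .map() above for that tool
const functionDeclaration = {
  name: 'read_file',
  description: 'Read a file from disk',
  parametersJsonSchema: {
    type: 'object',
    properties: { path: { type: 'string' } },
    required: ['path'],
  },
};
```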
@@ -37,40 +56,110 @@ function convertToolsToGemini(tools) {
     return [{ functionDeclarations }];
 }
 /**
- * Convert our ChatMessage format to Gemini's Content format
- * Logic:
- * 1. If custom system prompt exists: use custom as systemInstruction, prepend default as first user message
- * 2. If no custom system prompt: use default as systemInstruction
+ * Convert our ChatMessage format to Gemini's format
  */
 function convertToGeminiMessages(messages) {
-    const config = getOpenAiConfig();
-    const customSystemPrompt = config.systemPrompt;
+    const customSystemPrompt = getCustomSystemPrompt();
     let systemInstruction;
     const contents = [];
-    for (const msg of messages) {
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i];
+        if (!msg)
+            continue;
         // Extract system message as systemInstruction
         if (msg.role === 'system') {
            systemInstruction = msg.content;
            continue;
         }
-        // Skip tool messages for now (Gemini handles them differently)
+        // Handle tool results
         if (msg.role === 'tool') {
-            // Tool results in Gemini are represented as function response parts
-            const parts = [{
-                    functionResponse: {
-                        name: 'function_name', // This should be mapped from tool_call_id
-                        response: {
-                            content: msg.content
+            // Find the corresponding function call to get the function name
+            // Look backwards in contents to find the matching tool call
+            let functionName = 'unknown_function';
+            for (let j = contents.length - 1; j >= 0; j--) {
+                const contentMsg = contents[j];
+                if (contentMsg.role === 'model' && contentMsg.parts) {
+                    for (const part of contentMsg.parts) {
+                        if (part.functionCall) {
+                            functionName = part.functionCall.name;
+                            break;
                         }
                     }
-                }];
+                    if (functionName !== 'unknown_function')
+                        break;
+                }
+            }
+            // Tool response must be a valid object for Gemini API
+            // If content is a JSON string, parse it; otherwise wrap it in an object
+            let responseData;
+            if (!msg.content) {
+                responseData = {};
+            }
+            else {
+                let contentToParse = msg.content;
+                // Sometimes the content is double-encoded as JSON
+                // First, try to parse it once
+                try {
+                    const firstParse = JSON.parse(contentToParse);
+                    // If it's a string, it might be double-encoded, try parsing again
+                    if (typeof firstParse === 'string') {
+                        contentToParse = firstParse;
+                    }
+                }
+                catch {
+                    // Not JSON, use as-is
+                }
+                // Now parse or wrap the final content
+                try {
+                    const parsed = JSON.parse(contentToParse);
+                    // If parsed result is an object (not array, not null), use it directly
+                    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
+                        responseData = parsed;
+                    }
+                    else {
+                        // If it's a primitive, array, or null, wrap it
+                        responseData = { content: parsed };
+                    }
+                }
+                catch {
+                    // Not valid JSON, wrap the raw string
+                    responseData = { content: contentToParse };
+                }
+            }
             contents.push({
-                role: 'function',
+                role: 'user',
+                parts: [{
+                        functionResponse: {
+                            name: functionName,
+                            response: responseData
+                        }
+                    }]
+            });
+            continue;
+        }
+        // Handle tool calls in assistant messages
+        if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
+            const parts = [];
+            // Add text content if exists
+            if (msg.content) {
+                parts.push({ text: msg.content });
+            }
+            // Add function calls
+            for (const toolCall of msg.tool_calls) {
+                parts.push({
+                    functionCall: {
+                        name: toolCall.function.name,
+                        args: JSON.parse(toolCall.function.arguments)
+                    }
+                });
+            }
+            contents.push({
+                role: 'model',
                 parts
             });
             continue;
         }
-        // Convert user/assistant messages
+        // Build message parts
         const parts = [];
         // Add text content
         if (msg.content) {
@@ -79,7 +168,6 @@ function convertToGeminiMessages(messages) {
         // Add images for user messages
         if (msg.role === 'user' && msg.images && msg.images.length > 0) {
             for (const image of msg.images) {
-                // Extract base64 data and mime type
                 const base64Match = image.data.match(/^data:([^;]+);base64,(.+)$/);
                 if (base64Match) {
                     parts.push({
@@ -91,35 +179,20 @@ function convertToGeminiMessages(messages) {
                 }
             }
         }
-        // Handle tool calls in assistant messages
-        if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
-            for (const toolCall of msg.tool_calls) {
-                parts.push({
-                    functionCall: {
-                        name: toolCall.function.name,
-                        args: JSON.parse(toolCall.function.arguments)
-                    }
-                });
-            }
-        }
-        // Map role (Gemini uses 'user' and 'model' instead of 'user' and 'assistant')
+        // Add to contents
         const role = msg.role === 'assistant' ? 'model' : 'user';
-        contents.push({
-            role,
-            parts
-        });
+        contents.push({ role, parts });
     }
-    // 如果配置了自定义系统提示词
+    // Handle system instruction
     if (customSystemPrompt) {
-        // 自定义系统提示词作为 systemInstruction,默认系统提示词作为第一条用户消息
         systemInstruction = customSystemPrompt;
+        // Prepend default system prompt as first user message
         contents.unshift({
             role: 'user',
             parts: [{ text: SYSTEM_PROMPT }]
         });
     }
     else if (!systemInstruction) {
-        // 没有自定义系统提示词,默认系统提示词作为 systemInstruction
         systemInstruction = SYSTEM_PROMPT;
     }
     return { systemInstruction, contents };
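Putting the new tool handling together, a hypothetical contents array produced by convertToGeminiMessages for a single tool round trip (the message text, tool name, and arguments are made up): the assistant's tool_calls become a model turn with functionCall parts, and the matching tool result becomes a user turn with a functionResponse part whose name is recovered by scanning backwards through contents.

```ts
const contents = [
  { role: 'user', parts: [{ text: 'What is in package.json?' }] },
  {
    role: 'model',
    parts: [{ functionCall: { name: 'read_file', args: { path: 'package.json' } } }],
  },
  {
    role: 'user',
    parts: [{ functionResponse: { name: 'read_file', response: { content: '{ "name": "snow-ai" }' } } }],
  },
];
```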
@@ -129,83 +202,79 @@ function convertToGeminiMessages(messages) {
  */
 export async function* createStreamingGeminiCompletion(options, abortSignal) {
     const client = getGeminiClient();
-    const config = getOpenAiConfig();
     try {
         const { systemInstruction, contents } = convertToGeminiMessages(options.messages);
-        // Initialize the model with optional custom baseUrl
-        // Note: For Gemini API, baseUrl should be in format: https://your-proxy.com/v1beta
-        // Default is: https://generativelanguage.googleapis.com/v1beta
-        const modelConfig = {
+        // Build request config
+        const requestConfig = {
             model: options.model,
-            systemInstruction,
-            tools: convertToolsToGemini(options.tools),
-            generationConfig: {
+            contents,
+            config: {
+                systemInstruction,
                 temperature: options.temperature ?? 0.7,
             }
         };
-        // Support custom baseUrl for proxy servers
-        const requestOptions = {};
-        if (config.baseUrl && config.baseUrl !== 'https://api.openai.com/v1') {
-            // Only set custom baseUrl if it's not the default OpenAI URL
-            requestOptions.baseUrl = config.baseUrl;
-        }
-        const model = client.getGenerativeModel(modelConfig, requestOptions);
-        // Start chat session
-        const chat = model.startChat({
-            history: contents.slice(0, -1), // All messages except the last one
-        });
-        // Get the last user message
-        const lastMessage = contents[contents.length - 1];
-        if (!lastMessage) {
-            throw new Error('No user message found');
+        // Add tools if provided
+        const geminiTools = convertToolsToGemini(options.tools);
+        if (geminiTools) {
+            requestConfig.config.tools = geminiTools;
         }
         // Stream the response
-        const result = await chat.sendMessageStream(lastMessage.parts);
+        const stream = await client.models.generateContentStream(requestConfig);
         let contentBuffer = '';
         let toolCallsBuffer = [];
         let hasToolCalls = false;
         let toolCallIndex = 0;
-        for await (const chunk of result.stream) {
+        let totalTokens = { prompt: 0, completion: 0, total: 0 };
+        // Save original console.warn to suppress SDK warnings
+        const originalWarn = console.warn;
+        console.warn = () => { }; // Suppress "there are non-text parts" warnings
+        for await (const chunk of stream) {
             if (abortSignal?.aborted) {
+                console.warn = originalWarn; // Restore console.warn
                 return;
             }
-            const candidate = chunk.candidates?.[0];
-            if (!candidate)
-                continue;
             // Process text content
-            const text = chunk.text();
-            if (text) {
-                contentBuffer += text;
+            if (chunk.text) {
+                contentBuffer += chunk.text;
                 yield {
                     type: 'content',
-                    content: text
+                    content: chunk.text
                 };
             }
-            // Process function calls (tool calls)
-            const functionCalls = candidate.content?.parts?.filter(part => 'functionCall' in part);
-            if (functionCalls && functionCalls.length > 0) {
+            // Process function calls using the official API
+            if (chunk.functionCalls && chunk.functionCalls.length > 0) {
                 hasToolCalls = true;
-                for (const fc of functionCalls) {
-                    if ('functionCall' in fc && fc.functionCall) {
-                        const toolCall = {
-                            id: `call_${toolCallIndex++}`,
-                            type: 'function',
-                            function: {
-                                name: fc.functionCall.name,
-                                arguments: JSON.stringify(fc.functionCall.args)
-                            }
-                        };
-                        toolCallsBuffer.push(toolCall);
-                        // Yield delta for token counting
-                        const deltaText = fc.functionCall.name + JSON.stringify(fc.functionCall.args);
-                        yield {
-                            type: 'tool_call_delta',
-                            delta: deltaText
-                        };
-                    }
+                for (const fc of chunk.functionCalls) {
+                    if (!fc.name)
+                        continue;
+                    const toolCall = {
+                        id: `call_${toolCallIndex++}`,
+                        type: 'function',
+                        function: {
+                            name: fc.name,
+                            arguments: JSON.stringify(fc.args)
+                        }
+                    };
+                    toolCallsBuffer.push(toolCall);
+                    // Yield delta for token counting
+                    const deltaText = fc.name + JSON.stringify(fc.args);
+                    yield {
+                        type: 'tool_call_delta',
+                        delta: deltaText
+                    };
                 }
             }
+            // Track usage info
+            if (chunk.usageMetadata) {
+                totalTokens = {
+                    prompt: chunk.usageMetadata.promptTokenCount || 0,
+                    completion: chunk.usageMetadata.candidatesTokenCount || 0,
+                    total: chunk.usageMetadata.totalTokenCount || 0
+                };
+            }
         }
+        // Restore console.warn
+        console.warn = originalWarn;
         // Yield tool calls if any
         if (hasToolCalls && toolCallsBuffer.length > 0) {
             yield {
@@ -213,16 +282,14 @@ export async function* createStreamingGeminiCompletion(options, abortSignal) {
                 tool_calls: toolCallsBuffer
             };
         }
-        // Get final response for usage info
-        const finalResponse = await result.response;
-        const usageMetadata = finalResponse.usageMetadata;
-        if (usageMetadata) {
+        // Yield usage info
+        if (totalTokens.total > 0) {
             yield {
                 type: 'usage',
                 usage: {
-                    prompt_tokens: usageMetadata.promptTokenCount || 0,
-                    completion_tokens: usageMetadata.candidatesTokenCount || 0,
-                    total_tokens: usageMetadata.totalTokenCount || 0
+                    prompt_tokens: totalTokens.prompt,
+                    completion_tokens: totalTokens.completion,
+                    total_tokens: totalTokens.total
                 }
             };
         }
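A minimal usage sketch of the streaming call the reworked generator is built on (the model name is a placeholder). chunk.text, chunk.functionCalls, and chunk.usageMetadata are the accessors the code above relies on:

```ts
import { GoogleGenAI } from '@google/genai';

async function demo(): Promise<void> {
  const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY ?? '' });
  const stream = await client.models.generateContentStream({
    model: 'gemini-2.0-flash', // placeholder model name
    contents: [{ role: 'user', parts: [{ text: 'Hello' }] }],
    config: { systemInstruction: 'You are a command line assistant.', temperature: 0.7 },
  });
  for await (const chunk of stream) {
    if (chunk.text) process.stdout.write(chunk.text);
    if (chunk.functionCalls?.length) console.log(chunk.functionCalls);
    if (chunk.usageMetadata) console.log(chunk.usageMetadata.totalTokenCount);
  }
}
```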
@@ -26,6 +26,9 @@ export interface UsageInfo {
     prompt_tokens: number;
     completion_tokens: number;
     total_tokens: number;
+    cache_creation_input_tokens?: number;
+    cache_read_input_tokens?: number;
+    cached_tokens?: number;
 }
 export interface ResponseStreamChunk {
     type: 'content' | 'tool_calls' | 'tool_call_delta' | 'reasoning_delta' | 'done' | 'usage';
@@ -1,5 +1,5 @@
 import OpenAI from 'openai';
-import { getOpenAiConfig } from '../utils/apiConfig.js';
+import { getOpenAiConfig, getCustomSystemPrompt } from '../utils/apiConfig.js';
 import { executeMCPTool } from '../utils/mcpToolsManager.js';
 import { SYSTEM_PROMPT } from './systemPrompt.js';
 /**
@@ -83,8 +83,7 @@ export function resetOpenAIClient() {
  * 2. If no custom system prompt: use default as instructions
  */
 function convertToResponseInput(messages) {
-    const config = getOpenAiConfig();
-    const customSystemPrompt = config.systemPrompt;
+    const customSystemPrompt = getCustomSystemPrompt();
     const result = [];
     for (const msg of messages) {
         if (!msg)
@@ -410,7 +409,9 @@ export async function* createStreamingResponse(options, abortSignal) {
             usageData = {
                 prompt_tokens: chunk.response.usage.input_tokens || 0,
                 completion_tokens: chunk.response.usage.output_tokens || 0,
-                total_tokens: chunk.response.usage.total_tokens || 0
+                total_tokens: chunk.response.usage.total_tokens || 0,
+                // OpenAI Responses API: cached_tokens in input_tokens_details (note: tokenS)
+                cached_tokens: chunk.response.usage.input_tokens_details?.cached_tokens
             };
         }
         break;
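The Responses API nests the cache figure under input_tokens_details (plural "tokens", as the comment notes), unlike the Chat API's prompt_tokens_details. An illustrative payload, assuming the same subset semantics the status bar applies to OpenAI-style caching:

```ts
const responseUsage = {
  input_tokens: 5230,
  output_tokens: 410,
  total_tokens: 5640,
  input_tokens_details: { cached_tokens: 4096 }, // assumed to be a cached subset of input_tokens
};
const cached = responseUsage.input_tokens_details?.cached_tokens;
```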
@@ -16,6 +16,9 @@ type Props = {
     contextUsage?: {
         inputTokens: number;
         maxContextTokens: number;
+        cacheCreationTokens?: number;
+        cacheReadTokens?: number;
+        cachedTokens?: number;
     };
     snapshotFileCount?: Map<number, number>;
 };
@@ -647,7 +647,15 @@ end try'`;
         React.createElement(Text, { color: "yellow", dimColor: true }, "\u2741 YOLO MODE ACTIVE - All tools will be auto-approved without confirmation"))),
     contextUsage && (React.createElement(Box, { marginTop: 1, paddingX: 1 },
         React.createElement(Text, { color: "gray", dimColor: true }, (() => {
-            const percentage = Math.min(100, (contextUsage.inputTokens / contextUsage.maxContextTokens) * 100);
+            // Determine which caching system is being used
+            const isAnthropic = (contextUsage.cacheCreationTokens || 0) > 0 || (contextUsage.cacheReadTokens || 0) > 0;
+            const isOpenAI = (contextUsage.cachedTokens || 0) > 0;
+            // For Anthropic: Total = inputTokens + cacheCreationTokens
+            // For OpenAI: Total = inputTokens (cachedTokens are already included in inputTokens)
+            const totalInputTokens = isAnthropic
+                ? contextUsage.inputTokens + (contextUsage.cacheCreationTokens || 0)
+                : contextUsage.inputTokens;
+            const percentage = Math.min(100, (totalInputTokens / contextUsage.maxContextTokens) * 100);
             let color;
             if (percentage < 50)
                 color = 'green';
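A compact restatement of the accounting the widget now applies, under the provider semantics stated in the comments above (Anthropic reports cache tokens separately from inputTokens; OpenAI's cachedTokens are already part of inputTokens):

```ts
function effectiveInputTokens(u: {
  inputTokens: number;
  cacheCreationTokens?: number;
  cacheReadTokens?: number;
  cachedTokens?: number;
}): number {
  const isAnthropic = (u.cacheCreationTokens ?? 0) > 0 || (u.cacheReadTokens ?? 0) > 0;
  // Anthropic: add newly cached input; OpenAI: cached tokens are already counted
  return isAnthropic ? u.inputTokens + (u.cacheCreationTokens ?? 0) : u.inputTokens;
}
```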
@@ -662,13 +670,32 @@ end try'`;
                     return `${(num / 1000).toFixed(1)}k`;
                 return num.toString();
             };
+            const hasCacheMetrics = isAnthropic || isOpenAI;
             return (React.createElement(React.Fragment, null,
                 React.createElement(Text, { color: color },
                     percentage.toFixed(1),
                     "%"),
                 React.createElement(Text, null, " \u00B7 "),
-                React.createElement(Text, { color: color }, formatNumber(contextUsage.inputTokens)),
-                React.createElement(Text, null, " tokens")));
+                React.createElement(Text, { color: color }, formatNumber(totalInputTokens)),
+                React.createElement(Text, null, " tokens"),
+                hasCacheMetrics && (React.createElement(React.Fragment, null,
+                    React.createElement(Text, null, " \u00B7 "),
+                    isAnthropic && (React.createElement(React.Fragment, null,
+                        (contextUsage.cacheReadTokens || 0) > 0 && (React.createElement(React.Fragment, null,
+                            React.createElement(Text, { color: "cyan" },
+                                "\u21AF ",
+                                formatNumber(contextUsage.cacheReadTokens || 0),
+                                " cached"))),
+                        (contextUsage.cacheCreationTokens || 0) > 0 && (React.createElement(React.Fragment, null,
+                            (contextUsage.cacheReadTokens || 0) > 0 && React.createElement(Text, null, " \u00B7 "),
+                            React.createElement(Text, { color: "magenta" },
+                                "\u25C6 ",
+                                formatNumber(contextUsage.cacheCreationTokens || 0),
+                                " new cache"))))),
+                    isOpenAI && (React.createElement(Text, { color: "cyan" },
+                        "\u21AF ",
+                        formatNumber(contextUsage.cachedTokens || 0),
+                        " cached"))))));
         })()))),
     React.createElement(Box, { marginTop: 1 },
         React.createElement(Text, { color: "gray", dimColor: true }, showCommands && getFilteredCommands().length > 0
@@ -728,7 +728,10 @@ export default function ChatScreen({}) {
     !pendingToolConfirmation && !isCompressing && !showSessionPanel && !showMcpPanel && !pendingRollback && (React.createElement(React.Fragment, null,
         React.createElement(ChatInput, { onSubmit: handleMessageSubmit, onCommand: handleCommandExecution, placeholder: "Ask me anything about coding...", disabled: !!pendingToolConfirmation, chatHistory: messages, onHistorySelect: handleHistorySelect, yoloMode: yoloMode, contextUsage: contextUsage ? {
             inputTokens: contextUsage.prompt_tokens,
-            maxContextTokens: getOpenAiConfig().maxContextTokens || 4000
+            maxContextTokens: getOpenAiConfig().maxContextTokens || 4000,
+            cacheCreationTokens: contextUsage.cache_creation_input_tokens,
+            cacheReadTokens: contextUsage.cache_read_input_tokens,
+            cachedTokens: contextUsage.cached_tokens
         } : undefined, snapshotFileCount: snapshotFileCount }),
         vscodeConnectionStatus !== 'disconnected' && (React.createElement(Box, { marginTop: 1 },
             React.createElement(Text, { color: vscodeConnectionStatus === 'connecting' ? 'yellow' :
@@ -4,7 +4,6 @@ import { spawn } from 'child_process';
 import { writeFileSync, readFileSync, existsSync, mkdirSync } from 'fs';
 import { join } from 'path';
 import { homedir, platform } from 'os';
-import { getOpenAiConfig, updateOpenAiConfig, } from '../../utils/apiConfig.js';
 import { SYSTEM_PROMPT } from '../../api/systemPrompt.js';
 const CONFIG_DIR = join(homedir(), '.snow');
 const SYSTEM_PROMPT_FILE = join(CONFIG_DIR, 'system-prompt.txt');
@@ -24,9 +23,17 @@ export default function SystemPromptConfigScreen({ onBack }) {
     useEffect(() => {
         const openEditor = async () => {
             ensureConfigDirectory();
-            // 读取当前配置的自定义系统提示词,如果为空则使用默认系统提示词
-            const config = getOpenAiConfig();
-            const currentPrompt = config.systemPrompt || SYSTEM_PROMPT;
+            // 读取系统提示词文件,如果不存在则使用默认系统提示词
+            let currentPrompt = SYSTEM_PROMPT;
+            if (existsSync(SYSTEM_PROMPT_FILE)) {
+                try {
+                    currentPrompt = readFileSync(SYSTEM_PROMPT_FILE, 'utf8');
+                }
+                catch {
+                    // 读取失败,使用默认
+                    currentPrompt = SYSTEM_PROMPT;
+                }
+            }
             // 写入临时文件供编辑
             writeFileSync(SYSTEM_PROMPT_FILE, currentPrompt, 'utf8');
             const editor = getSystemEditor();
@@ -39,22 +46,28 @@ export default function SystemPromptConfigScreen({ onBack }) {
             if (existsSync(SYSTEM_PROMPT_FILE)) {
                 try {
                     const editedContent = readFileSync(SYSTEM_PROMPT_FILE, 'utf8');
-                    // 如果编辑后的内容为空或与默认提示词相同,则保存为空(使用默认)
-                    // 否则保存自定义提示词
                     const trimmedContent = editedContent.trim();
                     if (trimmedContent === '' || trimmedContent === SYSTEM_PROMPT.trim()) {
-                        // 保存为空,表示使用默认提示词
-                        updateOpenAiConfig({ systemPrompt: undefined });
-                        console.log('System prompt reset to default. Please use `snow` to restart!');
+                        // 内容为空或与默认相同,删除文件,使用默认提示词
+                        try {
+                            const fs = require('fs');
+                            fs.unlinkSync(SYSTEM_PROMPT_FILE);
+                            console.log('System prompt reset to default. Please use `snow` to restart!');
+                        }
+                        catch {
+                            // 删除失败,保存空内容
+                            writeFileSync(SYSTEM_PROMPT_FILE, '', 'utf8');
+                            console.log('System prompt reset to default. Please use `snow` to restart!');
+                        }
                     }
                     else {
-                        // 保存自定义提示词
-                        updateOpenAiConfig({ systemPrompt: editedContent });
+                        // 保存自定义提示词到文件
+                        writeFileSync(SYSTEM_PROMPT_FILE, editedContent, 'utf8');
                         console.log('Custom system prompt saved successfully! Please use `snow` to restart!');
                     }
                 }
                 catch (error) {
-                    console.error('Failed to read edited content:', error instanceof Error ? error.message : 'Unknown error');
+                    console.error('Failed to save system prompt:', error instanceof Error ? error.message : 'Unknown error');
                 }
             }
             process.exit(0);
@@ -14,7 +14,6 @@ export interface ApiConfig {
     maxTokens?: number;
     compactModel?: CompactModelConfig;
     anthropicBeta?: boolean;
-    systemPrompt?: string;
 }
 export interface MCPServer {
     url?: string;
@@ -37,3 +36,9 @@ export declare function validateApiConfig(config: Partial<ApiConfig>): string[];
 export declare function updateMCPConfig(mcpConfig: MCPConfig): void;
 export declare function getMCPConfig(): MCPConfig;
 export declare function validateMCPConfig(config: Partial<MCPConfig>): string[];
+/**
+ * 读取自定义系统提示词
+ * 如果 system-prompt.txt 文件存在且不为空,返回其内容
+ * 否则返回 undefined (使用默认系统提示词)
+ */
+export declare function getCustomSystemPrompt(): string | undefined;
@@ -17,6 +17,7 @@ const DEFAULT_MCP_CONFIG = {
     mcpServers: {},
 };
 const CONFIG_DIR = join(homedir(), '.snow');
+const SYSTEM_PROMPT_FILE = join(CONFIG_DIR, 'system-prompt.txt');
 function normalizeRequestMethod(method) {
     if (method === 'chat' || method === 'responses' || method === 'gemini' || method === 'anthropic') {
         return method;
@@ -192,3 +193,26 @@ export function validateMCPConfig(config) {
     }
     return errors;
 }
+/**
+ * 读取自定义系统提示词
+ * 如果 system-prompt.txt 文件存在且不为空,返回其内容
+ * 否则返回 undefined (使用默认系统提示词)
+ */
+export function getCustomSystemPrompt() {
+    ensureConfigDirectory();
+    if (!existsSync(SYSTEM_PROMPT_FILE)) {
+        return undefined;
+    }
+    try {
+        const content = readFileSync(SYSTEM_PROMPT_FILE, 'utf8');
+        // 只有当文件完全为空时才返回 undefined
+        if (content.length === 0) {
+            return undefined;
+        }
+        // 返回原始内容,不做任何处理
+        return content;
+    }
+    catch {
+        return undefined;
+    }
+}
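A short sketch of the resolution order the message converters follow with this helper (per the comments in the hunks above): a non-empty ~/.snow/system-prompt.txt becomes the system prompt or instructions, with the built-in SYSTEM_PROMPT then prepended as the first user message; when no custom file exists, SYSTEM_PROMPT stays the system prompt.

```ts
import { getCustomSystemPrompt } from './utils/apiConfig.js'; // import path is illustrative
import { SYSTEM_PROMPT } from './api/systemPrompt.js';

const customSystemPrompt = getCustomSystemPrompt();
const systemContent = customSystemPrompt ?? SYSTEM_PROMPT;
// When a custom prompt is set, the default prompt is still sent, but as the first user message
const prependDefaultAsUserMessage = customSystemPrompt !== undefined;
```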
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "snow-ai",
-  "version": "0.2.11",
+  "version": "0.2.12",
   "description": "Intelligent Command Line Assistant powered by AI",
   "license": "MIT",
   "bin": {
@@ -40,7 +40,7 @@
   ],
   "dependencies": {
     "@anthropic-ai/sdk": "^0.65.0",
-    "@google/generative-ai": "^0.24.1",
+    "@google/genai": "^1.23.0",
     "@inkjs/ui": "^2.0.0",
     "@modelcontextprotocol/sdk": "^1.17.3",
     "chalk-template": "^1.1.2",
package/readme.md CHANGED
@@ -1,6 +1,7 @@
 # snow-ai
 
-> This readme is automatically generated by [create-ink-app](https://github.com/vadimdemedes/create-ink-app)
+English | [中文](readme_zh.md)
+
 
 ## Install
 
@@ -21,15 +22,17 @@ $ snow --update
 ## Config example `./User/.snow/config.json`
 ```json
 {
-  "openai": {
-    "baseUrl": "https://api.openai.com/v1",
+  "snowcfg": {
+    "baseUrl": "https://api.openai.com/v1", // Gemini: https://generativelanguage.googleapis.com  Anthropic: https://api.anthropic.com
     "apiKey": "your-api-key",
     "requestMethod": "responses",
     "advancedModel": "gpt-5-codex",
     "basicModel": "gpt-5-codex",
-    "maxContextTokens": 200000,
+    "maxContextTokens": 32000, // The maximum context length of the model
+    "maxTokens": 4096, // The maximum generation length of the model
+    "anthropicBeta": false,
     "compactModel": {
-      "baseUrl": "https://api.openai.com/v1",
+      "baseUrl": "https://api.opeai.com/v1",
       "apiKey": "your-api-key",
      "modelName": "gpt-4.1-mini"
     }
@@ -56,7 +59,24 @@ $ npm uninstall --global snow-ai
 * **Agent**
 
 ![alt text](image-1.png)
+ * During a response: press ESC once to stop the AI's generation
+
+ * Double-press ESC to view the conversation history and roll back to an earlier point, including file checkpoints
 
 * **Commands**
 
-![alt text](image-2.png)
+![alt text](image-2.png)
+ - /clear - Start a new session
+
+ - /resume - Resume a previous session
+
+ - /mcp - Check the status of MCP services
+
+ - /yolo - Unattended mode; all tool calls are auto-approved
+
+ - /init - Initialize the project and generate a SNOW.md description document
+
+ - /ide - Connect to VSCode (requires installing the plug-in)
+
+ - /compact - Compress the context into a single sentence
+