snow-ai 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,13 @@
1
1
  import type { Message } from '../ui/components/MessageList.js';
2
2
  import type { UsageInfo } from '../api/chat.js';
3
+ /**
4
+ * Execute context compression
5
+ * @returns The compressed UI message list and token usage information, or null on failure
6
+ */
7
+ export declare function executeContextCompression(): Promise<{
8
+ uiMessages: Message[];
9
+ usage: UsageInfo;
10
+ } | null>;
3
11
  type CommandHandlerOptions = {
4
12
  messages: Message[];
5
13
  setMessages: React.Dispatch<React.SetStateAction<Message[]>>;
@@ -6,6 +6,105 @@ import { navigateTo } from './useGlobalNavigation.js';
6
6
  import { resetTerminal } from '../utils/terminal.js';
7
7
  import { showSaveDialog, isFileDialogSupported } from '../utils/fileDialog.js';
8
8
  import { exportMessagesToFile } from '../utils/chatExporter.js';
9
/**
 * Convert a stored session message into the shape handed to `compressContext`,
 * copying the role, content and tool/image/reasoning fields.
 */
function toCompressionChatMessage(msg) {
    return {
        role: msg.role,
        content: msg.content,
        tool_call_id: msg.tool_call_id,
        tool_calls: msg.tool_calls,
        images: msg.images,
        reasoning: msg.reasoning,
        subAgentInternal: msg.subAgentInternal,
    };
}
/**
 * Convert a preserved chat message back into a session message, stamping it
 * with `timestamp` and copying optional fields only when they are set.
 */
function toCompressedSessionMessage(msg, timestamp) {
    return {
        role: msg.role,
        content: msg.content,
        timestamp,
        ...(msg.tool_call_id && { tool_call_id: msg.tool_call_id }),
        ...(msg.tool_calls && { tool_calls: msg.tool_calls }),
        ...(msg.images && { images: msg.images }),
        ...(msg.reasoning && { reasoning: msg.reasoning }),
        ...(msg.subAgentInternal !== undefined && { subAgentInternal: msg.subAgentInternal }),
    };
}
/**
 * Convert a session message into a UI message. When the message carries tool
 * calls and its content is empty or shorter than 10 characters, the content is
 * replaced by a short "[Tool: name]" list (call arguments are not shown).
 */
function toCompressedUIMessage(sessionMsg) {
    const uiMessage = {
        role: sessionMsg.role,
        content: sessionMsg.content,
        streaming: false,
    };
    const toolCalls = sessionMsg.tool_calls;
    const hasToolCalls = Boolean(toolCalls) && toolCalls.length > 0;
    if (hasToolCalls && (!uiMessage.content || uiMessage.content.length < 10)) {
        // A tool call without a `function` entry must not abort the conversion.
        uiMessage.content = toolCalls
            .map((tc) => `[Tool: ${tc?.function?.name ?? 'unknown'}]`)
            .join(', ');
    }
    return uiMessage;
}
/**
 * Compress the current session's conversation history in place.
 *
 * Reads the messages of the session returned by
 * `sessionManager.getCurrentSession()`, passes them to `compressContext`, and
 * replaces the session's messages with one assistant message holding the
 * summary, followed by any `preservedMessages` the compressor returned. The
 * existing session object is updated and saved; no new session is created.
 *
 * @returns {Promise<{uiMessages: Array<object>, usage: {prompt_tokens: number, completion_tokens: number, total_tokens: number}} | null>}
 *   The UI message list (tool-role messages omitted) and the token usage
 *   reported by the compressor, or `null` when there is nothing to compress,
 *   the compressor returned `null`, or any step threw (the error is logged).
 */
export async function executeContextCompression() {
    try {
        // The session's stored messages are the source of truth for compression.
        const currentSession = sessionManager.getCurrentSession();
        if (!currentSession || currentSession.messages.length === 0) {
            throw new Error('No active session or no messages to compress');
        }
        const chatMessages = currentSession.messages.map(toCompressionChatMessage);
        const compressionResult = await compressContext(chatMessages);
        // A null result means the compressor declined to compress.
        if (!compressionResult) {
            console.warn('Compression skipped: not enough history to compress');
            return null;
        }
        const now = Date.now();
        const newSessionMessages = [
            { role: 'assistant', content: compressionResult.summary, timestamp: now },
            ...(compressionResult.preservedMessages || []).map(msg => toCompressedSessionMessage(msg, now)),
        ];
        // Build the UI view BEFORE persisting, so a conversion problem can never
        // leave a saved, compressed session that is reported to the caller as a failure.
        // Tool-role messages (tool results) are skipped to keep raw JSON out of the UI.
        const newUIMessages = newSessionMessages
            .filter(sessionMsg => sessionMsg.role !== 'tool')
            .map(toCompressedUIMessage);
        const usage = {
            prompt_tokens: compressionResult.usage.prompt_tokens,
            completion_tokens: compressionResult.usage.completion_tokens,
            total_tokens: compressionResult.usage.total_tokens,
        };
        // Update the existing session and save it; restore the previous state if
        // saving fails so the in-memory session matches what the caller is told.
        const previousState = {
            messages: currentSession.messages,
            messageCount: currentSession.messageCount,
            updatedAt: currentSession.updatedAt,
        };
        currentSession.messages = newSessionMessages;
        currentSession.messageCount = newSessionMessages.length;
        currentSession.updatedAt = now;
        try {
            await sessionManager.saveSession(currentSession);
        }
        catch (saveError) {
            Object.assign(currentSession, previousState);
            throw saveError;
        }
        return { uiMessages: newUIMessages, usage };
    }
    catch (error) {
        console.error('Context compression failed:', error);
        return null;
    }
}
9
108
  export function useCommandHandler(options) {
10
109
  const { stdout } = useStdout();
11
110
  const handleCommandExecution = useCallback(async (commandName, result) => {
@@ -17,44 +116,19 @@ export function useCommandHandler(options) {
17
116
  options.setIsCompressing(true);
18
117
  options.setCompressionError(null);
19
118
  try {
20
- // Convert messages to ChatMessage format for compression
21
- const chatMessages = options.messages
22
- .filter(msg => msg.role !== 'command')
23
- .map(msg => ({
24
- role: msg.role,
25
- content: msg.content,
26
- tool_call_id: msg.toolCallId,
27
- }));
28
- // Compress the context
29
- const result = await compressContext(chatMessages);
30
- // Replace all messages with a summary message (不包含 "Context Compressed" 标题)
31
- const summaryMessage = {
32
- role: 'assistant',
33
- content: result.summary,
34
- streaming: false,
35
- };
36
- // Clear session and create new session with compressed summary
37
- sessionManager.clearCurrentSession();
38
- const newSession = await sessionManager.createNewSession();
39
- // Save the summary message to the new session so it's included in next API call
40
- if (newSession) {
41
- await sessionManager.addMessage({
42
- role: 'assistant',
43
- content: result.summary,
44
- timestamp: Date.now(),
45
- });
119
+ // 使用提取的压缩函数
120
+ const compressionResult = await executeContextCompression();
121
+ if (!compressionResult) {
122
+ throw new Error('Compression failed');
46
123
  }
124
+ // 更新UI
47
125
  options.clearSavedMessages();
48
- options.setMessages([summaryMessage]);
126
+ options.setMessages(compressionResult.uiMessages);
49
127
  options.setRemountKey(prev => prev + 1);
50
128
  // Reset system info flag to include in next message
51
129
  options.setShouldIncludeSystemInfo(true);
52
130
  // Update token usage with compression result
53
- options.setContextUsage({
54
- prompt_tokens: result.usage.prompt_tokens,
55
- completion_tokens: result.usage.completion_tokens,
56
- total_tokens: result.usage.total_tokens,
57
- });
131
+ options.setContextUsage(compressionResult.usage);
58
132
  }
59
133
  catch (error) {
60
134
  // Show error message
@@ -34,6 +34,10 @@ export type ConversationHandlerOptions = {
34
34
  remainingSeconds?: number;
35
35
  errorMessage?: string;
36
36
  } | null>>;
37
+ clearSavedMessages?: () => void;
38
+ setRemountKey?: React.Dispatch<React.SetStateAction<number>>;
39
+ setShouldIncludeSystemInfo?: React.Dispatch<React.SetStateAction<boolean>>;
40
+ getCurrentContextPercentage?: () => number;
37
41
  };
38
42
  /**
39
43
  * Handle conversation with streaming and tool calls
@@ -12,6 +12,7 @@ import { formatTodoContext } from '../utils/todoPreprocessor.js';
12
12
  import { formatToolCallMessage } from '../utils/messageFormatter.js';
13
13
  import { resourceMonitor } from '../utils/resourceMonitor.js';
14
14
  import { isToolNeedTwoStepDisplay } from '../utils/toolDisplayConfig.js';
15
+ import { shouldAutoCompress, performAutoCompression, } from '../utils/autoCompress.js';
15
16
  /**
16
17
  * Handle conversation with streaming and tool calls
17
18
  * Returns the usage data collected during the conversation
@@ -234,6 +235,7 @@ export async function handleConversationWithTools(options) {
234
235
  total_tokens: chunk.usage.total_tokens || 0,
235
236
  cache_creation_input_tokens: chunk.usage.cache_creation_input_tokens,
236
237
  cache_read_input_tokens: chunk.usage.cache_read_input_tokens,
238
+ cached_tokens: chunk.usage.cached_tokens,
237
239
  };
238
240
  }
239
241
  else {
@@ -252,6 +254,10 @@ export async function handleConversationWithTools(options) {
252
254
  (accumulatedUsage.cache_read_input_tokens || 0) +
253
255
  chunk.usage.cache_read_input_tokens;
254
256
  }
257
+ if (chunk.usage.cached_tokens !== undefined) {
258
+ accumulatedUsage.cached_tokens =
259
+ (accumulatedUsage.cached_tokens || 0) + chunk.usage.cached_tokens;
260
+ }
255
261
  }
256
262
  }
257
263
  }
@@ -552,6 +558,44 @@ export async function handleConversationWithTools(options) {
552
558
  freeEncoder();
553
559
  break;
554
560
  }
561
+ // 在工具执行完成后、发送结果到AI前,检查是否需要压缩
562
+ if (options.getCurrentContextPercentage &&
563
+ shouldAutoCompress(options.getCurrentContextPercentage())) {
564
+ try {
565
+ // 显示压缩提示消息
566
+ const compressingMessage = {
567
+ role: 'assistant',
568
+ content: '✵ Auto-compressing context before sending tool results...',
569
+ streaming: false,
570
+ };
571
+ setMessages(prev => [...prev, compressingMessage]);
572
+ const compressionResult = await performAutoCompression();
573
+ if (compressionResult && options.clearSavedMessages) {
574
+ // 更新UI和token使用情况
575
+ options.clearSavedMessages();
576
+ setMessages(compressionResult.uiMessages);
577
+ if (options.setRemountKey) {
578
+ options.setRemountKey(prev => prev + 1);
579
+ }
580
+ options.setContextUsage(compressionResult.usage);
581
+ if (options.setShouldIncludeSystemInfo) {
582
+ options.setShouldIncludeSystemInfo(true);
583
+ }
584
+ // 更新累计的usage为压缩后的usage
585
+ accumulatedUsage = compressionResult.usage;
586
+ // 压缩后需要重新构建conversationMessages
587
+ conversationMessages = [];
588
+ const session = sessionManager.getCurrentSession();
589
+ if (session && session.messages.length > 0) {
590
+ conversationMessages.push(...session.messages);
591
+ }
592
+ }
593
+ }
594
+ catch (error) {
595
+ console.error('Auto-compression after tool execution failed:', error);
596
+ // 即使压缩失败也继续处理工具结果
597
+ }
598
+ }
555
599
  // Check if there are TODO related tool calls, if yes refresh TODO list
556
600
  const hasTodoTools = approvedTools.some(t => t.function.name.startsWith('todo-'));
557
601
  const hasTodoUpdateTools = approvedTools.some(t => t.function.name === 'todo-update');
@@ -683,6 +727,44 @@ export async function handleConversationWithTools(options) {
683
727
  if (options.getPendingMessages && options.clearPendingMessages) {
684
728
  const pendingMessages = options.getPendingMessages();
685
729
  if (pendingMessages.length > 0) {
730
+ // 检查 token 占用,如果 >= 80% 先执行自动压缩
731
+ if (options.getCurrentContextPercentage &&
732
+ shouldAutoCompress(options.getCurrentContextPercentage())) {
733
+ try {
734
+ // 显示压缩提示消息
735
+ const compressingMessage = {
736
+ role: 'assistant',
737
+ content: '✵ Auto-compressing context before processing pending messages...',
738
+ streaming: false,
739
+ };
740
+ setMessages(prev => [...prev, compressingMessage]);
741
+ const compressionResult = await performAutoCompression();
742
+ if (compressionResult && options.clearSavedMessages) {
743
+ // 更新UI和token使用情况
744
+ options.clearSavedMessages();
745
+ setMessages(compressionResult.uiMessages);
746
+ if (options.setRemountKey) {
747
+ options.setRemountKey(prev => prev + 1);
748
+ }
749
+ options.setContextUsage(compressionResult.usage);
750
+ if (options.setShouldIncludeSystemInfo) {
751
+ options.setShouldIncludeSystemInfo(true);
752
+ }
753
+ // 更新累计的usage为压缩后的usage
754
+ accumulatedUsage = compressionResult.usage;
755
+ // 压缩后需要重新构建conversationMessages
756
+ conversationMessages = [];
757
+ const session = sessionManager.getCurrentSession();
758
+ if (session && session.messages.length > 0) {
759
+ conversationMessages.push(...session.messages);
760
+ }
761
+ }
762
+ }
763
+ catch (error) {
764
+ console.error('Auto-compression before pending messages failed:', error);
765
+ // 即使压缩失败也继续处理pending消息
766
+ }
767
+ }
686
768
  // Clear pending messages
687
769
  options.clearPendingMessages();
688
770
  // Combine multiple pending messages into one
@@ -1,4 +1,15 @@
1
1
  import React from 'react';
2
+ /**
3
+ * Calculate context usage percentage
4
+ * This is the same logic used in ChatInput to display usage
5
+ */
6
+ export declare function calculateContextPercentage(contextUsage: {
7
+ inputTokens: number;
8
+ maxContextTokens: number;
9
+ cacheCreationTokens?: number;
10
+ cacheReadTokens?: number;
11
+ cachedTokens?: number;
12
+ }): number;
2
13
  type Props = {
3
14
  onSubmit: (message: string, images?: Array<{
4
15
  data: string;
@@ -22,6 +33,7 @@ type Props = {
22
33
  cachedTokens?: number;
23
34
  };
24
35
  initialContent?: string | null;
36
+ onContextPercentageChange?: (percentage: number) => void;
25
37
  };
26
- export default function ChatInput({ onSubmit, onCommand, placeholder, disabled, isProcessing, chatHistory, onHistorySelect, yoloMode, contextUsage, initialContent, }: Props): React.JSX.Element;
38
+ export default function ChatInput({ onSubmit, onCommand, placeholder, disabled, isProcessing, chatHistory, onHistorySelect, yoloMode, contextUsage, initialContent, onContextPercentageChange, }: Props): React.JSX.Element;
27
39
  export {};
@@ -11,7 +11,24 @@ import { useClipboard } from '../../hooks/useClipboard.js';
11
11
  import { useKeyboardInput } from '../../hooks/useKeyboardInput.js';
12
12
  import { useTerminalSize } from '../../hooks/useTerminalSize.js';
13
13
  import { useTerminalFocus } from '../../hooks/useTerminalFocus.js';
14
- export default function ChatInput({ onSubmit, onCommand, placeholder = 'Type your message...', disabled = false, isProcessing = false, chatHistory = [], onHistorySelect, yoloMode = false, contextUsage, initialContent = null, }) {
14
/**
 * Calculate context usage as a percentage of the maximum context size.
 *
 * When `cacheCreationTokens` or `cacheReadTokens` is positive (treated as the
 * Anthropic caching scheme) those counts are added to `inputTokens`; otherwise
 * `inputTokens` alone is used (`cachedTokens` is never added).
 *
 * @param {{inputTokens: number, maxContextTokens: number, cacheCreationTokens?: number, cacheReadTokens?: number, cachedTokens?: number}} contextUsage
 *   Token counts for the current context.
 * @returns {number} Usage percentage, capped at 100. Returns 0 when
 *   `maxContextTokens` is missing or not positive, instead of the NaN (0/0) or
 *   spurious 100 (x/0) that the raw division would produce.
 */
export function calculateContextPercentage(contextUsage) {
    const { maxContextTokens } = contextUsage;
    // Without a positive window size the ratio is meaningless; report "no usage"
    // rather than letting NaN/Infinity reach threshold checks and the UI.
    if (!(maxContextTokens > 0)) {
        return 0;
    }
    const inputTokens = contextUsage.inputTokens || 0;
    const cacheCreationTokens = contextUsage.cacheCreationTokens || 0;
    const cacheReadTokens = contextUsage.cacheReadTokens || 0;
    // Determine which caching system is being used.
    const isAnthropic = cacheCreationTokens > 0 || cacheReadTokens > 0;
    // For Anthropic: Total = inputTokens + cacheCreationTokens + cacheReadTokens
    // For OpenAI: Total = inputTokens (cachedTokens are already included in inputTokens)
    const totalInputTokens = isAnthropic
        ? inputTokens + cacheCreationTokens + cacheReadTokens
        : inputTokens;
    return Math.min(100, (totalInputTokens / maxContextTokens) * 100);
}
31
+ export default function ChatInput({ onSubmit, onCommand, placeholder = 'Type your message...', disabled = false, isProcessing = false, chatHistory = [], onHistorySelect, yoloMode = false, contextUsage, initialContent = null, onContextPercentageChange, }) {
15
32
  // Use terminal size hook to listen for resize events
16
33
  const { columns: terminalWidth } = useTerminalSize();
17
34
  const prevTerminalWidthRef = useRef(terminalWidth);
@@ -101,6 +118,13 @@ export default function ChatInput({ onSubmit, onCommand, placeholder = 'Type you
101
118
  }, 100);
102
119
  return () => clearTimeout(timer);
103
120
  }, [terminalWidth, forceUpdate]);
121
+ // Notify parent of context percentage changes
122
+ useEffect(() => {
123
+ if (contextUsage && onContextPercentageChange) {
124
+ const percentage = calculateContextPercentage(contextUsage);
125
+ onContextPercentageChange(percentage);
126
+ }
127
+ }, [contextUsage, onContextPercentageChange]);
104
128
  // Render cursor based on focus state
105
129
  const renderCursor = useCallback((char) => {
106
130
  if (hasFocus) {
@@ -245,14 +269,14 @@ export default function ChatInput({ onSubmit, onCommand, placeholder = 'Type you
245
269
  const isAnthropic = (contextUsage.cacheCreationTokens || 0) > 0 ||
246
270
  (contextUsage.cacheReadTokens || 0) > 0;
247
271
  const isOpenAI = (contextUsage.cachedTokens || 0) > 0;
248
- // For Anthropic: Total = inputTokens + cacheCreationTokens + cacheReadTokens
249
- // For OpenAI: Total = inputTokens (cachedTokens are already included in inputTokens)
272
+ // Use the exported function for consistent calculation
273
+ const percentage = calculateContextPercentage(contextUsage);
274
+ // Calculate total tokens for display
250
275
  const totalInputTokens = isAnthropic
251
276
  ? contextUsage.inputTokens +
252
277
  (contextUsage.cacheCreationTokens || 0) +
253
278
  (contextUsage.cacheReadTokens || 0)
254
279
  : contextUsage.inputTokens;
255
- const percentage = Math.min(100, (totalInputTokens / contextUsage.maxContextTokens) * 100);
256
280
  let color;
257
281
  if (percentage < 50)
258
282
  color = 'green';
@@ -31,6 +31,7 @@ import { executeCommand } from '../../utils/commandExecutor.js';
31
31
  import { convertSessionMessagesToUI } from '../../utils/sessionConverter.js';
32
32
  import { incrementalSnapshotManager } from '../../utils/incrementalSnapshot.js';
33
33
  import { formatElapsedTime } from '../../utils/textUtils.js';
34
+ import { shouldAutoCompress, performAutoCompression, } from '../../utils/autoCompress.js';
34
35
  // Import commands to register them
35
36
  import '../../utils/commands/clear.js';
36
37
  import '../../utils/commands/resume.js';
@@ -54,6 +55,12 @@ export default function ChatScreen({ skipWelcome }) {
54
55
  const [remountKey, setRemountKey] = useState(0);
55
56
  const [showMcpInfo, setShowMcpInfo] = useState(false);
56
57
  const [mcpPanelKey, setMcpPanelKey] = useState(0);
58
+ const [currentContextPercentage, setCurrentContextPercentage] = useState(0); // Track context percentage from ChatInput
59
+ const currentContextPercentageRef = useRef(0); // Use ref to avoid closure issues
60
+ // Sync state to ref
61
+ useEffect(() => {
62
+ currentContextPercentageRef.current = currentContextPercentage;
63
+ }, [currentContextPercentage]);
57
64
  const [yoloMode, setYoloMode] = useState(() => {
58
65
  // Load yolo mode from localStorage on initialization
59
66
  try {
@@ -492,6 +499,47 @@ export default function ChatScreen({ skipWelcome }) {
492
499
  await processMessage(message, images);
493
500
  };
494
501
  const processMessage = async (message, images, useBasicModel, hideUserMessage) => {
502
+ // 检查 token 占用,如果 >= 80% 先执行自动压缩
503
+ if (shouldAutoCompress(currentContextPercentageRef.current)) {
504
+ setIsCompressing(true);
505
+ setCompressionError(null);
506
+ try {
507
+ // 显示压缩提示消息
508
+ const compressingMessage = {
509
+ role: 'assistant',
510
+ content: '✵ Auto-compressing context due to token limit...',
511
+ streaming: false,
512
+ };
513
+ setMessages(prev => [...prev, compressingMessage]);
514
+ const compressionResult = await performAutoCompression();
515
+ if (compressionResult) {
516
+ // 更新UI和token使用情况
517
+ clearSavedMessages();
518
+ setMessages(compressionResult.uiMessages);
519
+ setRemountKey(prev => prev + 1);
520
+ streamingState.setContextUsage(compressionResult.usage);
521
+ setShouldIncludeSystemInfo(true);
522
+ }
523
+ else {
524
+ throw new Error('Compression failed');
525
+ }
526
+ }
527
+ catch (error) {
528
+ const errorMsg = error instanceof Error ? error.message : 'Unknown error';
529
+ setCompressionError(errorMsg);
530
+ const errorMessage = {
531
+ role: 'assistant',
532
+ content: `**Auto-compression Failed**\n\n${errorMsg}`,
533
+ streaming: false,
534
+ };
535
+ setMessages(prev => [...prev, errorMessage]);
536
+ setIsCompressing(false);
537
+ return; // 停止处理,等待用户手动处理
538
+ }
539
+ finally {
540
+ setIsCompressing(false);
541
+ }
542
+ }
495
543
  // Clear any previous retry status when starting a new request
496
544
  streamingState.setRetryStatus(null);
497
545
  // Parse and validate file references
@@ -557,6 +605,10 @@ export default function ChatScreen({ skipWelcome }) {
557
605
  setIsStreaming: streamingState.setIsStreaming,
558
606
  setIsReasoning: streamingState.setIsReasoning,
559
607
  setRetryStatus: streamingState.setRetryStatus,
608
+ clearSavedMessages,
609
+ setRemountKey,
610
+ setShouldIncludeSystemInfo,
611
+ getCurrentContextPercentage: () => currentContextPercentageRef.current,
560
612
  });
561
613
  }
562
614
  catch (error) {
@@ -644,6 +696,10 @@ export default function ChatScreen({ skipWelcome }) {
644
696
  setIsStreaming: streamingState.setIsStreaming,
645
697
  setIsReasoning: streamingState.setIsReasoning,
646
698
  setRetryStatus: streamingState.setRetryStatus,
699
+ clearSavedMessages,
700
+ setRemountKey,
701
+ getCurrentContextPercentage: () => currentContextPercentageRef.current,
702
+ setShouldIncludeSystemInfo,
647
703
  });
648
704
  }
649
705
  catch (error) {
@@ -897,7 +953,7 @@ export default function ChatScreen({ skipWelcome }) {
897
953
  cacheReadTokens: streamingState.contextUsage.cache_read_input_tokens,
898
954
  cachedTokens: streamingState.contextUsage.cached_tokens,
899
955
  }
900
- : undefined, initialContent: restoreInputContent }),
956
+ : undefined, initialContent: restoreInputContent, onContextPercentageChange: setCurrentContextPercentage }),
901
957
  vscodeState.vscodeConnectionStatus !== 'disconnected' && (React.createElement(Box, { marginTop: 1, paddingX: 1 },
902
958
  React.createElement(Text, { color: vscodeState.vscodeConnectionStatus === 'connecting'
903
959
  ? 'yellow'
@@ -0,0 +1,15 @@
1
+ /**
2
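+ * Check whether token usage has reached the threshold
3
+ * @param percentage Current context usage percentage (computed by ChatInput)
4
+ * @param threshold Threshold percentage (default 80)
5
+ * @returns Whether compression is needed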
6
+ */
7
+ export declare function shouldAutoCompress(percentage: number, threshold?: number): boolean;
8
+ /**
9
+ * Run auto-compression
10
+ * @returns The compression result, or null on failure
11
+ */
12
+ export declare function performAutoCompression(): Promise<{
13
+ uiMessages: import("../ui/components/MessageList.js").Message[];
14
+ usage: import("../api/types.js").UsageInfo;
15
+ } | null>;
@@ -0,0 +1,24 @@
1
+ import { executeContextCompression } from '../hooks/useCommandHandler.js';
2
/**
 * Decide whether the context should be compressed.
 *
 * @param {number} percentage Current context usage percentage.
 * @param {number} [threshold=80] Usage percentage at or above which compression is due.
 * @returns {boolean} `true` when `percentage` is greater than or equal to `threshold`.
 */
export function shouldAutoCompress(percentage, threshold = 80) {
    const thresholdReached = threshold <= percentage;
    return thresholdReached;
}
11
/**
 * Run context compression on behalf of the auto-compress flow.
 *
 * Delegates to `executeContextCompression` and converts any thrown error into
 * a logged `null` result.
 *
 * @returns {Promise<object | null>} Whatever `executeContextCompression`
 *   resolved with, or `null` if it threw.
 */
export async function performAutoCompression() {
    let outcome = null;
    try {
        outcome = await executeContextCompression();
    }
    catch (failure) {
        console.error('Auto-compression failed:', failure);
    }
    return outcome;
}
@@ -6,10 +6,11 @@ export interface CompressionResult {
6
6
  completion_tokens: number;
7
7
  total_tokens: number;
8
8
  };
9
+ preservedMessages?: ChatMessage[];
9
10
  }
10
11
  /**
11
12
  * Compress conversation history using the compact model
12
13
  * @param messages - Array of messages to compress
13
- * @returns Compressed summary and token usage information
14
+ * @returns Compressed summary and token usage information, or null if compression should be skipped
14
15
  */
15
- export declare function compressContext(messages: ChatMessage[]): Promise<CompressionResult>;
16
+ export declare function compressContext(messages: ChatMessage[]): Promise<CompressionResult | null>;
@@ -5,11 +5,94 @@ import { createStreamingResponse } from '../api/responses.js';
5
5
  import { createStreamingGeminiCompletion } from '../api/gemini.js';
6
6
  import { createStreamingAnthropicCompletion } from '../api/anthropic.js';
7
7
  /**
8
- * Compression request prompt - asks AI to summarize conversation with focus on task continuity
8
+ * Compression request prompt - asks AI to create a detailed, structured summary
9
+ * that preserves critical information for task continuity
9
10
  */
10
- const COMPRESSION_PROMPT = 'Please provide a concise summary of our conversation so far. Focus on: 1) The current task or goal we are working on, 2) Key decisions and approaches we have agreed upon, 3) Important context needed to continue, 4) Any pending or unfinished work. Keep it brief but ensure I can seamlessly continue assisting with the task.';
11
+ const COMPRESSION_PROMPT = `You are compressing a conversation history to save context space while preserving all critical information. Create a comprehensive summary following this structure:
12
+
13
+ ## 📋 Current Task & Goals
14
+ - What is the main task or project being worked on?
15
+ - What are the specific objectives and desired outcomes?
16
+ - What is the current progress status?
17
+
18
+ ## 🔧 Technical Context
19
+ - Key technologies, frameworks, libraries, and tools being used
20
+ - Important file paths, function names, and code locations mentioned
21
+ - Architecture decisions and design patterns chosen
22
+ - Configuration settings and environment details
23
+
24
+ ## 💡 Key Decisions & Approaches
25
+ - Important decisions made and their rationale
26
+ - Chosen approaches and methodologies
27
+ - Solutions to problems encountered
28
+ - Best practices or patterns agreed upon
29
+
30
+ ## ✅ Completed Work
31
+ - What has been successfully implemented or resolved?
32
+ - Important code changes, fixes, or features added
33
+ - Test results or validation performed
34
+
35
+ ## 🚧 Pending & In-Progress Work
36
+ - What tasks are currently unfinished?
37
+ - Known issues or blockers that need addressing
38
+ - Next steps planned or discussed
39
+ - Open questions or areas needing clarification
40
+
41
+ ## 🔑 Critical Information
42
+ - Important data, values, IDs, or credentials referenced (sanitized)
43
+ - Error messages, warnings, or diagnostic information
44
+ - User preferences, requirements, or constraints
45
+ - Any other context essential for seamless continuation
46
+
47
+ **Guidelines:**
48
+ - Be specific with names, paths, and technical details
49
+ - Preserve exact terminology and technical vocabulary
50
+ - Include enough detail to continue work without confusion
51
+ - Use code snippets or examples where helpful
52
+ - Prioritize actionable information over general descriptions`;
53
/**
 * Locate the start of the trailing tool-call chain that must survive compression.
 *
 * Rules, decided by the last message:
 * - last is a `tool` message: keep from the nearest earlier assistant message
 *   that has tool calls; if none exists, keep only that last `tool` message.
 * - last is an assistant message with tool calls: keep just that message.
 * - anything else (plain assistant or user message): keep nothing.
 *
 * User messages are never kept on their own; per the original author's note,
 * the summary already carries the history and the next turn supplies a new
 * user message.
 *
 * @param {Array<object>} messages Conversation messages in order.
 * @returns {number} Index of the first message to keep; `messages.length` when
 *   everything is to be compressed (which is 0 for an empty list).
 */
function findPreserveStartIndex(messages) {
    const total = messages.length;
    if (total === 0) {
        return 0;
    }
    const opensToolChain = (candidate) => candidate?.role === 'assistant' &&
        Boolean(candidate.tool_calls) &&
        candidate.tool_calls.length > 0;
    const tailIndex = total - 1;
    const tail = messages[tailIndex];
    // Pending tool calls at the very end: keep only that assistant message.
    if (opensToolChain(tail)) {
        return tailIndex;
    }
    // Plain assistant or user message at the end: no open chain, compress everything.
    if (tail?.role !== 'tool') {
        return total;
    }
    // Tool result at the end: walk backwards to the assistant message that issued the calls.
    let cursor = tailIndex - 1;
    while (cursor >= 0 && !opensToolChain(messages[cursor])) {
        cursor -= 1;
    }
    // No issuing assistant found: fall back to keeping just the last tool message.
    return cursor >= 0 ? cursor : tailIndex;
}
11
93
  /**
12
94
  * Prepare messages for compression by adding system prompt and compression request
95
+ * Note: Only filters out system messages and tool messages, preserving user and assistant messages
13
96
  */
14
97
  function prepareMessagesForCompression(conversationMessages, customSystemPrompt) {
15
98
  const messages = [];
@@ -23,9 +106,11 @@ function prepareMessagesForCompression(conversationMessages, customSystemPrompt)
23
106
  // No custom system prompt: default as system
24
107
  messages.push({ role: 'system', content: getSystemPrompt() });
25
108
  }
26
- // Add all conversation history (exclude system and tool messages)
109
+ // Add all conversation history for compression
110
+ // Filter out system messages (already added above) and tool messages (only needed for API, not for summary)
27
111
  for (const msg of conversationMessages) {
28
112
  if (msg.role !== 'system' && msg.role !== 'tool') {
113
+ // Only include user and assistant messages for compression
29
114
  messages.push({
30
115
  role: msg.role,
31
116
  content: msg.content,
@@ -181,7 +266,7 @@ async function compressWithAnthropic(modelName, conversationMessages, customSyst
181
266
  /**
182
267
  * Compress conversation history using the compact model
183
268
  * @param messages - Array of messages to compress
184
- * @returns Compressed summary and token usage information
269
+ * @returns Compressed summary and token usage information, or null if compression should be skipped
185
270
  */
186
271
  export async function compressContext(messages) {
187
272
  const config = getOpenAiConfig();
@@ -189,26 +274,51 @@ export async function compressContext(messages) {
189
274
  if (!config.compactModel || !config.compactModel.modelName) {
190
275
  throw new Error('Compact model not configured. Please configure it in API & Model Settings.');
191
276
  }
277
+ if (messages.length === 0) {
278
+ console.warn('No messages to compress');
279
+ return null;
280
+ }
192
281
  const modelName = config.compactModel.modelName;
193
282
  const requestMethod = config.requestMethod;
194
283
  // Get custom system prompt if configured
195
284
  const customSystemPrompt = getCustomSystemPrompt();
285
+ // 找到需要保留的消息起始位置
286
+ const preserveStartIndex = findPreserveStartIndex(messages);
287
+ // 如果 preserveStartIndex 为 0,说明所有消息都需要保留(没有历史可压缩)
288
+ // 例如:整个对话只有一条 user→assistant(tool_calls),无法压缩
289
+ if (preserveStartIndex === 0) {
290
+ console.warn('Cannot compress: all messages need to be preserved (no history)');
291
+ return null;
292
+ }
293
+ // 分离待压缩和待保留的消息
294
+ const messagesToCompress = messages.slice(0, preserveStartIndex);
295
+ const preservedMessages = messages.slice(preserveStartIndex);
196
296
  try {
197
297
  // Choose compression method based on request method
198
298
  // All methods now reuse existing API modules which include proxy support
299
+ let result;
199
300
  switch (requestMethod) {
200
301
  case 'gemini':
201
- return await compressWithGemini(modelName, messages, customSystemPrompt || null);
302
+ result = await compressWithGemini(modelName, messagesToCompress, customSystemPrompt || null);
303
+ break;
202
304
  case 'anthropic':
203
- return await compressWithAnthropic(modelName, messages, customSystemPrompt || null);
305
+ result = await compressWithAnthropic(modelName, messagesToCompress, customSystemPrompt || null);
306
+ break;
204
307
  case 'responses':
205
308
  // OpenAI Responses API
206
- return await compressWithResponses(modelName, messages, customSystemPrompt || null);
309
+ result = await compressWithResponses(modelName, messagesToCompress, customSystemPrompt || null);
310
+ break;
207
311
  case 'chat':
208
312
  default:
209
313
  // OpenAI Chat Completions API
210
- return await compressWithChatCompletions(modelName, messages, customSystemPrompt || null);
314
+ result = await compressWithChatCompletions(modelName, messagesToCompress, customSystemPrompt || null);
315
+ break;
316
+ }
317
+ // 添加保留的消息到结果中
318
+ if (preservedMessages.length > 0) {
319
+ result.preservedMessages = preservedMessages;
211
320
  }
321
+ return result;
212
322
  }
213
323
  catch (error) {
214
324
  if (error instanceof Error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "snow-ai",
3
- "version": "0.3.9",
3
+ "version": "0.3.10",
4
4
  "description": "Intelligent Command Line Assistant powered by AI",
5
5
  "license": "MIT",
6
6
  "bin": {