snow-ai 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,13 +97,42 @@ function convertToAnthropicMessages(messages, includeBuiltinSystemPrompt = true)
97
97
  continue;
98
98
  }
99
99
  if (msg.role === 'tool' && msg.tool_call_id) {
100
+ // Build tool_result content - can be text or array with images
101
+ let toolResultContent;
102
+ if (msg.images && msg.images.length > 0) {
103
+ // Multimodal tool result with images
104
+ const contentArray = [];
105
+ // Add text content first
106
+ if (msg.content) {
107
+ contentArray.push({
108
+ type: 'text',
109
+ text: msg.content,
110
+ });
111
+ }
112
+ // Add images
113
+ for (const image of msg.images) {
114
+ contentArray.push({
115
+ type: 'image',
116
+ source: {
117
+ type: 'base64',
118
+ media_type: image.mimeType,
119
+ data: image.data,
120
+ },
121
+ });
122
+ }
123
+ toolResultContent = contentArray;
124
+ }
125
+ else {
126
+ // Text-only tool result
127
+ toolResultContent = msg.content;
128
+ }
100
129
  anthropicMessages.push({
101
130
  role: 'user',
102
131
  content: [
103
132
  {
104
133
  type: 'tool_result',
105
134
  tool_use_id: msg.tool_call_id,
106
- content: msg.content,
135
+ content: toolResultContent,
107
136
  },
108
137
  ],
109
138
  });
package/dist/api/chat.js CHANGED
@@ -50,6 +50,31 @@ function convertToOpenAIMessages(messages, includeBuiltinSystemPrompt = true) {
50
50
  };
51
51
  }
52
52
  if (msg.role === 'tool' && msg.tool_call_id) {
53
+ // Handle multimodal tool results with images
54
+ if (msg.images && msg.images.length > 0) {
55
+ const content = [];
56
+ // Add text content
57
+ if (msg.content) {
58
+ content.push({
59
+ type: 'text',
60
+ text: msg.content,
61
+ });
62
+ }
63
+ // Add images as base64 data URLs
64
+ for (const image of msg.images) {
65
+ content.push({
66
+ type: 'image_url',
67
+ image_url: {
68
+ url: `data:${image.mimeType};base64,${image.data}`,
69
+ },
70
+ });
71
+ }
72
+ return {
73
+ role: 'tool',
74
+ content,
75
+ tool_call_id: msg.tool_call_id,
76
+ };
77
+ }
53
78
  return {
54
79
  role: 'tool',
55
80
  content: msg.content,
@@ -91,6 +91,18 @@ function convertToGeminiMessages(messages, includeBuiltinSystemPrompt = true) {
91
91
  // Tool response must be a valid object for Gemini API
92
92
  // If content is a JSON string, parse it; otherwise wrap it in an object
93
93
  let responseData;
94
+ const imageParts = [];
95
+ // Handle images from tool result
96
+ if (msg.images && msg.images.length > 0) {
97
+ for (const image of msg.images) {
98
+ imageParts.push({
99
+ inlineData: {
100
+ mimeType: image.mimeType,
101
+ data: image.data,
102
+ },
103
+ });
104
+ }
105
+ }
94
106
  if (!msg.content) {
95
107
  responseData = {};
96
108
  }
@@ -132,16 +144,22 @@ function convertToGeminiMessages(messages, includeBuiltinSystemPrompt = true) {
132
144
  responseData = { content: contentToParse };
133
145
  }
134
146
  }
147
+ // Build parts array with functionResponse and optional images
148
+ const parts = [
149
+ {
150
+ functionResponse: {
151
+ name: functionName,
152
+ response: responseData,
153
+ },
154
+ },
155
+ ];
156
+ // Add images as inline data parts
157
+ if (imageParts.length > 0) {
158
+ parts.push(...imageParts);
159
+ }
135
160
  contents.push({
136
161
  role: 'user',
137
- parts: [
138
- {
139
- functionResponse: {
140
- name: functionName,
141
- response: responseData,
142
- },
143
- },
144
- ],
162
+ parts,
145
163
  });
146
164
  continue;
147
165
  }
@@ -163,11 +163,38 @@ function convertToResponseInput(messages, includeBuiltinSystemPrompt = true) {
163
163
  }
164
164
  // Tool 消息:转换为 function_call_output
165
165
  if (msg.role === 'tool' && msg.tool_call_id) {
166
- result.push({
167
- type: 'function_call_output',
168
- call_id: msg.tool_call_id,
169
- output: msg.content,
170
- });
166
+ // Handle multimodal tool results with images
167
+ if (msg.images && msg.images.length > 0) {
168
+ // For Responses API, we need to include images in a structured way
169
+ // The output can be an array of content items
170
+ const outputContent = [];
171
+ // Add text content
172
+ if (msg.content) {
173
+ outputContent.push({
174
+ type: 'input_text',
175
+ text: msg.content,
176
+ });
177
+ }
178
+ // Add images as base64 data URLs (Responses API format)
179
+ for (const image of msg.images) {
180
+ outputContent.push({
181
+ type: 'input_image',
182
+ image_url: `data:${image.mimeType};base64,${image.data}`,
183
+ });
184
+ }
185
+ result.push({
186
+ type: 'function_call_output',
187
+ call_id: msg.tool_call_id,
188
+ output: outputContent,
189
+ });
190
+ }
191
+ else {
192
+ result.push({
193
+ type: 'function_call_output',
194
+ call_id: msg.tool_call_id,
195
+ output: msg.content,
196
+ });
197
+ }
171
198
  continue;
172
199
  }
173
200
  }
@@ -79,6 +79,7 @@ const SYSTEM_PROMPT_TEMPLATE = `You are Snow AI CLI, an intelligent command-line
79
79
  ## Execution Strategy - BALANCE ACTION & ANALYSIS
80
80
 
81
81
  ### Rigorous Coding Habits
82
+ - **Location Code**: Must First use a search tool to locate the line number of the code, then use \`filesystem-read\` to read the code content
82
83
  - **Boundary verification**: MUST use \`filesystem-read\` to identify complete code boundaries before ANY edit. Never guess line numbers or code structure
83
84
  - **Impact analysis**: Consider modification impact and conflicts with existing business logic
84
85
  - **Optimal solution**: Avoid hardcoding/shortcuts unless explicitly requested
@@ -114,10 +115,9 @@ PLACEHOLDER_FOR_WORKFLOW_SECTION
114
115
  **CRITICAL: BOUNDARY-FIRST EDITING**
115
116
 
116
117
  **MANDATORY WORKFLOW:**
117
- 1. **LOCATE** - \`ace-semantic_search\` / \`ace-text_search\` / \`ace-find_definition\`
118
- 2. **READ & VERIFY** - Use \`filesystem-read\` to identify COMPLETE units (functions: opening to closing brace, markup: full tags, check indentation)
119
- 3. **COPY COMPLETE CODE** - Remove line numbers, preserve all content
120
- 4. **EDIT** - \`filesystem-edit_search\` (fuzzy match, safer) or \`filesystem-edit\` (line-based, for add/delete)
118
+ 1. **READ & VERIFY** - Use \`filesystem-read\` to identify COMPLETE units (functions: opening to closing brace, markup: full tags, check indentation)
119
+ 2. **COPY COMPLETE CODE** - Remove line numbers, preserve all content
120
+ 3. **EDIT** - \`filesystem-edit_search\` (fuzzy match, safer) or \`filesystem-edit\` (line-based, for add/delete)
121
121
 
122
122
  **BATCH OPERATIONS:** Modify 2+ files? Use batch: \`filesystem-read(filePath=["a.ts","b.ts"])\` or \`filesystem-edit_search(filePath=[{path:"a.ts",...},{path:"b.ts",...}])\`
123
123
 
@@ -10,7 +10,7 @@ export declare function useFilePicker(buffer: TextBuffer, triggerUpdate: () => v
10
10
  atSymbolPosition: number;
11
11
  setAtSymbolPosition: (_pos: number) => void;
12
12
  filteredFileCount: number;
13
- searchMode: "content" | "file";
13
+ searchMode: "file" | "content";
14
14
  updateFilePickerState: (_text: string, cursorPos: number) => void;
15
15
  handleFileSelect: (filePath: string) => Promise<void>;
16
16
  handleFilteredCountChange: (count: number) => void;
@@ -1,4 +1,4 @@
1
- import type { EditBySearchConfig, EditByLineConfig, EditBySearchResult, EditByLineResult } from './types/filesystem.types.js';
1
+ import type { EditBySearchConfig, EditByLineConfig, EditBySearchResult, EditByLineResult, SingleFileReadResult, MultipleFilesReadResult } from './types/filesystem.types.js';
2
2
  /**
3
3
  * Filesystem MCP Service
4
4
  * Provides basic file operations: read, create, and delete files
@@ -10,6 +10,30 @@ export declare class FilesystemMCPService {
10
10
  */
11
11
  private readonly prettierSupportedExtensions;
12
12
  constructor(basePath?: string);
13
+ /**
14
+ * Check if a file is an image based on extension
15
+ * @param filePath - Path to the file
16
+ * @returns True if the file is an image
17
+ */
18
+ private isImageFile;
19
+ /**
20
+ * Check if a file is an Office document based on extension
21
+ * @param filePath - Path to the file
22
+ * @returns True if the file is an Office document
23
+ */
24
+ private isOfficeFile;
25
+ /**
26
+ * Get MIME type for an image file
27
+ * @param filePath - Path to the file
28
+ * @returns MIME type or undefined if not an image
29
+ */
30
+ private getImageMimeType;
31
+ /**
32
+ * Read image file and convert to base64
33
+ * @param fullPath - Full path to the image file
34
+ * @returns ImageContent object with base64 data
35
+ */
36
+ private readImageAsBase64;
13
37
  /**
14
38
  * Extract relevant symbol information for a specific line range
15
39
  * This provides context that helps AI make more accurate modifications
@@ -29,31 +53,18 @@ export declare class FilesystemMCPService {
29
53
  /**
30
54
  * Get the content of a file with optional line range
31
55
  * Enhanced with symbol information for better AI context
56
+ * Supports multimodal content (text + images)
32
57
  * @param filePath - Path to the file (relative to base path or absolute) or array of file paths or array of file config objects
33
58
  * @param startLine - Starting line number (1-indexed, inclusive, optional - defaults to 1). Used for single file or as default for array of strings
34
59
  * @param endLine - Ending line number (1-indexed, inclusive, optional - defaults to file end). Used for single file or as default for array of strings
35
- * @returns Object containing the requested content with line numbers and metadata
60
+ * @returns Object containing the requested content with line numbers and metadata (supports multimodal content)
36
61
  * @throws Error if file doesn't exist or cannot be read
37
62
  */
38
63
  getFileContent(filePath: string | string[] | Array<{
39
64
  path: string;
40
65
  startLine?: number;
41
66
  endLine?: number;
42
- }>, startLine?: number, endLine?: number): Promise<{
43
- content: string;
44
- startLine: number;
45
- endLine: number;
46
- totalLines: number;
47
- } | {
48
- content: string;
49
- files: Array<{
50
- path: string;
51
- startLine: number;
52
- endLine: number;
53
- totalLines: number;
54
- }>;
55
- totalFiles: number;
56
- }>;
67
+ }>, startLine?: number, endLine?: number): Promise<SingleFileReadResult | MultipleFilesReadResult>;
57
68
  /**
58
69
  * Create a new file with specified content
59
70
  * @param filePath - Path where the file should be created
@@ -5,17 +5,19 @@ import * as prettier from 'prettier';
5
5
  import { vscodeConnection } from '../utils/vscodeConnection.js';
6
6
  import { incrementalSnapshotManager } from '../utils/incrementalSnapshot.js';
7
7
  import { tryUnescapeFix, trimPairIfPossible, isOverEscaped, } from '../utils/escapeHandler.js';
8
+ import { IMAGE_MIME_TYPES, OFFICE_FILE_TYPES } from './types/filesystem.types.js';
8
9
  // Utility functions
9
10
  import { calculateSimilarity, normalizeForDisplay, } from './utils/filesystem/similarity.utils.js';
10
11
  import { analyzeCodeStructure, findSmartContextBoundaries, } from './utils/filesystem/code-analysis.utils.js';
11
12
  import { findClosestMatches, generateDiffMessage, } from './utils/filesystem/match-finder.utils.js';
12
13
  import { parseEditBySearchParams, parseEditByLineParams, executeBatchOperation, } from './utils/filesystem/batch-operations.utils.js';
13
14
  import { tryFixPath } from './utils/filesystem/path-fixer.utils.js';
15
+ import { readOfficeDocument } from './utils/filesystem/office-parser.utils.js';
14
16
  // ACE Code Search utilities for symbol parsing
15
17
  import { parseFileSymbols } from './utils/aceCodeSearch/symbol.utils.js';
16
18
  // Notebook utilities for automatic note retrieval
17
19
  import { queryNotebook } from '../utils/notebookManager.js';
18
- const { resolve, dirname, isAbsolute } = path;
20
+ const { resolve, dirname, isAbsolute, extname } = path;
19
21
  /**
20
22
  * Filesystem MCP Service
21
23
  * Provides basic file operations: read, create, and delete files
@@ -55,6 +57,57 @@ export class FilesystemMCPService {
55
57
  });
56
58
  this.basePath = resolve(basePath);
57
59
  }
60
+ /**
61
+ * Check if a file is an image based on extension
62
+ * @param filePath - Path to the file
63
+ * @returns True if the file is an image
64
+ */
65
+ isImageFile(filePath) {
66
+ const ext = extname(filePath).toLowerCase();
67
+ return ext in IMAGE_MIME_TYPES;
68
+ }
69
+ /**
70
+ * Check if a file is an Office document based on extension
71
+ * @param filePath - Path to the file
72
+ * @returns True if the file is an Office document
73
+ */
74
+ isOfficeFile(filePath) {
75
+ const ext = extname(filePath).toLowerCase();
76
+ return ext in OFFICE_FILE_TYPES;
77
+ }
78
+ /**
79
+ * Get MIME type for an image file
80
+ * @param filePath - Path to the file
81
+ * @returns MIME type or undefined if not an image
82
+ */
83
+ getImageMimeType(filePath) {
84
+ const ext = extname(filePath).toLowerCase();
85
+ return IMAGE_MIME_TYPES[ext];
86
+ }
87
+ /**
88
+ * Read image file and convert to base64
89
+ * @param fullPath - Full path to the image file
90
+ * @returns ImageContent object with base64 data
91
+ */
92
+ async readImageAsBase64(fullPath) {
93
+ try {
94
+ const mimeType = this.getImageMimeType(fullPath);
95
+ if (!mimeType) {
96
+ return null;
97
+ }
98
+ const buffer = await fs.readFile(fullPath);
99
+ const base64Data = buffer.toString('base64');
100
+ return {
101
+ type: 'image',
102
+ data: base64Data,
103
+ mimeType,
104
+ };
105
+ }
106
+ catch (error) {
107
+ console.error(`Failed to read image ${fullPath}:`, error);
108
+ return null;
109
+ }
110
+ }
58
111
  /**
59
112
  * Extract relevant symbol information for a specific line range
60
113
  * This provides context that helps AI make more accurate modifications
@@ -156,10 +209,11 @@ export class FilesystemMCPService {
156
209
  /**
157
210
  * Get the content of a file with optional line range
158
211
  * Enhanced with symbol information for better AI context
212
+ * Supports multimodal content (text + images)
159
213
  * @param filePath - Path to the file (relative to base path or absolute) or array of file paths or array of file config objects
160
214
  * @param startLine - Starting line number (1-indexed, inclusive, optional - defaults to 1). Used for single file or as default for array of strings
161
215
  * @param endLine - Ending line number (1-indexed, inclusive, optional - defaults to file end). Used for single file or as default for array of strings
162
- * @returns Object containing the requested content with line numbers and metadata
216
+ * @returns Object containing the requested content with line numbers and metadata (supports multimodal content)
163
217
  * @throws Error if file doesn't exist or cannot be read
164
218
  */
165
219
  async getFileContent(filePath, startLine, endLine) {
@@ -167,7 +221,7 @@ export class FilesystemMCPService {
167
221
  // Handle array of files
168
222
  if (Array.isArray(filePath)) {
169
223
  const filesData = [];
170
- const allContents = [];
224
+ const multimodalContent = [];
171
225
  for (const fileItem of filePath) {
172
226
  try {
173
227
  // Support both string format and object format
@@ -196,7 +250,10 @@ export class FilesystemMCPService {
196
250
  if (stats.isDirectory()) {
197
251
  const dirFiles = await this.listFiles(file);
198
252
  const fileList = dirFiles.join('\n');
199
- allContents.push(`📁 Directory: ${file}\n${fileList}`);
253
+ multimodalContent.push({
254
+ type: 'text',
255
+ text: `📁 Directory: ${file}\n${fileList}`,
256
+ });
200
257
  filesData.push({
201
258
  path: file,
202
259
  startLine: 1,
@@ -205,6 +262,44 @@ export class FilesystemMCPService {
205
262
  });
206
263
  continue;
207
264
  }
265
+ // Check if this is an image file
266
+ if (this.isImageFile(fullPath)) {
267
+ const imageContent = await this.readImageAsBase64(fullPath);
268
+ if (imageContent) {
269
+ // Add text description first
270
+ multimodalContent.push({
271
+ type: 'text',
272
+ text: `🖼️ Image: ${file} (${imageContent.mimeType})`,
273
+ });
274
+ // Add image content
275
+ multimodalContent.push(imageContent);
276
+ filesData.push({
277
+ path: file,
278
+ isImage: true,
279
+ mimeType: imageContent.mimeType,
280
+ });
281
+ continue;
282
+ }
283
+ }
284
+ // Check if this is an Office document file
285
+ if (this.isOfficeFile(fullPath)) {
286
+ const docContent = await readOfficeDocument(fullPath);
287
+ if (docContent) {
288
+ // Add text description first
289
+ multimodalContent.push({
290
+ type: 'text',
291
+ text: `📄 ${docContent.fileType.toUpperCase()} Document: ${file}`,
292
+ });
293
+ // Add document content
294
+ multimodalContent.push(docContent);
295
+ filesData.push({
296
+ path: file,
297
+ isDocument: true,
298
+ fileType: docContent.fileType,
299
+ });
300
+ continue;
301
+ }
302
+ }
208
303
  const content = await fs.readFile(fullPath, 'utf-8');
209
304
  const lines = content.split('\n');
210
305
  const totalLines = lines.length;
@@ -244,7 +339,10 @@ export class FilesystemMCPService {
244
339
  if (notebookInfo) {
245
340
  fileContent += notebookInfo;
246
341
  }
247
- allContents.push(fileContent);
342
+ multimodalContent.push({
343
+ type: 'text',
344
+ text: fileContent,
345
+ });
248
346
  filesData.push({
249
347
  path: file,
250
348
  startLine: start,
@@ -256,11 +354,14 @@ export class FilesystemMCPService {
256
354
  const errorMsg = error instanceof Error ? error.message : 'Unknown error';
257
355
  // Extract file path for error message
258
356
  const filePath = typeof fileItem === 'string' ? fileItem : fileItem.path;
259
- allContents.push(`❌ ${filePath}: ${errorMsg}`);
357
+ multimodalContent.push({
358
+ type: 'text',
359
+ text: `❌ ${filePath}: ${errorMsg}`,
360
+ });
260
361
  }
261
362
  }
262
363
  return {
263
- content: allContents.join('\n\n'),
364
+ content: multimodalContent,
264
365
  files: filesData,
265
366
  totalFiles: filePath.length,
266
367
  };
@@ -284,6 +385,41 @@ export class FilesystemMCPService {
284
385
  totalLines: lines.length,
285
386
  };
286
387
  }
388
+ // Check if this is an image file
389
+ if (this.isImageFile(fullPath)) {
390
+ const imageContent = await this.readImageAsBase64(fullPath);
391
+ if (imageContent) {
392
+ return {
393
+ content: [
394
+ {
395
+ type: 'text',
396
+ text: `🖼️ Image: ${filePath} (${imageContent.mimeType})`,
397
+ },
398
+ imageContent,
399
+ ],
400
+ isImage: true,
401
+ mimeType: imageContent.mimeType,
402
+ };
403
+ }
404
+ }
405
+ // Check if this is an Office document file
406
+ if (this.isOfficeFile(fullPath)) {
407
+ const docContent = await readOfficeDocument(fullPath);
408
+ if (docContent) {
409
+ return {
410
+ content: [
411
+ {
412
+ type: 'text',
413
+ text: `📄 ${docContent.fileType.toUpperCase()} Document: ${filePath}`,
414
+ },
415
+ docContent,
416
+ ],
417
+ isDocument: true,
418
+ fileType: docContent.fileType,
419
+ };
420
+ }
421
+ }
422
+ // Text file processing
287
423
  const content = await fs.readFile(fullPath, 'utf-8');
288
424
  // Parse lines
289
425
  const lines = content.split('\n');
@@ -1090,7 +1226,7 @@ export const filesystemService = new FilesystemMCPService();
1090
1226
  export const mcpTools = [
1091
1227
  {
1092
1228
  name: 'filesystem-read',
1093
- description: 'Read file content with line numbers. **Read only when the actual file or folder path is found or provided by the user, do not make random guesses,Search for specific documents or line numbers before reading more accurately** **SUPPORTS MULTIPLE FILES WITH FLEXIBLE LINE RANGES**: Pass either (1) a single file path (string), (2) array of file paths (strings) with unified startLine/endLine, or (3) array of file config objects with per-file line ranges. **INTEGRATED DIRECTORY LISTING**: When filePath is a directory, automatically lists its contents instead of throwing error. ⚠️ **IMPORTANT WORKFLOW**: (1) ALWAYS use ACE search tools FIRST (ace-text_search/ace-search_symbols/ace-file_outline) to locate the relevant code, (2) ONLY use filesystem-read when you know the approximate location and need precise line numbers for editing. **ANTI-PATTERN**: Reading files line-by-line from the top wastes tokens - use search instead! **USAGE**: Call without parameters to read entire file(s), or specify startLine/endLine for partial reads. Returns content with line numbers (format: "123→code") for precise editing. **EXAMPLES**: (A) Unified: filePath=["a.ts", "b.ts"], startLine=1, endLine=500 reads lines 1-500 from both. (B) Per-file: filePath=[{path:"a.ts", startLine:1, endLine:300}, {path:"b.ts", startLine:100, endLine:550}] reads different ranges from each file. (C) Directory: filePath="./src" returns list of files in src/.',
1229
+ description: 'Read file content with line numbers and multimodal support (text + images + Office documents). **MULTIMODAL SUPPORT**: Automatically detects and processes: (1) Image files (.png, .jpg, .jpeg, .gif, .webp, .bmp, .svg) - returns base64-encoded image data, (2) Office documents (.pdf, .docx, .doc, .xlsx, .xls, .pptx, .ppt) - extracts and returns readable text content. All returned in MCP content format for AI analysis. **Read only when the actual file or folder path is found or provided by the user, do not make random guesses,Search for specific documents or line numbers before reading more accurately** **SUPPORTS MULTIPLE FILES WITH FLEXIBLE LINE RANGES**: Pass either (1) a single file path (string), (2) array of file paths (strings) with unified startLine/endLine, or (3) array of file config objects with per-file line ranges. **INTEGRATED DIRECTORY LISTING**: When filePath is a directory, automatically lists its contents instead of throwing error. ⚠️ **IMPORTANT WORKFLOW**: (1) ALWAYS use ACE search tools FIRST (ace-text_search/ace-search_symbols/ace-file_outline) to locate the relevant code, (2) ONLY use filesystem-read when you know the approximate location and need precise line numbers for editing. **ANTI-PATTERN**: Reading files line-by-line from the top wastes tokens - use search instead! **USAGE**: Call without parameters to read entire file(s), or specify startLine/endLine for partial reads. Returns content with line numbers (format: "123→code") for text files or multimodal content array for images/documents. **EXAMPLES**: (A) Unified: filePath=["a.ts", "b.ts"], startLine=1, endLine=500 reads lines 1-500 from both. (B) Per-file: filePath=[{path:"a.ts", startLine:1, endLine:300}, {path:"b.ts", startLine:100, endLine:550}] reads different ranges from each file. (C) Directory: filePath="./src" returns list of files in src/. (D) Image: filePath="screenshot.png" returns multimodal content with base64 image data. 
(E) Office: filePath="report.pdf" or "data.xlsx" extracts and returns document text.',
1094
1230
  inputSchema: {
1095
1231
  type: 'object',
1096
1232
  properties: {
@@ -2,6 +2,51 @@
2
2
  * Type definitions for Filesystem MCP Service
3
3
  */
4
4
  import type { Diagnostic } from '../../utils/vscodeConnection.js';
5
+ /**
6
+ * MCP Content Types - supports multimodal content
7
+ */
8
+ export type MCPContentType = 'text' | 'image' | 'document';
9
+ /**
10
+ * Text content block
11
+ */
12
+ export interface TextContent {
13
+ type: 'text';
14
+ text: string;
15
+ }
16
+ /**
17
+ * Image content block (base64 encoded)
18
+ */
19
+ export interface ImageContent {
20
+ type: 'image';
21
+ data: string;
22
+ mimeType: string;
23
+ }
24
+ /**
25
+ * Document content block (for Office files like PDF, Word, Excel, PPT)
26
+ */
27
+ export interface DocumentContent {
28
+ type: 'document';
29
+ text: string;
30
+ fileType: 'pdf' | 'word' | 'excel' | 'powerpoint';
31
+ metadata?: {
32
+ pages?: number;
33
+ sheets?: string[];
34
+ slides?: number;
35
+ [key: string]: unknown;
36
+ };
37
+ }
38
+ /**
39
+ * Multimodal content - array of text, image, and document blocks
40
+ */
41
+ export type MultimodalContent = Array<TextContent | ImageContent | DocumentContent>;
42
+ /**
43
+ * Supported image MIME types
44
+ */
45
+ export declare const IMAGE_MIME_TYPES: Record<string, string>;
46
+ /**
47
+ * Supported Office document types
48
+ */
49
+ export declare const OFFICE_FILE_TYPES: Record<string, 'pdf' | 'word' | 'excel' | 'powerpoint'>;
5
50
  /**
6
51
  * Structure analysis result for code validation
7
52
  */
@@ -51,6 +96,36 @@ export interface FileReadConfig {
51
96
  startLine?: number;
52
97
  endLine?: number;
53
98
  }
99
+ /**
100
+ * Single file read result
101
+ */
102
+ export interface SingleFileReadResult {
103
+ content: string | MultimodalContent;
104
+ startLine?: number;
105
+ endLine?: number;
106
+ totalLines?: number;
107
+ isImage?: boolean;
108
+ isDocument?: boolean;
109
+ fileType?: 'pdf' | 'word' | 'excel' | 'powerpoint';
110
+ mimeType?: string;
111
+ }
112
+ /**
113
+ * Multiple files read result
114
+ */
115
+ export interface MultipleFilesReadResult {
116
+ content: string | MultimodalContent;
117
+ files: Array<{
118
+ path: string;
119
+ startLine?: number;
120
+ endLine?: number;
121
+ totalLines?: number;
122
+ isImage?: boolean;
123
+ isDocument?: boolean;
124
+ fileType?: 'pdf' | 'word' | 'excel' | 'powerpoint';
125
+ mimeType?: string;
126
+ }>;
127
+ totalFiles: number;
128
+ }
54
129
  /**
55
130
  * Edit by search configuration
56
131
  */
@@ -1,4 +1,27 @@
1
1
  /**
2
2
  * Type definitions for Filesystem MCP Service
3
3
  */
4
- export {};
4
+ /**
5
+ * Supported image MIME types
6
+ */
7
+ export const IMAGE_MIME_TYPES = {
8
+ '.png': 'image/png',
9
+ '.jpg': 'image/jpeg',
10
+ '.jpeg': 'image/jpeg',
11
+ '.gif': 'image/gif',
12
+ '.webp': 'image/webp',
13
+ '.bmp': 'image/bmp',
14
+ '.svg': 'image/svg+xml',
15
+ };
16
+ /**
17
+ * Supported Office document types
18
+ */
19
+ export const OFFICE_FILE_TYPES = {
20
+ '.pdf': 'pdf',
21
+ '.docx': 'word',
22
+ '.doc': 'word',
23
+ '.xlsx': 'excel',
24
+ '.xls': 'excel',
25
+ '.pptx': 'powerpoint',
26
+ '.ppt': 'powerpoint',
27
+ };
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Office file parsing utilities
3
+ * Handles parsing of PDF, Word, Excel, and PowerPoint files
4
+ */
5
+ import type { DocumentContent } from '../../types/filesystem.types.js';
6
+ /**
7
+ * Parse Word document (.docx, .doc)
8
+ * @param fullPath - Full path to the Word document
9
+ * @returns DocumentContent object with extracted text
10
+ */
11
+ export declare function parseWordDocument(fullPath: string): Promise<DocumentContent | null>;
12
+ /**
13
+ * Parse PDF document
14
+ * @param fullPath - Full path to the PDF file
15
+ * @returns DocumentContent object with extracted text
16
+ */
17
+ export declare function parsePDFDocument(fullPath: string): Promise<DocumentContent | null>;
18
+ /**
19
+ * Parse Excel spreadsheet (.xlsx, .xls)
20
+ * @param fullPath - Full path to the Excel file
21
+ * @returns DocumentContent object with extracted text
22
+ */
23
+ export declare function parseExcelDocument(fullPath: string): Promise<DocumentContent | null>;
24
+ /**
25
+ * Parse PowerPoint presentation (.pptx, .ppt)
26
+ * Note: PowerPoint parsing is complex and requires unzipping the .pptx file
27
+ * This is a placeholder implementation
28
+ * @param fullPath - Full path to the PowerPoint file
29
+ * @returns DocumentContent object with extracted text
30
+ */
31
+ export declare function parsePowerPointDocument(fullPath: string): Promise<DocumentContent | null>;
32
+ /**
33
+ * Get Office file type based on extension
34
+ * @param filePath - Path to the file
35
+ * @returns File type or undefined
36
+ */
37
+ export declare function getOfficeFileType(filePath: string): 'pdf' | 'word' | 'excel' | 'powerpoint' | undefined;
38
+ /**
39
+ * Main entry point: Read and parse Office document
40
+ * @param fullPath - Full path to the Office document
41
+ * @returns DocumentContent object with extracted text
42
+ */
43
+ export declare function readOfficeDocument(fullPath: string): Promise<DocumentContent | null>;
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Office file parsing utilities
3
+ * Handles parsing of PDF, Word, Excel, and PowerPoint files
4
+ */
5
+ import { promises as fs } from 'fs';
6
+ import mammoth from 'mammoth';
7
+ import * as XLSX from 'xlsx';
8
+ import { OFFICE_FILE_TYPES } from '../../types/filesystem.types.js';
9
+ import * as path from 'path';
10
+ /**
11
+ * Parse Word document (.docx, .doc)
12
+ * @param fullPath - Full path to the Word document
13
+ * @returns DocumentContent object with extracted text
14
+ */
15
+ export async function parseWordDocument(fullPath) {
16
+ try {
17
+ const buffer = await fs.readFile(fullPath);
18
+ const result = await mammoth.extractRawText({ buffer });
19
+ return {
20
+ type: 'document',
21
+ text: result.value,
22
+ fileType: 'word',
23
+ metadata: {
24
+ messages: result.messages.length > 0 ? result.messages : undefined,
25
+ },
26
+ };
27
+ }
28
+ catch (error) {
29
+ console.error(`Failed to parse Word document ${fullPath}:`, error);
30
+ return null;
31
+ }
32
+ }
33
+ /**
34
+ * Parse PDF document
35
+ * @param fullPath - Full path to the PDF file
36
+ * @returns DocumentContent object with extracted text
37
+ */
38
+ export async function parsePDFDocument(fullPath) {
39
+ try {
40
+ // Lazy import pdf-parse to avoid loading pdfjs-dist at startup
41
+ // pdfjs-dist requires browser APIs (DOMMatrix) which causes errors in Node.js
42
+ const { PDFParse } = await import('pdf-parse');
43
+ const buffer = await fs.readFile(fullPath);
44
+ const uint8Array = new Uint8Array(buffer);
45
+ // Create parser instance and parse
46
+ const parser = new PDFParse({ data: uint8Array });
47
+ const data = await parser.getText();
48
+ return {
49
+ type: 'document',
50
+ text: data.text,
51
+ fileType: 'pdf',
52
+ metadata: {
53
+ pages: data.total,
54
+ },
55
+ };
56
+ }
57
+ catch (error) {
58
+ console.error(`Failed to parse PDF document ${fullPath}:`, error);
59
+ return null;
60
+ }
61
+ }
62
+ /**
63
+ * Parse Excel spreadsheet (.xlsx, .xls)
64
+ * @param fullPath - Full path to the Excel file
65
+ * @returns DocumentContent object with extracted text
66
+ */
67
+ export async function parseExcelDocument(fullPath) {
68
+ try {
69
+ const buffer = await fs.readFile(fullPath);
70
+ const workbook = XLSX.read(buffer, { type: 'buffer' });
71
+ const sheets = [];
72
+ let allText = '';
73
+ workbook.SheetNames.forEach(sheetName => {
74
+ sheets.push(sheetName);
75
+ const worksheet = workbook.Sheets[sheetName];
76
+ if (worksheet) {
77
+ const sheetText = XLSX.utils.sheet_to_txt(worksheet);
78
+ allText += `\n\n=== Sheet: ${sheetName} ===\n${sheetText}`;
79
+ }
80
+ });
81
+ return {
82
+ type: 'document',
83
+ text: allText.trim(),
84
+ fileType: 'excel',
85
+ metadata: {
86
+ sheets,
87
+ sheetCount: sheets.length,
88
+ },
89
+ };
90
+ }
91
+ catch (error) {
92
+ console.error(`Failed to parse Excel document ${fullPath}:`, error);
93
+ return null;
94
+ }
95
+ }
96
+ /**
97
+ * Parse PowerPoint presentation (.pptx, .ppt)
98
+ * Note: PowerPoint parsing is complex and requires unzipping the .pptx file
99
+ * This is a placeholder implementation
100
+ * @param fullPath - Full path to the PowerPoint file
101
+ * @returns DocumentContent object with extracted text
102
+ */
103
+ export async function parsePowerPointDocument(fullPath) {
104
+ try {
105
+ // PowerPoint parsing requires extracting and parsing XML from the .pptx archive
106
+ // A full implementation would use JSZip to extract slide XML files
107
+ // and parse them to extract text content
108
+ // For now, return a placeholder message
109
+ return {
110
+ type: 'document',
111
+ text: '[PowerPoint parsing not fully implemented yet. Please use a specialized tool to extract text from .pptx files.]',
112
+ fileType: 'powerpoint',
113
+ metadata: {
114
+ note: 'PowerPoint text extraction requires additional implementation',
115
+ suggestion: 'Consider using external tools or libraries like python-pptx for full PowerPoint text extraction',
116
+ },
117
+ };
118
+ }
119
+ catch (error) {
120
+ console.error(`Failed to parse PowerPoint document ${fullPath}:`, error);
121
+ return null;
122
+ }
123
+ }
124
+ /**
125
+ * Get Office file type based on extension
126
+ * @param filePath - Path to the file
127
+ * @returns File type or undefined
128
+ */
129
+ export function getOfficeFileType(filePath) {
130
+ const ext = path.extname(filePath).toLowerCase();
131
+ return OFFICE_FILE_TYPES[ext];
132
+ }
133
+ /**
134
+ * Main entry point: Read and parse Office document
135
+ * @param fullPath - Full path to the Office document
136
+ * @returns DocumentContent object with extracted text
137
+ */
138
+ export async function readOfficeDocument(fullPath) {
139
+ const fileType = getOfficeFileType(fullPath);
140
+ if (!fileType) {
141
+ return null;
142
+ }
143
+ let docContent = null;
144
+ switch (fileType) {
145
+ case 'word': {
146
+ docContent = await parseWordDocument(fullPath);
147
+ break;
148
+ }
149
+ case 'pdf': {
150
+ docContent = await parsePDFDocument(fullPath);
151
+ break;
152
+ }
153
+ case 'excel': {
154
+ docContent = await parseExcelDocument(fullPath);
155
+ break;
156
+ }
157
+ case 'powerpoint': {
158
+ docContent = await parsePowerPointDocument(fullPath);
159
+ break;
160
+ }
161
+ }
162
+ return docContent;
163
+ }
@@ -472,6 +472,10 @@ export default function ChatScreen({ skipWelcome }) {
472
472
  }
473
473
  });
474
474
  const handleHistorySelect = async (selectedIndex, message, images) => {
475
+ // Clear context percentage and usage when user performs history rollback
476
+ setCurrentContextPercentage(0);
477
+ currentContextPercentageRef.current = 0;
478
+ streamingState.setContextUsage(null);
475
479
  // Count total files that will be rolled back (from selectedIndex onwards)
476
480
  let totalFileCount = 0;
477
481
  for (const [index, count] of snapshotState.snapshotFileCount.entries()) {
@@ -1,5 +1,6 @@
1
1
  import type { SubAgentMessage } from './subAgentExecutor.js';
2
2
  import type { ConfirmationResult } from '../ui/components/ToolConfirmation.js';
3
+ import type { ImageContent } from '../api/types.js';
3
4
  export interface ToolCall {
4
5
  id: string;
5
6
  type: 'function';
@@ -12,6 +13,7 @@ export interface ToolResult {
12
13
  tool_call_id: string;
13
14
  role: 'tool';
14
15
  content: string;
16
+ images?: ImageContent[];
15
17
  }
16
18
  export type SubAgentMessageCallback = (message: SubAgentMessage) => void;
17
19
  export interface ToolConfirmationCallback {
@@ -1,5 +1,59 @@
1
1
  import { executeMCPTool } from './mcpToolsManager.js';
2
2
  import { subAgentService } from '../mcp/subagent.js';
3
/**
 * Tell whether a value is a multimodal content array: a non-empty array
 * whose entries are all objects with type 'text' or 'image'.
 */
function isMultimodalContent(value) {
    if (!Array.isArray(value) || value.length === 0) {
        return false;
    }
    // An entry qualifies when it is a non-null object tagged as text or image.
    const isContentPart = (entry) => {
        if (!entry || typeof entry !== 'object') {
            return false;
        }
        return entry.type === 'text' || entry.type === 'image';
    };
    // "No entry fails the check" is the same as "every entry passes".
    return !value.some((entry) => !isContentPart(entry));
}
13
/**
 * Split a tool result into a text payload and an optional list of images.
 *
 * The result may itself be a multimodal array, or an object wrapping one in
 * its `content` property (e.g., {content: [...], files: [...], totalFiles: n}).
 * Anything else is returned as its JSON serialization with no images.
 */
function extractMultimodalContent(result) {
    // A truthy `content` property on an object means the payload is wrapped.
    const isWrapped = Boolean(result && typeof result === 'object' && result.content);
    const candidate = isWrapped ? result.content : result;
    // Not multimodal, return as JSON string
    if (!isMultimodalContent(candidate)) {
        return {
            textContent: JSON.stringify(result),
        };
    }
    const texts = [];
    const pictures = [];
    for (const part of candidate) {
        switch (part.type) {
            case 'text': {
                texts.push(part.text);
                break;
            }
            case 'image': {
                const { data, mimeType } = part;
                pictures.push({ type: 'image', data, mimeType });
                break;
            }
        }
    }
    const joinedText = texts.join('\n\n');
    // Report images as undefined rather than an empty list.
    const images = pictures.length > 0 ? pictures : undefined;
    if (isWrapped) {
        // Keep the wrapper's other fields and swap the multimodal array for
        // the joined text before serializing.
        const flattened = { ...result, content: joinedText };
        return {
            textContent: JSON.stringify(flattened),
            images,
        };
    }
    return {
        textContent: joinedText,
        images,
    };
}
3
57
  /**
4
58
  * Execute a single tool call and return the result
5
59
  */
@@ -48,10 +102,13 @@ export async function executeToolCall(toolCall, abortSignal, onTokenUpdate, onSu
48
102
  }
49
103
  // Regular tool execution
50
104
  const result = await executeMCPTool(toolCall.function.name, args, abortSignal, onTokenUpdate);
105
+ // Extract multimodal content (text + images)
106
+ const { textContent, images } = extractMultimodalContent(result);
51
107
  return {
52
108
  tool_call_id: toolCall.id,
53
109
  role: 'tool',
54
- content: JSON.stringify(result),
110
+ content: textContent,
111
+ images,
55
112
  };
56
113
  }
57
114
  catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "snow-ai",
3
- "version": "0.4.6",
3
+ "version": "0.4.7",
4
4
  "description": "Intelligent Command Line Assistant powered by AI",
5
5
  "license": "MIT",
6
6
  "bin": {
@@ -45,8 +45,6 @@
45
45
  "better-sqlite3": "^12.4.1",
46
46
  "cli-highlight": "^2.1.11",
47
47
  "diff": "^8.0.2",
48
- "marked": "^15.0.6",
49
- "marked-terminal": "^7.3.0",
50
48
  "fzf": "^0.5.2",
51
49
  "http-proxy-agent": "^7.0.2",
52
50
  "https-proxy-agent": "^7.0.6",
@@ -56,18 +54,25 @@
56
54
  "ink-select-input": "^6.2.0",
57
55
  "ink-spinner": "^5.0.0",
58
56
  "ink-text-input": "^6.0.0",
57
+ "mammoth": "^1.11.0",
58
+ "marked": "^15.0.6",
59
+ "marked-terminal": "^7.3.0",
59
60
  "meow": "^11.0.0",
61
+ "pdf-parse": "^2.4.5",
62
+ "pptx-parser": "^1.1.7-beta.9",
60
63
  "prettier": "^2.8.7",
61
64
  "puppeteer-core": "^24.25.0",
62
65
  "react": "^18.2.0",
63
66
  "string-width": "^7.2.0",
64
67
  "tiktoken": "^1.0.22",
65
- "ws": "^8.14.2"
68
+ "ws": "^8.14.2",
69
+ "xlsx": "^0.18.5"
66
70
  },
67
71
  "devDependencies": {
68
72
  "@sindresorhus/tsconfig": "^3.0.1",
69
73
  "@types/diff": "^7.0.2",
70
74
  "@types/marked-terminal": "^6.1.1",
75
+ "@types/pdf-parse": "^1.1.5",
71
76
  "@types/prettier": "^2.7.3",
72
77
  "@types/react": "^18.0.32",
73
78
  "@types/ws": "^8.5.8",