massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/claude_code.py +9 -1
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/gemini.py +35 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/response.py +2 -0
- massgen/chat_agent.py +9 -3
- massgen/cli.py +291 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +69 -14
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +9 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +283 -15
- massgen/orchestrator.py +335 -38
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/tool/__init__.py +4 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +211 -0
- massgen/tool/_multimodal_tools/understand_file.py +555 -0
- massgen/tool/_multimodal_tools/understand_image.py +316 -0
- massgen/tool/_multimodal_tools/understand_video.py +340 -0
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +1368 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Understand and analyze file contents using OpenAI's gpt-4.1 API.
|
|
4
|
+
Supports text files, PDF, DOCX, XLSX, and more.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from openai import OpenAI
|
|
14
|
+
|
|
15
|
+
from massgen.tool._result import ExecutionResult, TextContent
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
    """
    Validate that a path is within allowed directories.

    Both ``path`` and every entry of ``allowed_paths`` are normalised with
    ``Path.resolve()`` before the containment check. ``relative_to`` is a purely
    lexical comparison, so without normalisation an allowed directory reached
    through a symlink (or written with ``..`` / as a relative path) would reject
    files that really live inside it, and a candidate such as
    ``allowed/../secret`` would be accepted even though it escapes the directory.

    Args:
        path: Path to validate
        allowed_paths: List of allowed base paths (optional). ``None`` or an
            empty list disables the check.

    Raises:
        ValueError: If path is not within allowed directories
    """
    if not allowed_paths:
        return  # No restrictions

    resolved_path = Path(path).resolve()

    for allowed_path in allowed_paths:
        try:
            resolved_path.relative_to(Path(allowed_path).resolve())
        except ValueError:
            continue  # Not under this base; try the next one
        return  # Path is within this allowed directory

    raise ValueError(f"Path not in allowed directories: {path}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _extract_text_from_pdf(file_path: Path) -> Tuple[str, str]:
    """
    Extract text from a PDF file.

    Each page that yields non-blank text is emitted as a section headed by
    ``--- Page N ---`` (1-based); sections are joined with a blank line.
    PyPDF2 is imported lazily so the dependency stays optional.

    Args:
        file_path: Path to the PDF file

    Returns:
        Tuple of (extracted_text, error_message)
        If successful, error_message is empty string
    """
    try:
        import PyPDF2
    except ImportError:
        return "", "PyPDF2 is required for PDF files. Install it with: pip install PyPDF2"

    try:
        text_content = []
        with open(file_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)

            for page_num, page in enumerate(pdf_reader.pages, 1):
                # Treat a falsy result (None / "") as "no text on this page" so
                # one unextractable page cannot abort the whole document.
                text = page.extract_text() or ""
                if text.strip():
                    text_content.append(f"--- Page {page_num} ---\n{text}")

        if not text_content:
            return "", "PDF file appears to be empty or contains only images"

        return "\n\n".join(text_content), ""

    except Exception as e:
        # Deliberately broad: any parser failure is reported through the
        # error slot of the returned tuple instead of propagating.
        return "", f"Failed to extract text from PDF: {str(e)}"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _extract_text_from_docx(file_path: Path) -> Tuple[str, str]:
    """
    Pull the readable text out of a Word (.docx) document.

    Non-blank paragraphs come first, followed by every table row rendered as
    its cell texts joined with " | ". All pieces are separated by a blank line.
    python-docx is imported lazily so the dependency stays optional.

    Args:
        file_path: Path to the DOCX file

    Returns:
        Tuple of (extracted_text, error_message)
        error_message is an empty string on success
    """
    try:
        from docx import Document
    except ImportError:
        return "", "python-docx is required for DOCX files. Install it with: pip install python-docx"

    try:
        document = Document(file_path)

        # Body paragraphs that contain something other than whitespace
        chunks = [paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()]

        # Table rows, one line per row
        for tbl in document.tables:
            rendered_rows = (" | ".join(cell.text for cell in table_row.cells) for table_row in tbl.rows)
            chunks.extend(line for line in rendered_rows if line.strip())

        if chunks:
            return "\n\n".join(chunks), ""
        return "", "DOCX file appears to be empty"

    except Exception as exc:
        # Any failure while opening or walking the document is reported via
        # the error slot rather than raised.
        return "", f"Failed to extract text from DOCX: {str(exc)}"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _extract_text_from_excel(file_path: Path) -> Tuple[str, str]:
    """
    Render the cell values of an Excel workbook as plain text.

    Every sheet contributes a "=== Sheet: <name> ===" header followed by one
    line per non-empty row, with cells joined by " | " (empty cells become
    empty strings). The workbook is opened with ``data_only=True``.
    openpyxl is imported lazily so the dependency stays optional.

    Args:
        file_path: Path to the Excel file

    Returns:
        Tuple of (extracted_text, error_message)
        error_message is an empty string on success
    """
    try:
        import openpyxl
    except ImportError:
        return "", "openpyxl is required for XLSX files. Install it with: pip install openpyxl"

    try:
        book = openpyxl.load_workbook(file_path, data_only=True)
        lines = []

        for name in book.sheetnames:
            worksheet = book[name]
            lines.append(f"=== Sheet: {name} ===\n")

            for cells in worksheet.iter_rows(values_only=True):
                rendered = ["" if value is None else str(value) for value in cells]
                if not any(item.strip() for item in rendered):
                    continue  # skip rows with nothing but blanks
                lines.append(" | ".join(rendered))

        # Exactly one header per sheet is always present; anything beyond
        # that count is actual row data.
        has_rows = len(lines) > len(book.sheetnames)
        if not has_rows:
            return "", "Excel file appears to be empty"

        return "\n".join(lines), ""

    except Exception as exc:
        # Loader/iteration failures are reported through the error slot.
        return "", f"Failed to extract text from Excel: {str(exc)}"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _extract_text_from_pptx(file_path: Path) -> Tuple[str, str]:
    """
    Collect the text of every slide in a PowerPoint (.pptx) deck.

    Each slide contributes a "--- Slide N ---" header (1-based) followed by the
    text of every shape on that slide that exposes a non-blank ``text``
    attribute. Pieces are joined with a blank line.
    python-pptx is imported lazily so the dependency stays optional.

    Args:
        file_path: Path to the PPTX file

    Returns:
        Tuple of (extracted_text, error_message)
        error_message is an empty string on success
    """
    try:
        from pptx import Presentation
    except ImportError:
        return "", "python-pptx is required for PPTX files. Install it with: pip install python-pptx"

    try:
        deck = Presentation(file_path)
        pieces = []

        for index, slide in enumerate(deck.slides, start=1):
            pieces.append(f"--- Slide {index} ---")
            # Not every shape carries text, hence the hasattr guard
            pieces.extend(shape.text for shape in slide.shapes if hasattr(shape, "text") and shape.text.strip())

        # One header per slide is always present; more entries than slides
        # means at least one shape contributed text.
        if len(pieces) > len(deck.slides):
            return "\n\n".join(pieces), ""
        return "", "PowerPoint file appears to be empty"

    except Exception as exc:
        # Parsing failures are reported through the error slot.
        return "", f"Failed to extract text from PowerPoint: {str(exc)}"
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _error_result(message: str) -> ExecutionResult:
    """Wrap an error message in the standard failed ``understand_file`` result."""
    result = {
        "success": False,
        "operation": "understand_file",
        "error": message,
    }
    return ExecutionResult(
        output_blocks=[TextContent(data=json.dumps(result, indent=2))],
    )


async def understand_file(
    file_path: str,
    prompt: str = "Please analyze this file and provide a comprehensive understanding of its content, purpose, and structure.",
    model: str = "gpt-4.1",
    max_chars: int = 50000,
    allowed_paths: Optional[List[str]] = None,
    agent_cwd: Optional[str] = None,
) -> ExecutionResult:
    """
    Understand and analyze file contents using OpenAI's gpt-4.1 API.

    This tool reads a file (text or document format) and processes its content through
    OpenAI's gpt-4.1 API to provide insights, summaries, explanations, or answer questions.

    Args:
        file_path: Path to the file to analyze
                  - Relative path: Resolved relative to workspace
                  - Absolute path: Must be within allowed directories
        prompt: Question or instruction about the file (default: asks for comprehensive analysis)
        model: Model to use (default: "gpt-4.1")
        max_chars: Maximum number of characters to read/extract (default: 50000)
                  - Prevents processing extremely large files
                  - Applies to both text files and extracted content from documents
                  - Must be a positive integer
        allowed_paths: List of allowed base paths for validation (optional)
        agent_cwd: Agent's current working directory (automatically injected, optional)

    Returns:
        ExecutionResult containing:
        - success: Whether operation succeeded
        - operation: "understand_file"
        - file_path: Path to the analyzed file
        - file_name: Name of the file
        - file_type: Extraction method used ("text", "pdf", "docx", "excel", "pptx")
        - file_size: Size of the file in bytes
        - chars_read: Number of characters read/extracted
        - truncated: Whether content was truncated
        - prompt: The prompt used
        - model: Model used for analysis
        - response: The model's understanding/analysis of the file

        On failure the result contains only success (False), operation and error.

    Examples:
        # Text and code files
        understand_file("script.py")
        → Returns analysis of the Python script

        understand_file("README.md", "Summarize the key points of this documentation")
        → Returns summary of documentation

        # PDF documents
        understand_file("report.pdf", "What are the main findings in this research paper?")
        → Extracts text from PDF and analyzes it

        # Word documents
        understand_file("proposal.docx", "Summarize this business proposal")
        → Extracts text from DOCX and summarizes

        # Excel spreadsheets
        understand_file("data.xlsx", "What patterns can you see in this data?")
        → Extracts data from Excel and analyzes

        # PowerPoint presentations
        understand_file("presentation.pptx", "What are the key points of this presentation?")
        → Extracts text from slides and summarizes

    Security:
        - Requires valid OpenAI API key
        - File must exist and be readable
        - File content is sent to OpenAI API

    Supported File Types:
        Text Files:
        - Code: .py, .js, .java, .cpp, .c, .go, .rs, .ts, .tsx, .jsx, etc.
        - Config: .md, .yaml, .yml, .json, .xml, .toml, .ini, etc.
        - Data: .txt, .log, .csv, .tsv, etc.

        Document Files (require additional packages):
        - PDF: .pdf (requires PyPDF2: pip install PyPDF2)
        - Word: .docx (requires python-docx: pip install python-docx)
        - Excel: .xlsx (requires openpyxl: pip install openpyxl)
        - PowerPoint: .pptx (requires python-pptx: pip install python-pptx)

    Note:
        - Old Office formats (.doc, .xls, .ppt) are not supported
        - For images, use understand_image tool
        - For videos, use understand_video tool
        - For audio, use generate_text_with_input_audio tool
    """
    try:
        # A non-positive limit would make the slicing below meaningless
        # (negative slices drop characters from the end), so reject it up front.
        if max_chars < 1:
            return _error_result(f"max_chars must be a positive integer, got {max_chars}")

        # Convert allowed_paths from strings to Path objects
        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer a .env four directory levels above
        # this module, otherwise fall back to python-dotenv's default lookup.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")

        if not openai_api_key:
            return _error_result("OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.")

        # Initialize OpenAI client
        client = OpenAI(api_key=openai_api_key)

        # Resolve file path
        # Use agent_cwd if available, otherwise fall back to Path.cwd()
        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()

        if Path(file_path).is_absolute():
            f_path = Path(file_path).resolve()
        else:
            f_path = (base_dir / file_path).resolve()

        # Validate file path (raises ValueError, reported by the outer handler)
        _validate_path_access(f_path, allowed_paths_list)

        if not f_path.exists():
            return _error_result(f"File does not exist: {f_path}")

        if not f_path.is_file():
            return _error_result(f"Path is not a file: {f_path}")

        # Get file size (bytes)
        file_size = f_path.stat().st_size

        # Check if file is unsupported binary format
        unsupported_binary_extensions = {
            ".exe",
            ".bin",
            ".dll",
            ".so",
            ".dylib",
            ".o",
            ".a",
            ".doc",  # Old Word format (use .docx instead)
            ".xls",  # Old Excel format (use .xlsx instead)
            ".ppt",  # Old PowerPoint format (use .pptx instead)
            ".zip",
            ".tar",
            ".gz",
            ".bz2",
            ".7z",
            ".rar",
            ".jpg",
            ".jpeg",
            ".png",
            ".gif",
            ".bmp",
            ".ico",
            ".svg",
            ".mp3",
            ".wav",
            ".ogg",
            ".flac",
            ".aac",
            ".mp4",
            ".avi",
            ".mov",
            ".mkv",
            ".flv",
            ".wmv",
            ".pyc",
            ".class",
            ".jar",
        }

        file_extension = f_path.suffix.lower()

        if file_extension in unsupported_binary_extensions:
            return _error_result(
                f"Unsupported file format: {f_path.suffix}. " f"For images use understand_image, for videos use understand_video, for audio use generate_text_with_input_audio.",
            )

        # Extension -> (reported file_type, extractor). Built per call so the
        # extractor names are looked up at call time. ".xls" is intentionally
        # absent: it is rejected above, so a branch for it could never run.
        document_extractors = {
            ".pdf": ("pdf", _extract_text_from_pdf),
            ".docx": ("docx", _extract_text_from_docx),
            ".xlsx": ("excel", _extract_text_from_excel),
            ".pptx": ("pptx", _extract_text_from_pptx),
        }

        # Extract content based on file type
        file_content = ""
        extraction_method = "text"
        text_truncated = False

        if file_extension in document_extractors:
            extraction_method, extractor = document_extractors[file_extension]
            file_content, error = extractor(f_path)
            if error:
                return _error_result(error)

        # Text-based files
        else:
            try:
                with open(f_path, "r", encoding="utf-8") as file:
                    # Read one character beyond the limit so truncation is
                    # decided in characters. Comparing the character count with
                    # the byte size mis-reports multi-byte files that contain
                    # exactly max_chars characters.
                    file_content = file.read(max_chars + 1)
                text_truncated = len(file_content) > max_chars
                file_content = file_content[:max_chars]

            except UnicodeDecodeError:
                # File is likely binary
                return _error_result(
                    f"File appears to be binary and cannot be read as text: {f_path}. Supported binary formats: PDF, DOCX, XLSX, PPTX",
                )
            except Exception as read_error:
                return _error_result(f"Failed to read file: {str(read_error)}")

        # Truncate if necessary
        chars_read = len(file_content)
        truncated = False

        if extraction_method == "text":
            if text_truncated:
                truncated = True
                truncation_note = f"\n\n[Note: File was truncated. Read {chars_read} characters out of {file_size} bytes total. Increase max_chars parameter to read more.]"
                file_content += truncation_note
        elif chars_read > max_chars:
            # Truncate extracted content from document formats
            truncated = True
            file_content = file_content[:max_chars]
            truncation_note = f"\n\n[Note: Extracted content was truncated. Showing first {max_chars} characters. Increase max_chars parameter to read more.]"
            file_content += truncation_note
            # NOTE(review): this count includes the appended note, unlike the
            # text path above; kept as-is to preserve the reported values.
            chars_read = len(file_content)

        # Build the full prompt with file content
        full_prompt = f"{prompt}\n\nFile: {f_path.name}\nContent:\n```\n{file_content}\n```"

        try:
            # Call OpenAI API for file understanding.
            # NOTE(review): this is the synchronous client called from a
            # coroutine, so the event loop is blocked for the duration of the
            # request; consider offloading to a worker thread.
            response = client.responses.create(
                model=model,
                input=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_text", "text": full_prompt},
                        ],
                    },
                ],
            )

            # Extract response text
            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)

            result = {
                "success": True,
                "operation": "understand_file",
                "file_path": str(f_path),
                "file_name": f_path.name,
                "file_type": extraction_method,
                "file_size": file_size,
                "chars_read": chars_read,
                "truncated": truncated,
                "prompt": prompt,
                "model": model,
                "response": response_text,
            }
            return ExecutionResult(
                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
            )

        except Exception as api_error:
            return _error_result(f"OpenAI API error: {str(api_error)}")

    except Exception as e:
        # Top-level boundary: every unexpected failure (including a rejected
        # path) is returned as a structured error instead of raised.
        return _error_result(f"Failed to understand file: {str(e)}")
|