massgen 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/backend/azure_openai.py +9 -1
  5. massgen/backend/base.py +4 -0
  6. massgen/backend/claude_code.py +9 -1
  7. massgen/backend/gemini.py +35 -6
  8. massgen/backend/gemini_utils.py +30 -0
  9. massgen/chat_agent.py +9 -3
  10. massgen/cli.py +291 -43
  11. massgen/config_builder.py +163 -18
  12. massgen/configs/README.md +52 -6
  13. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  14. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  15. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  16. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  17. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  18. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  19. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  20. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  21. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  22. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  23. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  24. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  25. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  26. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  28. massgen/configs/tools/memory/README.md +199 -0
  29. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  30. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  31. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  32. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  33. massgen/docker/README.md +83 -0
  34. massgen/filesystem_manager/_code_execution_server.py +22 -7
  35. massgen/filesystem_manager/_docker_manager.py +21 -1
  36. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  37. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  38. massgen/formatter/_gemini_formatter.py +73 -0
  39. massgen/frontend/coordination_ui.py +175 -257
  40. massgen/frontend/displays/base_display.py +29 -0
  41. massgen/frontend/displays/rich_terminal_display.py +155 -9
  42. massgen/frontend/displays/simple_display.py +21 -0
  43. massgen/frontend/displays/terminal_display.py +22 -2
  44. massgen/logger_config.py +50 -6
  45. massgen/message_templates.py +123 -3
  46. massgen/orchestrator.py +319 -38
  47. massgen/tests/test_code_execution.py +178 -0
  48. massgen/tests/test_orchestration_restart.py +204 -0
  49. massgen/tool/__init__.py +4 -0
  50. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  51. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  52. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  53. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  54. massgen/tool/docs/multimodal_tools.md +779 -0
  55. massgen/tool/workflow_toolkits/__init__.py +26 -0
  56. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  57. massgen/utils.py +1 -0
  58. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/METADATA +8 -3
  59. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/RECORD +63 -36
  60. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  61. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  62. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  63. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,550 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Understand and analyze file contents using OpenAI's gpt-4.1 API.
4
+ Supports text files, PDF, DOCX, XLSX, and more.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from typing import List, Optional, Tuple
11
+
12
+ from dotenv import load_dotenv
13
+ from openai import OpenAI
14
+
15
+ from massgen.tool._result import ExecutionResult, TextContent
16
+
17
+
18
def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
    """
    Validate that a path is within allowed directories.

    Both ``path`` and every entry of ``allowed_paths`` are resolved (made
    absolute, ``..`` segments collapsed, symlinks followed) before the
    containment check. ``relative_to`` is a purely lexical comparison, so
    without resolving, a path such as ``/allowed/../secret`` would be accepted
    and an allowed directory given in relative or symlinked form would never
    match an already-resolved target.

    Args:
        path: Path to validate
        allowed_paths: List of allowed base paths (optional). ``None`` or an
            empty list means no restrictions are applied.

    Raises:
        ValueError: If path is not within allowed directories
    """
    if not allowed_paths:
        return  # No restrictions

    resolved_path = Path(path).resolve()

    for allowed_path in allowed_paths:
        try:
            resolved_path.relative_to(Path(allowed_path).resolve())
        except ValueError:
            # Not under this base directory; try the next one
            continue
        return  # Path is within this allowed directory

    raise ValueError(f"Path not in allowed directories: {path}")
40
+
41
+
42
def _extract_text_from_pdf(file_path: Path) -> Tuple[str, str]:
    """
    Pull the text layer out of a PDF, one labelled section per page.

    Pages whose extracted text is blank are skipped. Each kept page is
    prefixed with a ``--- Page N ---`` header (1-based) and the sections are
    joined with blank lines.

    Args:
        file_path: Path to the PDF file

    Returns:
        Tuple of (extracted_text, error_message).
        On success error_message is an empty string; on failure
        extracted_text is an empty string and error_message explains why
        (missing PyPDF2, no extractable text, or a read/parse failure).
    """
    try:
        import PyPDF2
    except ImportError:
        return "", "PyPDF2 is required for PDF files. Install it with: pip install PyPDF2"

    try:
        sections = []
        with open(file_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            for number, page in enumerate(reader.pages, start=1):
                page_text = page.extract_text()
                if page_text.strip():
                    sections.append(f"--- Page {number} ---\n{page_text}")

        if sections:
            return "\n\n".join(sections), ""
        return "", "PDF file appears to be empty or contains only images"

    except Exception as exc:
        return "", f"Failed to extract text from PDF: {str(exc)}"
77
+
78
+
79
def _extract_text_from_docx(file_path: Path) -> Tuple[str, str]:
    """
    Pull paragraph and table text out of a Word (.docx) document.

    Non-blank paragraphs come first, in document order, followed by every
    non-blank table row rendered as its cell texts joined with `` | ``.
    All pieces are separated by blank lines.

    Args:
        file_path: Path to the DOCX file

    Returns:
        Tuple of (extracted_text, error_message).
        On success error_message is an empty string; on failure
        extracted_text is an empty string and error_message explains why
        (missing python-docx, empty document, or a read/parse failure).
    """
    try:
        from docx import Document
    except ImportError:
        return "", "python-docx is required for DOCX files. Install it with: pip install python-docx"

    try:
        document = Document(file_path)

        # Body paragraphs first
        chunks = [para.text for para in document.paragraphs if para.text.strip()]

        # Then table rows, one line per row
        for table in document.tables:
            for row in table.rows:
                joined_cells = " | ".join(cell.text for cell in row.cells)
                if joined_cells.strip():
                    chunks.append(joined_cells)

        if chunks:
            return "\n\n".join(chunks), ""
        return "", "DOCX file appears to be empty"

    except Exception as exc:
        return "", f"Failed to extract text from DOCX: {str(exc)}"
118
+
119
+
120
def _extract_text_from_excel(file_path: Path) -> Tuple[str, str]:
    """
    Render the cell values of an Excel workbook as plain text.

    The workbook is loaded with ``data_only=True``. Every sheet contributes a
    ``=== Sheet: <name> ===`` header followed by one line per non-empty row,
    with cell values stringified (``None`` becomes an empty string) and
    joined with `` | ``.

    Args:
        file_path: Path to the Excel file

    Returns:
        Tuple of (extracted_text, error_message).
        On success error_message is an empty string; on failure
        extracted_text is an empty string and error_message explains why
        (missing openpyxl, workbook with no data rows, or a read/parse failure).
    """
    try:
        import openpyxl
    except ImportError:
        return "", "openpyxl is required for XLSX files. Install it with: pip install openpyxl"

    try:
        book = openpyxl.load_workbook(file_path, data_only=True)
        lines = []

        for name in book.sheetnames:
            worksheet = book[name]
            lines.append(f"=== Sheet: {name} ===\n")

            for row in worksheet.iter_rows(values_only=True):
                # Blank cells become empty strings so columns stay aligned
                cells = ["" if value is None else str(value) for value in row]
                # Skip rows that contain nothing but whitespace
                if any(cell_text.strip() for cell_text in cells):
                    lines.append(" | ".join(cells))

        # Only the per-sheet headers were collected, so there is no data
        if len(lines) <= len(book.sheetnames):
            return "", "Excel file appears to be empty"

        return "\n".join(lines), ""

    except Exception as exc:
        return "", f"Failed to extract text from Excel: {str(exc)}"
158
+
159
+
160
def _extract_text_from_pptx(file_path: Path) -> Tuple[str, str]:
    """
    Pull the text of every slide out of a PowerPoint (.pptx) deck.

    Each slide contributes a ``--- Slide N ---`` header (1-based) followed by
    the text of every shape that exposes a non-blank ``text`` attribute.
    All pieces are separated by blank lines.

    Args:
        file_path: Path to the PPTX file

    Returns:
        Tuple of (extracted_text, error_message).
        On success error_message is an empty string; on failure
        extracted_text is an empty string and error_message explains why
        (missing python-pptx, deck without any text, or a read/parse failure).
    """
    try:
        from pptx import Presentation
    except ImportError:
        return "", "python-pptx is required for PPTX files. Install it with: pip install python-pptx"

    try:
        deck = Presentation(file_path)
        pieces = []

        for index, slide in enumerate(deck.slides, 1):
            pieces.append(f"--- Slide {index} ---")
            pieces.extend(
                shape.text
                for shape in slide.shapes
                if hasattr(shape, "text") and shape.text.strip()
            )

        # Only the per-slide headers were collected, so there is no text
        if len(pieces) <= len(deck.slides):
            return "", "PowerPoint file appears to be empty"

        return "\n\n".join(pieces), ""

    except Exception as exc:
        return "", f"Failed to extract text from PowerPoint: {str(exc)}"
194
+
195
+
196
# Extensions rejected up front: executables, archives, media, compiled code,
# and legacy Office formats that none of the extractors in this module read.
_UNSUPPORTED_BINARY_EXTENSIONS = frozenset(
    {
        ".exe",
        ".bin",
        ".dll",
        ".so",
        ".dylib",
        ".o",
        ".a",
        ".doc",  # Old Word format (use .docx instead)
        ".xls",  # Old Excel format (use .xlsx instead; openpyxl cannot read .xls)
        ".ppt",  # Old PowerPoint format (use .pptx instead)
        ".zip",
        ".tar",
        ".gz",
        ".bz2",
        ".7z",
        ".rar",
        ".jpg",
        ".jpeg",
        ".png",
        ".gif",
        ".bmp",
        ".ico",
        ".svg",
        ".mp3",
        ".wav",
        ".ogg",
        ".flac",
        ".aac",
        ".mp4",
        ".avi",
        ".mov",
        ".mkv",
        ".flv",
        ".wmv",
        ".pyc",
        ".class",
        ".jar",
    }
)


def _understand_file_error(message: str) -> ExecutionResult:
    """Wrap an error message in the JSON failure payload used by understand_file."""
    payload = {
        "success": False,
        "operation": "understand_file",
        "error": message,
    }
    return ExecutionResult(
        output_blocks=[TextContent(data=json.dumps(payload, indent=2))],
    )


async def understand_file(
    file_path: str,
    prompt: str = "Please analyze this file and provide a comprehensive understanding of its content, purpose, and structure.",
    model: str = "gpt-4.1",
    max_chars: int = 50000,
    allowed_paths: Optional[List[str]] = None,
) -> ExecutionResult:
    """
    Understand and analyze file contents using OpenAI's gpt-4.1 API.

    This tool reads a file (text or document format) and processes its content through
    OpenAI's gpt-4.1 API to provide insights, summaries, explanations, or answer questions.

    Args:
        file_path: Path to the file to analyze
                  - Relative path: Resolved relative to the current working directory
                  - Absolute path: Must be within allowed directories
        prompt: Question or instruction about the file (default: asks for comprehensive analysis)
        model: Model to use (default: "gpt-4.1")
        max_chars: Maximum number of characters to read/extract (default: 50000)
                  - Must be a positive integer; otherwise an error result is returned
                  - Prevents processing extremely large files
                  - Applies to both text files and extracted content from documents
        allowed_paths: List of allowed base paths for validation (optional).
                  Entries are resolved before comparison, so relative or
                  symlinked directories are honoured.

    Returns:
        ExecutionResult containing a single JSON text block. All failures
        (including unexpected exceptions) are reported as
        ``{"success": false, "operation": "understand_file", "error": ...}``
        rather than raised. On success the payload holds:
        - success: Whether operation succeeded
        - operation: "understand_file"
        - file_path: Path to the analyzed file
        - file_name: Name of the file
        - file_type: Extraction method used ("text", "pdf", "docx", "excel", "pptx")
        - file_size: Size of the file in bytes
        - chars_read: Number of characters of file content sent to the model
                      (never more than max_chars; excludes the truncation note)
        - truncated: Whether content was truncated
        - prompt: The prompt used
        - model: Model used for analysis
        - response: The model's understanding/analysis of the file

    Examples:
        # Text and code files
        understand_file("script.py")
        → Returns analysis of the Python script

        understand_file("README.md", "Summarize the key points of this documentation")
        → Returns summary of documentation

        # PDF documents
        understand_file("report.pdf", "What are the main findings in this research paper?")
        → Extracts text from PDF and analyzes it

        # Word documents
        understand_file("proposal.docx", "Summarize this business proposal")
        → Extracts text from DOCX and summarizes

        # Excel spreadsheets
        understand_file("data.xlsx", "What patterns can you see in this data?")
        → Extracts data from Excel and analyzes

        # PowerPoint presentations
        understand_file("presentation.pptx", "What are the key points of this presentation?")
        → Extracts text from slides and summarizes

    Security:
        - Requires valid OpenAI API key
        - File must exist and be readable
        - File content is sent to OpenAI API

    Supported File Types:
        Text Files:
        - Code: .py, .js, .java, .cpp, .c, .go, .rs, .ts, .tsx, .jsx, etc.
        - Config: .md, .yaml, .yml, .json, .xml, .toml, .ini, etc.
        - Data: .txt, .log, .csv, .tsv, etc.

        Document Files (require additional packages):
        - PDF: .pdf (requires PyPDF2: pip install PyPDF2)
        - Word: .docx (requires python-docx: pip install python-docx)
        - Excel: .xlsx (requires openpyxl: pip install openpyxl)
        - PowerPoint: .pptx (requires python-pptx: pip install python-pptx)

    Note:
        - Old Office formats (.doc, .xls, .ppt) are not supported
        - For images, use understand_image tool
        - For videos, use understand_video tool
        - For audio, use generate_text_with_input_audio tool
    """
    # Local import: only needed to off-load the blocking OpenAI call below.
    import asyncio

    try:
        # A non-positive limit would either read nothing or (for negatives)
        # read everything and then slice off the tail, so reject it early.
        if max_chars < 1:
            return _understand_file_error(f"max_chars must be a positive integer, got {max_chars}")

        # Resolve allowed paths so they are comparable with the resolved
        # target path (relative_to is a purely lexical check).
        allowed_paths_list = [Path(p).resolve() for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer a .env four directories above
        # this module, otherwise fall back to python-dotenv's default lookup.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _understand_file_error(
                "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
            )

        # Initialize OpenAI client
        client = OpenAI(api_key=openai_api_key)

        # Resolve file path
        if Path(file_path).is_absolute():
            f_path = Path(file_path).resolve()
        else:
            f_path = (Path.cwd() / file_path).resolve()

        # Validate file path (raises ValueError, reported by the outer handler)
        _validate_path_access(f_path, allowed_paths_list)

        if not f_path.exists():
            return _understand_file_error(f"File does not exist: {f_path}")

        if not f_path.is_file():
            return _understand_file_error(f"Path is not a file: {f_path}")

        file_size = f_path.stat().st_size
        file_extension = f_path.suffix.lower()

        if file_extension in _UNSUPPORTED_BINARY_EXTENSIONS:
            return _understand_file_error(
                f"Unsupported file format: {f_path.suffix}. " f"For images use understand_image, for videos use understand_video, for audio use generate_text_with_input_audio.",
            )

        # Document formats and their extractors. ".xls" is intentionally
        # absent: it is rejected above because openpyxl cannot read it.
        extractors = {
            ".pdf": ("pdf", _extract_text_from_pdf),
            ".docx": ("docx", _extract_text_from_docx),
            ".xlsx": ("excel", _extract_text_from_excel),
            ".pptx": ("pptx", _extract_text_from_pptx),
        }

        if file_extension in extractors:
            extraction_method, extractor = extractors[file_extension]
            file_content, error = extractor(f_path)
            if error:
                return _understand_file_error(error)
        else:
            # Everything else is treated as UTF-8 text
            extraction_method = "text"
            try:
                with open(f_path, "r", encoding="utf-8") as file:
                    # Read one character beyond the limit so truncation can be
                    # detected exactly instead of comparing chars with bytes.
                    file_content = file.read(max_chars + 1)
            except UnicodeDecodeError:
                # File is likely binary
                return _understand_file_error(
                    f"File appears to be binary and cannot be read as text: {f_path}. Supported binary formats: PDF, DOCX, XLSX, PPTX",
                )
            except Exception as read_error:
                return _understand_file_error(f"Failed to read file: {str(read_error)}")

        # Enforce the character limit uniformly for text and extracted content
        truncated = len(file_content) > max_chars
        if truncated:
            file_content = file_content[:max_chars]
        # Count only real file content; the note appended below is excluded
        chars_read = len(file_content)

        if truncated:
            if extraction_method == "text":
                truncation_note = f"\n\n[Note: File was truncated. Read {chars_read} characters out of {file_size} bytes total. Increase max_chars parameter to read more.]"
            else:
                truncation_note = f"\n\n[Note: Extracted content was truncated. Showing first {max_chars} characters. Increase max_chars parameter to read more.]"
            file_content += truncation_note

        # Build the full prompt with file content
        full_prompt = f"{prompt}\n\nFile: {f_path.name}\nContent:\n```\n{file_content}\n```"

        try:
            # The OpenAI client used here is synchronous; run the request in
            # the default executor so the event loop is not blocked meanwhile.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: client.responses.create(
                    model=model,
                    input=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "input_text", "text": full_prompt},
                            ],
                        },
                    ],
                ),
            )

            # Extract response text
            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)

            result = {
                "success": True,
                "operation": "understand_file",
                "file_path": str(f_path),
                "file_name": f_path.name,
                "file_type": extraction_method,
                "file_size": file_size,
                "chars_read": chars_read,
                "truncated": truncated,
                "prompt": prompt,
                "model": model,
                "response": response_text,
            }
            return ExecutionResult(
                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
            )

        except Exception as api_error:
            return _understand_file_error(f"OpenAI API error: {str(api_error)}")

    except Exception as e:
        # Top-level boundary: tools report failures as data, never raise
        return _understand_file_error(f"Failed to understand file: {str(e)}")