massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82):
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Transcribe audio file(s) to text using OpenAI's Transcription API.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from pathlib import Path
9
+ from typing import List, Optional
10
+
11
+ from dotenv import load_dotenv
12
+ from openai import OpenAI
13
+
14
+ from massgen.tool._result import ExecutionResult, TextContent
15
+
16
+
17
def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
    """
    Validate that a path is within allowed directories.

    Both ``path`` and every allowed base are resolved before they are
    compared, so ``..`` segments and symlinks cannot be used to step outside
    an allowed directory, and relative or symlinked allowed bases are compared
    on equal footing with the candidate path.

    Args:
        path: Path to validate
        allowed_paths: List of allowed base paths (optional). ``None`` or an
            empty list disables the check entirely.

    Raises:
        ValueError: If path is not within allowed directories
    """
    if not allowed_paths:
        return  # No restrictions

    # A purely lexical relative_to() would accept "<allowed>/../secret", so
    # normalise the candidate first.
    resolved_path = Path(path).resolve()

    for allowed_path in allowed_paths:
        try:
            resolved_path.relative_to(Path(allowed_path).resolve())
        except ValueError:
            continue  # Not under this base; try the next one
        return  # Path is within this allowed directory

    raise ValueError(f"Path not in allowed directories: {path}")
39
+
40
+
41
def _audio_error_result(message: str) -> ExecutionResult:
    """Wrap an error message in the JSON failure payload returned by understand_audio."""
    payload = {
        "success": False,
        "operation": "generate_text_with_input_audio",
        "error": message,
    }
    return ExecutionResult(
        output_blocks=[TextContent(data=json.dumps(payload, indent=2))],
    )


async def understand_audio(
    audio_paths: List[str],
    model: str = "gpt-4o-transcribe",
    allowed_paths: Optional[List[str]] = None,
    agent_cwd: Optional[str] = None,
) -> ExecutionResult:
    """
    Transcribe audio file(s) to text using OpenAI's Transcription API.

    Every input path is validated first (access, existence, extension, size);
    the first validation failure aborts the whole call. The validated files
    are then transcribed one at a time, and the first API failure likewise
    aborts the call.

    Args:
        audio_paths: List of paths to input audio files (WAV, MP3, M4A, etc.).
                    A single string is treated as a one-element list.
                    - Relative path: Resolved relative to ``agent_cwd`` (or the
                      process working directory when ``agent_cwd`` is not given)
                    - Absolute path: Must be within allowed directories
        model: Model to use (default: "gpt-4o-transcribe")
        allowed_paths: List of allowed base paths for validation (optional)
        agent_cwd: Current working directory of the agent (optional)

    Returns:
        ExecutionResult with a single TextContent block whose data is a JSON
        document containing:
        - success: Whether operation succeeded
        - operation: Always "generate_text_with_input_audio"
        - transcriptions: List of {"file", "transcription"} entries (success only)
        - audio_files: List of resolved input audio paths (success only)
        - model: Model used (success only)
        - error: Human-readable failure description (failure only)

    Examples:
        understand_audio(["recording.wav"])
        → Returns transcription for recording.wav

        understand_audio(["interview1.mp3", "interview2.mp3"])
        → Returns separate transcriptions for each file

    Security:
        - Requires valid OpenAI API key
        - All input audio files must exist and be readable
    """
    try:
        # A bare string would otherwise be iterated character by character.
        if isinstance(audio_paths, str):
            audio_paths = [audio_paths]

        # Convert allowed_paths from strings to Path objects
        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer a .env four directory levels above
        # this module, otherwise fall back to python-dotenv's default lookup.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _audio_error_result(
                "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
            )

        # Initialize OpenAI client
        client = OpenAI(api_key=openai_api_key)

        # Loop invariants for validation
        audio_extensions = [".wav", ".mp3", ".m4a", ".mp4", ".ogg", ".flac", ".aac", ".wma", ".opus"]
        max_size = 25 * 1024 * 1024  # 25MB (OpenAI Whisper API has 25MB limit)
        # Use agent_cwd if available, otherwise fall back to Path.cwd()
        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()

        # Validate every input before any API call is made
        validated_audio_paths = []
        for audio_path_str in audio_paths:
            candidate = Path(audio_path_str)
            if candidate.is_absolute():
                audio_path = candidate.resolve()
            else:
                audio_path = (base_dir / candidate).resolve()

            # Raises ValueError when outside the allowed directories; the
            # outer handler turns that into a failure result.
            _validate_path_access(audio_path, allowed_paths_list)

            if not audio_path.exists():
                return _audio_error_result(f"Audio file does not exist: {audio_path}")

            # Check if file is an audio file (by extension only)
            if audio_path.suffix.lower() not in audio_extensions:
                return _audio_error_result(f"File does not appear to be an audio file: {audio_path}")

            file_size = audio_path.stat().st_size
            if file_size > max_size:
                return _audio_error_result(
                    f"Audio file too large: {audio_path} ({file_size/1024/1024:.1f}MB > 25MB). " "Please use a smaller file or compress the audio.",
                )

            validated_audio_paths.append(audio_path)

        # Process each audio file separately using OpenAI Transcription API
        transcriptions = []
        for audio_path in validated_audio_paths:
            try:
                with open(audio_path, "rb") as audio_file:
                    # Basic transcription without prompt.
                    # NOTE(review): this client call is synchronous, so it is
                    # not awaited and runs inline within this coroutine.
                    transcription = client.audio.transcriptions.create(
                        model=model,
                        file=audio_file,
                        response_format="text",
                    )
            except Exception as api_error:
                return _audio_error_result(
                    f"Transcription API error for file {audio_path}: {str(api_error)}",
                )

            transcriptions.append(
                {
                    "file": str(audio_path),
                    "transcription": transcription,
                },
            )

        result = {
            "success": True,
            "operation": "generate_text_with_input_audio",
            "transcriptions": transcriptions,
            "audio_files": [str(p) for p in validated_audio_paths],
            "model": model,
        }
        return ExecutionResult(
            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
        )

    except Exception as e:
        # Top-level boundary: report any unexpected failure as a tool result
        # instead of propagating it to the caller.
        return _audio_error_result(f"Failed to transcribe audio: {str(e)}")