massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/claude_code.py +9 -1
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/gemini.py +35 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/response.py +2 -0
- massgen/chat_agent.py +9 -3
- massgen/cli.py +291 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +69 -14
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +9 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +283 -15
- massgen/orchestrator.py +335 -38
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/tool/__init__.py +4 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +211 -0
- massgen/tool/_multimodal_tools/understand_file.py +555 -0
- massgen/tool/_multimodal_tools/understand_image.py +316 -0
- massgen/tool/_multimodal_tools/understand_video.py +340 -0
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +1368 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Transcribe audio file(s) to text using OpenAI's Transcription API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
from openai import OpenAI
|
|
13
|
+
|
|
14
|
+
from massgen.tool._result import ExecutionResult, TextContent
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
|
|
18
|
+
"""
|
|
19
|
+
Validate that a path is within allowed directories.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
path: Path to validate
|
|
23
|
+
allowed_paths: List of allowed base paths (optional)
|
|
24
|
+
|
|
25
|
+
Raises:
|
|
26
|
+
ValueError: If path is not within allowed directories
|
|
27
|
+
"""
|
|
28
|
+
if not allowed_paths:
|
|
29
|
+
return # No restrictions
|
|
30
|
+
|
|
31
|
+
for allowed_path in allowed_paths:
|
|
32
|
+
try:
|
|
33
|
+
path.relative_to(allowed_path)
|
|
34
|
+
return # Path is within this allowed directory
|
|
35
|
+
except ValueError:
|
|
36
|
+
continue
|
|
37
|
+
|
|
38
|
+
raise ValueError(f"Path not in allowed directories: {path}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _error_result(message: str) -> ExecutionResult:
    """Build a failed ExecutionResult carrying a JSON error payload.

    Centralizes the error envelope ({success, operation, error} dumped as
    indented JSON into a TextContent block) that was previously duplicated
    at every failure branch of understand_audio().
    """
    payload = {
        "success": False,
        "operation": "generate_text_with_input_audio",
        "error": message,
    }
    return ExecutionResult(
        output_blocks=[TextContent(data=json.dumps(payload, indent=2))],
    )


async def understand_audio(
    audio_paths: List[str],
    model: str = "gpt-4o-transcribe",
    allowed_paths: Optional[List[str]] = None,
    agent_cwd: Optional[str] = None,
) -> ExecutionResult:
    """
    Transcribe audio file(s) to text using OpenAI's Transcription API.

    This tool processes one or more audio files through OpenAI's Transcription API
    to extract the text content from the audio. Each file is processed separately.

    Args:
        audio_paths: List of paths to input audio files (WAV, MP3, M4A, etc.)
            - Relative path: Resolved relative to workspace
            - Absolute path: Must be within allowed directories
        model: Model to use (default: "gpt-4o-transcribe")
        allowed_paths: List of allowed base paths for validation (optional)
        agent_cwd: Current working directory of the agent (optional)

    Returns:
        ExecutionResult containing:
        - success: Whether operation succeeded
        - operation: "generate_text_with_input_audio"
        - transcriptions: List of transcription results for each file
        - audio_files: List of paths to the input audio files
        - model: Model used

    Examples:
        generate_text_with_input_audio(["recording.wav"])
        → Returns transcription for recording.wav

        generate_text_with_input_audio(["interview1.mp3", "interview2.mp3"])
        → Returns separate transcriptions for each file

    Security:
        - Requires valid OpenAI API key
        - All input audio files must exist and be readable
    """
    try:
        # Convert allowed_paths from strings to Path objects.
        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer a project-level .env four
        # directories above this module, otherwise fall back to the default
        # python-dotenv search.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _error_result(
                "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
            )

        # Initialize OpenAI client.
        client = OpenAI(api_key=openai_api_key)

        # Loop invariants, hoisted out of the per-file loop:
        audio_extensions = [".wav", ".mp3", ".m4a", ".mp4", ".ogg", ".flac", ".aac", ".wma", ".opus"]
        max_size = 25 * 1024 * 1024  # OpenAI transcription API hard limit: 25MB
        # Base directory for resolving relative input paths.
        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()

        # Validate every input before making any API calls, so a bad path
        # late in the list doesn't waste earlier transcription calls.
        validated_audio_paths = []
        for audio_path_str in audio_paths:
            candidate = Path(audio_path_str)
            if candidate.is_absolute():
                audio_path = candidate.resolve()
            else:
                audio_path = (base_dir / audio_path_str).resolve()

            # Raises ValueError when outside the sandbox; caught by the
            # outer handler and reported as a failed ExecutionResult.
            _validate_path_access(audio_path, allowed_paths_list)

            if not audio_path.exists():
                return _error_result(f"Audio file does not exist: {audio_path}")

            # Cheap extension-based sniff; not a content check.
            if audio_path.suffix.lower() not in audio_extensions:
                return _error_result(f"File does not appear to be an audio file: {audio_path}")

            file_size = audio_path.stat().st_size
            if file_size > max_size:
                return _error_result(
                    f"Audio file too large: {audio_path} ({file_size/1024/1024:.1f}MB > 25MB). " "Please use a smaller file or compress the audio.",
                )

            validated_audio_paths.append(audio_path)

        # Transcribe each file independently; fail fast on the first API error.
        transcriptions = []
        for audio_path in validated_audio_paths:
            try:
                with open(audio_path, "rb") as audio_file:
                    # Basic transcription without prompt; response_format="text"
                    # makes the SDK return the plain transcript string.
                    transcription = client.audio.transcriptions.create(
                        model=model,
                        file=audio_file,
                        response_format="text",
                    )

                transcriptions.append(
                    {
                        "file": str(audio_path),
                        "transcription": transcription,
                    },
                )

            except Exception as api_error:
                return _error_result(f"Transcription API error for file {audio_path}: {str(api_error)}")

        result = {
            "success": True,
            "operation": "generate_text_with_input_audio",
            "transcriptions": transcriptions,
            "audio_files": [str(p) for p in validated_audio_paths],
            "model": model,
        }
        return ExecutionResult(
            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
        )

    except Exception as e:
        # Catch-all boundary: every unexpected failure (including sandbox
        # ValueError from _validate_path_access) becomes an error result.
        return _error_result(f"Failed to transcribe audio: {str(e)}")