massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in the public registry.

Potentially problematic release: this version of massgen might be problematic.

Files changed (82)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py
@@ -0,0 +1,226 @@
+# -*- coding: utf-8 -*-
+"""
+Generate audio from text using OpenAI's gpt-4o-audio-preview model and store it in the workspace.
+"""
+
+import base64
+import json
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from massgen.tool._result import ExecutionResult, TextContent
+
+
+def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+    """
+    Validate that a path is within allowed directories.
+
+    Args:
+        path: Path to validate
+        allowed_paths: List of allowed base paths (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Raises:
+        ValueError: If path is not within allowed directories
+    """
+    if not allowed_paths:
+        return  # No restrictions
+
+    for allowed_path in allowed_paths:
+        try:
+            path.relative_to(allowed_path)
+            return  # Path is within this allowed directory
+        except ValueError:
+            continue
+
+    raise ValueError(f"Path not in allowed directories: {path}")
+
+
+async def text_to_speech_continue_generation(
+    prompt: str,
+    model: str = "gpt-4o-audio-preview",
+    voice: str = "alloy",
+    audio_format: str = "wav",
+    storage_path: Optional[str] = None,
+    allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
+) -> ExecutionResult:
+    """
+    Generate audio from text using OpenAI's gpt-4o-audio-preview model and store it in the workspace.
+
+    This tool generates audio speech from text prompts using OpenAI's audio generation API
+    and saves the audio files to the workspace with automatic organization.
+
+    Args:
+        prompt: Text content to convert to audio speech
+        model: Model to use for generation (default: "gpt-4o-audio-preview")
+        voice: Voice to use for audio generation (default: "alloy")
+            Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
+        audio_format: Audio format for output (default: "wav")
+            Options: "wav", "mp3", "opus", "aac", "flac"
+        storage_path: Directory path where to save the audio (optional)
+            - **IMPORTANT**: Must be a DIRECTORY path only, NOT a file path (e.g., "audio/generated" NOT "audio/output.wav")
+            - The filename is automatically generated from the prompt and timestamp
+            - Relative path: Resolved relative to agent's workspace (e.g., "audio/generated")
+            - Absolute path: Must be within allowed directories
+            - None/empty: Saves to agent's workspace root
+        allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Returns:
+        ExecutionResult containing:
+        - success: Whether operation succeeded
+        - operation: "generate_and_store_audio_no_input_audios"
+        - audio_file: Generated audio file with path and metadata
+        - model: Model used for generation
+        - prompt: The prompt used for generation
+        - voice: Voice used for generation
+        - format: Audio format used
+
+    Examples:
+        generate_and_store_audio_no_input_audios("Is a golden retriever a good family dog?")
+        → Generates and saves to: 20240115_143022_audio.wav
+
+        generate_and_store_audio_no_input_audios("Hello world", voice="nova", audio_format="mp3")
+        → Generates with nova voice and saves as: 20240115_143022_audio.mp3
+
+    Security:
+        - Requires valid OpenAI API key (automatically detected from .env or environment)
+        - Files are saved to specified path within workspace
+        - Path must be within allowed directories
+    """
+    try:
+        # Convert allowed_paths from strings to Path objects
+        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+        # Use agent_cwd if available, otherwise fall back to base_dir
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
+
+        # Load environment variables
+        script_dir = Path(__file__).parent.parent.parent.parent
+        env_path = script_dir / ".env"
+        if env_path.exists():
+            load_dotenv(env_path)
+        else:
+            load_dotenv()
+
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+
+        if not openai_api_key:
+            result = {
+                "success": False,
+                "operation": "generate_and_store_audio_no_input_audios",
+                "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Initialize OpenAI client
+        client = OpenAI(api_key=openai_api_key)
+
+        # Determine storage directory
+        if storage_path:
+            if Path(storage_path).is_absolute():
+                storage_dir = Path(storage_path).resolve()
+            else:
+                storage_dir = (base_dir / storage_path).resolve()
+        else:
+            storage_dir = base_dir
+
+        # Validate storage directory is within allowed paths
+        _validate_path_access(storage_dir, allowed_paths_list)
+
+        # Create directory if it doesn't exist
+        storage_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Generate audio using OpenAI API
+            completion = client.chat.completions.create(
+                model=model,
+                modalities=["text", "audio"],
+                audio={"voice": voice, "format": audio_format},
+                messages=[
+                    {
+                        "role": "user",
+                        "content": prompt,
+                    },
+                ],
+            )
+
+            # Check if audio data is available
+            if not completion.choices[0].message.audio or not completion.choices[0].message.audio.data:
+                result = {
+                    "success": False,
+                    "operation": "generate_and_store_audio_no_input_audios",
+                    "error": "No audio data received from API",
+                }
+                return ExecutionResult(
+                    output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+                )
+
+            # Decode audio data from base64
+            audio_bytes = base64.b64decode(completion.choices[0].message.audio.data)
+
+            # Generate filename with timestamp
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+            # Clean prompt for filename (first 30 chars)
+            clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
+            clean_prompt = clean_prompt.replace(" ", "_")
+
+            filename = f"{timestamp}_{clean_prompt}.{audio_format}"
+
+            # Full file path
+            file_path = storage_dir / filename
+
+            # Write audio to file
+            file_path.write_bytes(audio_bytes)
+            file_size = len(audio_bytes)
+
+            # Get text response if available
+            text_response = completion.choices[0].message.content if completion.choices[0].message.content else None
+
+            result = {
+                "success": True,
+                "operation": "generate_and_store_audio_no_input_audios",
+                "audio_file": {
+                    "file_path": str(file_path),
+                    "filename": filename,
+                    "size": file_size,
+                    "format": audio_format,
+                },
+                "model": model,
+                "prompt": prompt,
+                "voice": voice,
+                "format": audio_format,
+                "text_response": text_response,
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )

+        except Exception as api_error:
+            result = {
+                "success": False,
+                "operation": "generate_and_store_audio_no_input_audios",
+                "error": f"OpenAI API error: {str(api_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+    except Exception as e:
+        result = {
+            "success": False,
+            "operation": "generate_and_store_audio_no_input_audios",
+            "error": f"Failed to generate or save audio: {str(e)}",
+        }
+        return ExecutionResult(
+            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+        )
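For orientation (not part of the diff): a minimal sketch of how the new text_to_speech_continue_generation tool could be called directly. In MassGen the tool is normally dispatched through the tool manager, which injects agent_cwd and allowed_paths automatically; the direct call, the ./workspace path, and reading the JSON summary via result.output_blocks[0].data are illustrative assumptions, and OPENAI_API_KEY must be available in the environment or a .env file.

import asyncio

from massgen.tool._multimodal_tools.text_to_speech_continue_generation import text_to_speech_continue_generation


async def main() -> None:
    # Direct call for illustration; parameter values and paths are hypothetical.
    result = await text_to_speech_continue_generation(
        prompt="Is a golden retriever a good family dog?",
        voice="nova",
        audio_format="mp3",
        storage_path="audio/generated",  # a directory, never a file path
        agent_cwd="./workspace",  # normally injected by the framework
    )
    # The tool returns an ExecutionResult wrapping a JSON summary
    # (success flag, saved file path, model, voice, format).
    print(result.output_blocks[0].data)


asyncio.run(main())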
massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+"""
+Convert text (transcription) directly to speech using OpenAI's TTS API with streaming response.
+"""
+
+import json
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from massgen.tool._result import ExecutionResult, TextContent
+
+
+def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+    """
+    Validate that a path is within allowed directories.
+
+    Args:
+        path: Path to validate
+        allowed_paths: List of allowed base paths (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Raises:
+        ValueError: If path is not within allowed directories
+    """
+    if not allowed_paths:
+        return  # No restrictions
+
+    for allowed_path in allowed_paths:
+        try:
+            path.relative_to(allowed_path)
+            return  # Path is within this allowed directory
+        except ValueError:
+            continue
+
+    raise ValueError(f"Path not in allowed directories: {path}")
+
+
+async def text_to_speech_transcription_generation(
+    input_text: str,
+    model: str = "gpt-4o-mini-tts",
+    voice: str = "alloy",
+    instructions: Optional[str] = None,
+    storage_path: Optional[str] = None,
+    audio_format: str = "mp3",
+    allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
+) -> ExecutionResult:
+    """
+    Convert text (transcription) directly to speech using OpenAI's TTS API with streaming response.
+
+    This tool converts text directly to speech audio using OpenAI's Text-to-Speech API,
+    designed specifically for converting transcriptions or any text content to spoken audio.
+    Uses streaming response for efficient file handling.
+
+    Args:
+        input_text: The text content to convert to speech (e.g., transcription text)
+        model: TTS model to use (default: "gpt-4o-mini-tts")
+            Options: "gpt-4o-mini-tts", "tts-1", "tts-1-hd"
+        voice: Voice to use for speech synthesis (default: "alloy")
+            Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral", "sage"
+        instructions: Optional speaking instructions for tone and style (e.g., "Speak in a cheerful tone")
+        storage_path: Directory path where to save the audio file (optional)
+            - **IMPORTANT**: Must be a DIRECTORY path only, NOT a file path (e.g., "audio/speech" NOT "audio/speech.mp3")
+            - The filename is automatically generated from the text content and timestamp
+            - Relative path: Resolved relative to agent's workspace (e.g., "audio/speech")
+            - Absolute path: Must be within allowed directories
+            - None/empty: Saves to agent's workspace root
+        audio_format: Output audio format (default: "mp3")
+            Options: "mp3", "opus", "aac", "flac", "wav", "pcm"
+        allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Returns:
+        ExecutionResult containing:
+        - success: Whether operation succeeded
+        - operation: "convert_text_to_speech"
+        - audio_file: Generated audio file with path and metadata
+        - model: TTS model used
+        - voice: Voice used
+        - format: Audio format used
+        - text_length: Length of input text
+        - instructions: Speaking instructions if provided
+
+    Examples:
+        convert_text_to_speech("Hello world, this is a test.")
+        → Converts text to speech and saves as MP3
+
+        convert_text_to_speech(
+            "Today is a wonderful day to build something people love!",
+            voice="coral",
+            instructions="Speak in a cheerful and positive tone."
+        )
+        → Converts with specific voice and speaking instructions
+
+    Security:
+        - Requires valid OpenAI API key
+        - Files are saved to specified path within workspace
+        - Path must be within allowed directories
+    """
+    try:
+        # Convert allowed_paths from strings to Path objects
+        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+        # Use agent_cwd if available, otherwise fall back to base_dir
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
+
+        # Load environment variables
+        script_dir = Path(__file__).parent.parent.parent.parent
+        env_path = script_dir / ".env"
+        if env_path.exists():
+            load_dotenv(env_path)
+        else:
+            load_dotenv()
+
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+
+        if not openai_api_key:
+            result = {
+                "success": False,
+                "operation": "convert_text_to_speech",
+                "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Initialize OpenAI client
+        client = OpenAI(api_key=openai_api_key)
+
+        # Determine storage directory
+        if storage_path:
+            if Path(storage_path).is_absolute():
+                storage_dir = Path(storage_path).resolve()
+            else:
+                storage_dir = (base_dir / storage_path).resolve()
+        else:
+            storage_dir = base_dir
+
+        # Validate storage directory is within allowed paths
+        _validate_path_access(storage_dir, allowed_paths_list)
+
+        # Create directory if it doesn't exist
+        storage_dir.mkdir(parents=True, exist_ok=True)
+
+        # Generate filename with timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Clean text for filename (first 30 chars)
+        clean_text = "".join(c for c in input_text[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
+        clean_text = clean_text.replace(" ", "_")
+
+        filename = f"speech_{timestamp}_{clean_text}.{audio_format}"
+        file_path = storage_dir / filename
+
+        try:
+            # Prepare request parameters
+            request_params = {
+                "model": model,
+                "voice": voice,
+                "input": input_text,
+            }
+
+            # Add instructions if provided (only for models that support it)
+            if instructions and model in ["gpt-4o-mini-tts"]:
+                request_params["instructions"] = instructions
+
+            # Use streaming response for efficient file handling
+            with client.audio.speech.with_streaming_response.create(**request_params) as response:
+                # Stream directly to file
+                response.stream_to_file(file_path)
+
+            # Get file size
+            file_size = file_path.stat().st_size
+
+            result = {
+                "success": True,
+                "operation": "convert_text_to_speech",
+                "audio_file": {
+                    "file_path": str(file_path),
+                    "filename": filename,
+                    "size": file_size,
+                    "format": audio_format,
+                },
+                "model": model,
+                "voice": voice,
+                "format": audio_format,
+                "text_length": len(input_text),
+                "instructions": instructions if instructions else None,
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        except Exception as api_error:
+            result = {
+                "success": False,
+                "operation": "convert_text_to_speech",
+                "error": f"OpenAI TTS API error: {str(api_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+    except Exception as e:
+        result = {
+            "success": False,
+            "operation": "convert_text_to_speech",
+            "error": f"Failed to convert text to speech: {str(e)}",
+        }
+        return ExecutionResult(
+            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+        )
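A similar sketch (again illustrative, not part of the diff) for the new text_to_speech_transcription_generation tool, mirroring its docstring example. The direct invocation, the ./workspace path, and the .data access on the returned TextContent block are assumptions; note that the instructions parameter is only forwarded to the API for the gpt-4o-mini-tts model.

import asyncio

from massgen.tool._multimodal_tools.text_to_speech_transcription_generation import text_to_speech_transcription_generation


async def main() -> None:
    # Direct call for illustration; the tool manager normally supplies agent_cwd.
    result = await text_to_speech_transcription_generation(
        input_text="Today is a wonderful day to build something people love!",
        voice="coral",
        instructions="Speak in a cheerful and positive tone.",  # honored only by gpt-4o-mini-tts
        storage_path="audio/speech",  # a directory, never a file path
        agent_cwd="./workspace",
    )
    print(result.output_blocks[0].data)  # JSON summary with the saved MP3 path


asyncio.run(main())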
massgen/tool/_multimodal_tools/text_to_video_generation.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+"""
+Generate a video from a text prompt using OpenAI's Sora-2 API.
+"""
+
+import json
+import os
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from massgen.tool._result import ExecutionResult, TextContent
+
+
+def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+    """
+    Validate that a path is within allowed directories.
+
+    Args:
+        path: Path to validate
+        allowed_paths: List of allowed base paths (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Raises:
+        ValueError: If path is not within allowed directories
+    """
+    if not allowed_paths:
+        return  # No restrictions
+
+    for allowed_path in allowed_paths:
+        try:
+            path.relative_to(allowed_path)
+            return  # Path is within this allowed directory
+        except ValueError:
+            continue
+
+    raise ValueError(f"Path not in allowed directories: {path}")
+
+
+async def text_to_video_generation(
+    prompt: str,
+    model: str = "sora-2",
+    seconds: int = 4,
+    storage_path: Optional[str] = None,
+    allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
+) -> ExecutionResult:
+    """
+    Generate a video from a text prompt using OpenAI's Sora-2 API.
+
+    This tool generates a video based on a text prompt using OpenAI's Sora-2 API
+    and saves it to the workspace with automatic organization.
+
+    Args:
+        prompt: Text description for the video to generate
+        model: Model to use (default: "sora-2")
+        seconds: Video duration in seconds (default: 4)
+        storage_path: Directory path where to save the video (optional)
+            - **IMPORTANT**: Must be a DIRECTORY path only, NOT a file path (e.g., "videos/generated" NOT "videos/output.mp4")
+            - The filename is automatically generated from the prompt and timestamp
+            - Relative path: Resolved relative to agent's workspace (e.g., "videos/generated")
+            - Absolute path: Must be within allowed directories
+            - None/empty: Saves to agent's workspace root
+        allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
+
+    Returns:
+        ExecutionResult containing:
+        - success: Whether operation succeeded
+        - operation: "generate_and_store_video_no_input_images"
+        - video_path: Path to the saved video file
+        - model: Model used for generation
+        - prompt: The prompt used
+        - duration: Time taken for generation in seconds
+
+    Examples:
+        generate_and_store_video_no_input_images("A cool cat on a motorcycle in the night")
+        → Generates a video and saves to workspace root
+
+        generate_and_store_video_no_input_images("Dancing robot", storage_path="videos/")
+        → Generates a video and saves to videos/ directory
+
+    Security:
+        - Requires valid OpenAI API key with Sora-2 access
+        - Files are saved to specified path within workspace
+    """
+    try:
+        # Convert allowed_paths from strings to Path objects
+        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+        # Use agent_cwd if available, otherwise fall back to base_dir
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
+
+        # Load environment variables
+        script_dir = Path(__file__).parent.parent.parent.parent
+        env_path = script_dir / ".env"
+        if env_path.exists():
+            load_dotenv(env_path)
+        else:
+            load_dotenv()
+
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+
+        if not openai_api_key:
+            result = {
+                "success": False,
+                "operation": "generate_and_store_video_no_input_images",
+                "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Initialize OpenAI client
+        client = OpenAI(api_key=openai_api_key)
+
+        # Determine storage directory
+        if storage_path:
+            if Path(storage_path).is_absolute():
+                storage_dir = Path(storage_path).resolve()
+            else:
+                storage_dir = (base_dir / storage_path).resolve()
+        else:
+            storage_dir = base_dir
+
+        # Validate storage directory is within allowed paths
+        _validate_path_access(storage_dir, allowed_paths_list)
+
+        # Create directory if it doesn't exist
+        storage_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            start_time = time.time()
+
+            # Start video generation (no print statements to avoid MCP JSON parsing issues)
+            video = client.videos.create(
+                model=model,
+                prompt=prompt,
+                seconds=str(seconds),
+            )
+
+            getattr(video, "progress", 0)
+
+            # Monitor progress (silently, no stdout writes)
+            while video.status in ("in_progress", "queued"):
+                # Refresh status
+                video = client.videos.retrieve(video.id)
+                getattr(video, "progress", 0)
+                time.sleep(2)
+
+            if video.status == "failed":
+                message = getattr(
+                    getattr(video, "error", None),
+                    "message",
+                    "Video generation failed",
+                )
+                result = {
+                    "success": False,
+                    "operation": "generate_and_store_video_no_input_images",
+                    "error": message,
+                }
+                return ExecutionResult(
+                    output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+                )
+
+            # Download video content
+            content = client.videos.download_content(video.id, variant="video")
+
+            # Generate filename with timestamp
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            clean_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (" ", "-", "_")).strip()
+            clean_prompt = clean_prompt.replace(" ", "_")
+            filename = f"{timestamp}_{clean_prompt}.mp4"
+
+            # Full file path
+            file_path = storage_dir / filename
+
+            # Write video to file
+            content.write_to_file(str(file_path))
+
+            # Calculate duration
+            duration = time.time() - start_time
+
+            # Get file size
+            file_size = file_path.stat().st_size
+
+            result = {
+                "success": True,
+                "operation": "generate_and_store_video_no_input_images",
+                "video_path": str(file_path),
+                "filename": filename,
+                "size": file_size,
+                "model": model,
+                "prompt": prompt,
+                "duration": duration,
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        except Exception as api_error:
+            result = {
+                "success": False,
+                "operation": "generate_and_store_video_no_input_images",
+                "error": f"OpenAI API error: {str(api_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+    except Exception as e:
+        result = {
+            "success": False,
+            "operation": "generate_and_store_video_no_input_images",
+            "error": f"Failed to generate or save video: {str(e)}",
+        }
+        return ExecutionResult(
+            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
        )
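Finally, an illustrative sketch (not part of the diff) of a direct call to the new text_to_video_generation tool. As above, the direct invocation, the ./workspace path, and the .data access are assumptions; an OpenAI key with Sora-2 access is required.

import asyncio

from massgen.tool._multimodal_tools.text_to_video_generation import text_to_video_generation


async def main() -> None:
    # Direct call for illustration; generation can take a while, since the tool
    # polls the video job status every 2 seconds until it completes or fails.
    result = await text_to_video_generation(
        prompt="A cool cat on a motorcycle in the night",
        seconds=4,
        storage_path="videos",  # a directory, never a file path
        agent_cwd="./workspace",
    )
    print(result.output_blocks[0].data)  # JSON summary with the saved .mp4 path


asyncio.run(main())

Note the blocking design of the new tool: it submits the job with client.videos.create, polls client.videos.retrieve until the status leaves "queued"/"in_progress", and only then downloads the file with download_content, so a call does not return until Sora-2 has finished rendering.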