massgen 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (63)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/backend/azure_openai.py +9 -1
  5. massgen/backend/base.py +4 -0
  6. massgen/backend/claude_code.py +9 -1
  7. massgen/backend/gemini.py +35 -6
  8. massgen/backend/gemini_utils.py +30 -0
  9. massgen/chat_agent.py +9 -3
  10. massgen/cli.py +291 -43
  11. massgen/config_builder.py +163 -18
  12. massgen/configs/README.md +52 -6
  13. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  14. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  15. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  16. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  17. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  18. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  19. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  20. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  21. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  22. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  23. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  24. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  25. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  26. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  28. massgen/configs/tools/memory/README.md +199 -0
  29. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  30. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  31. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  32. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  33. massgen/docker/README.md +83 -0
  34. massgen/filesystem_manager/_code_execution_server.py +22 -7
  35. massgen/filesystem_manager/_docker_manager.py +21 -1
  36. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  37. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  38. massgen/formatter/_gemini_formatter.py +73 -0
  39. massgen/frontend/coordination_ui.py +175 -257
  40. massgen/frontend/displays/base_display.py +29 -0
  41. massgen/frontend/displays/rich_terminal_display.py +155 -9
  42. massgen/frontend/displays/simple_display.py +21 -0
  43. massgen/frontend/displays/terminal_display.py +22 -2
  44. massgen/logger_config.py +50 -6
  45. massgen/message_templates.py +123 -3
  46. massgen/orchestrator.py +319 -38
  47. massgen/tests/test_code_execution.py +178 -0
  48. massgen/tests/test_orchestration_restart.py +204 -0
  49. massgen/tool/__init__.py +4 -0
  50. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  51. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  52. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  53. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  54. massgen/tool/docs/multimodal_tools.md +779 -0
  55. massgen/tool/workflow_toolkits/__init__.py +26 -0
  56. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  57. massgen/utils.py +1 -0
  58. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/METADATA +8 -3
  59. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/RECORD +63 -36
  60. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  61. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  62. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  63. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
massgen/tool/_multimodal_tools/understand_image.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+"""
+Understand and analyze images using OpenAI's gpt-4.1 API.
+"""
+
+import base64
+import json
+import os
+from pathlib import Path
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from massgen.tool._result import ExecutionResult, TextContent
+
+
+def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+    """
+    Validate that a path is within allowed directories.
+
+    Args:
+        path: Path to validate
+        allowed_paths: List of allowed base paths (optional)
+
+    Raises:
+        ValueError: If path is not within allowed directories
+    """
+    if not allowed_paths:
+        return  # No restrictions
+
+    for allowed_path in allowed_paths:
+        try:
+            path.relative_to(allowed_path)
+            return  # Path is within this allowed directory
+        except ValueError:
+            continue
+
+    raise ValueError(f"Path not in allowed directories: {path}")
+
+
+async def understand_image(
+    image_path: str,
+    prompt: str = "What's in this image? Please describe it in detail.",
+    model: str = "gpt-4.1",
+    allowed_paths: Optional[List[str]] = None,
+) -> ExecutionResult:
+    """
+    Understand and analyze an image using OpenAI's gpt-4.1 API.
+
+    This tool processes an image through OpenAI's gpt-4.1 API to extract insights,
+    descriptions, or answer questions about the image content.
+
+    Args:
+        image_path: Path to the image file (PNG/JPEG/JPG)
+            - Relative path: Resolved relative to workspace
+            - Absolute path: Must be within allowed directories
+        prompt: Question or instruction about the image (default: "What's in this image? Please describe it in detail.")
+        model: Model to use (default: "gpt-4.1")
+        allowed_paths: List of allowed base paths for validation (optional)
+
+    Returns:
+        ExecutionResult containing:
+        - success: Whether operation succeeded
+        - operation: "understand_image"
+        - image_path: Path to the analyzed image
+        - prompt: The prompt used
+        - model: Model used for analysis
+        - response: The model's understanding/description of the image
+
+    Examples:
+        understand_image("photo.jpg")
+        → Returns detailed description of the image
+
+        understand_image("chart.png", "What data is shown in this chart?")
+        → Returns analysis of the chart data
+
+        understand_image("screenshot.png", "What UI elements are visible in this screenshot?")
+        → Returns description of UI elements
+
+    Security:
+        - Requires valid OpenAI API key
+        - Image file must exist and be readable
+        - Only supports PNG, JPEG, and JPG formats
+    """
+    try:
+        # Convert allowed_paths from strings to Path objects
+        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+        # Load environment variables
+        script_dir = Path(__file__).parent.parent.parent.parent
+        env_path = script_dir / ".env"
+        if env_path.exists():
+            load_dotenv(env_path)
+        else:
+            load_dotenv()
+
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+
+        if not openai_api_key:
+            result = {
+                "success": False,
+                "operation": "understand_image",
+                "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Initialize OpenAI client
+        client = OpenAI(api_key=openai_api_key)
+
+        # Resolve image path
+        if Path(image_path).is_absolute():
+            img_path = Path(image_path).resolve()
+        else:
+            img_path = (Path.cwd() / image_path).resolve()
+
+        # Validate image path
+        _validate_path_access(img_path, allowed_paths_list)
+
+        if not img_path.exists():
+            result = {
+                "success": False,
+                "operation": "understand_image",
+                "error": f"Image file does not exist: {img_path}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Check file format
+        if img_path.suffix.lower() not in [".png", ".jpg", ".jpeg"]:
+            result = {
+                "success": False,
+                "operation": "understand_image",
+                "error": f"Image must be PNG, JPEG, or JPG format: {img_path}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Read and encode image to base64
+        try:
+            with open(img_path, "rb") as image_file:
+                image_data = image_file.read()
+            base64_image = base64.b64encode(image_data).decode("utf-8")
+        except Exception as read_error:
+            result = {
+                "success": False,
+                "operation": "understand_image",
+                "error": f"Failed to read image file: {str(read_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Determine MIME type
+        mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
+
+        try:
+            # Call OpenAI API for image understanding
+            response = client.responses.create(
+                model=model,
+                input=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "input_text", "text": prompt},
+                            {
+                                "type": "input_image",
+                                "image_url": f"data:{mime_type};base64,{base64_image}",
+                            },
+                        ],
+                    },
+                ],
+            )
+
+            # Extract response text
+            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)
+
+            result = {
+                "success": True,
+                "operation": "understand_image",
+                "image_path": str(img_path),
+                "prompt": prompt,
+                "model": model,
+                "response": response_text,
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        except Exception as api_error:
+            result = {
+                "success": False,
+                "operation": "understand_image",
+                "error": f"OpenAI API error: {str(api_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+    except Exception as e:
+        result = {
+            "success": False,
+            "operation": "understand_image",
+            "error": f"Failed to understand image: {str(e)}",
+        }
+        return ExecutionResult(
+            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+        )
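
For orientation, here is a minimal usage sketch of the new tool (editorial, not part of the released diff). It assumes the import path implied by the file list above — the public re-export added in massgen/tool/__init__.py may differ — and that TextContent exposes the data string passed to its constructor:

import asyncio
import json
from pathlib import Path

# Assumed import path, taken from the file list above.
from massgen.tool._multimodal_tools.understand_image import understand_image


async def main() -> None:
    result = await understand_image(
        "chart.png",  # relative paths resolve against the current working directory
        prompt="What data is shown in this chart?",
        # Bases must be absolute: the tool resolves image_path before the
        # Path.relative_to() check, and relative_to() never matches a
        # relative base (e.g. ".") against an absolute path.
        allowed_paths=[str(Path.cwd())],
    )
    # The tool serializes its result dict as JSON into a text block.
    payload = json.loads(result.output_blocks[0].data)
    print(payload["success"], payload.get("response") or payload.get("error"))


asyncio.run(main())

Because success and error states are both reported inside the JSON payload (never raised, except for the path-validation ValueError), callers should branch on payload["success"] rather than wrapping the call in try/except.
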
massgen/tool/_multimodal_tools/understand_video.py
@@ -0,0 +1,313 @@
+# -*- coding: utf-8 -*-
+"""
+Understand and analyze videos by extracting key frames and using OpenAI's gpt-4.1 API.
+"""
+
+import base64
+import json
+import os
+from pathlib import Path
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from massgen.tool._result import ExecutionResult, TextContent
+
+
+def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
+    """
+    Validate that a path is within allowed directories.
+
+    Args:
+        path: Path to validate
+        allowed_paths: List of allowed base paths (optional)
+
+    Raises:
+        ValueError: If path is not within allowed directories
+    """
+    if not allowed_paths:
+        return  # No restrictions
+
+    for allowed_path in allowed_paths:
+        try:
+            path.relative_to(allowed_path)
+            return  # Path is within this allowed directory
+        except ValueError:
+            continue
+
+    raise ValueError(f"Path not in allowed directories: {path}")
+
+
+def _extract_key_frames(video_path: Path, num_frames: int = 8) -> List[str]:
+    """
+    Extract key frames from a video file.
+
+    Args:
+        video_path: Path to the video file
+        num_frames: Number of key frames to extract
+
+    Returns:
+        List of base64-encoded frame images
+
+    Raises:
+        ImportError: If opencv-python is not installed
+        Exception: If frame extraction fails
+    """
+    try:
+        import cv2
+    except ImportError:
+        raise ImportError(
+            "opencv-python is required for video frame extraction. Please install it with: pip install opencv-python",
+        )
+
+    # Open the video file
+    video = cv2.VideoCapture(str(video_path))
+
+    if not video.isOpened():
+        raise Exception(f"Failed to open video file: {video_path}")
+
+    try:
+        # Get total number of frames
+        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        if total_frames == 0:
+            raise Exception(f"Video file has no frames: {video_path}")
+
+        # Calculate frame indices to extract (evenly spaced)
+        frame_indices = []
+        if num_frames >= total_frames:
+            # If requesting more frames than available, use all frames
+            frame_indices = list(range(total_frames))
+        else:
+            # Extract evenly spaced frames
+            step = total_frames / num_frames
+            frame_indices = [int(i * step) for i in range(num_frames)]
+
+        # Extract frames
+        frames_base64 = []
+        for frame_idx in frame_indices:
+            # Set video position to the frame
+            video.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+
+            # Read the frame
+            ret, frame = video.read()
+
+            if not ret:
+                continue
+
+            # Encode frame to JPEG
+            ret, buffer = cv2.imencode(".jpg", frame)
+
+            if not ret:
+                continue
+
+            # Convert to base64
+            frame_base64 = base64.b64encode(buffer).decode("utf-8")
+            frames_base64.append(frame_base64)
+
+        if not frames_base64:
+            raise Exception("Failed to extract any frames from video")
+
+        return frames_base64
+
+    finally:
+        # Release the video capture object
+        video.release()
+
+
+async def understand_video(
+    video_path: str,
+    prompt: str = "What's happening in this video? Please describe the content, actions, and any important details you observe across these frames.",
+    num_frames: int = 8,
+    model: str = "gpt-4.1",
+    allowed_paths: Optional[List[str]] = None,
+) -> ExecutionResult:
+    """
+    Understand and analyze a video by extracting key frames and using OpenAI's gpt-4.1 API.
+
+    This tool extracts key frames from a video file and processes them through OpenAI's
+    gpt-4.1 API to provide insights, descriptions, or answer questions about the video content.
+
+    Args:
+        video_path: Path to the video file (MP4, AVI, MOV, etc.)
+            - Relative path: Resolved relative to workspace
+            - Absolute path: Must be within allowed directories
+        prompt: Question or instruction about the video (default: asks for general description)
+        num_frames: Number of key frames to extract from the video (default: 8)
+            - Higher values provide more detail but increase API costs
+            - Recommended range: 4-16 frames
+        model: Model to use (default: "gpt-4.1")
+        allowed_paths: List of allowed base paths for validation (optional)
+
+    Returns:
+        ExecutionResult containing:
+        - success: Whether operation succeeded
+        - operation: "understand_video"
+        - video_path: Path to the analyzed video
+        - num_frames_extracted: Number of frames extracted
+        - prompt: The prompt used
+        - model: Model used for analysis
+        - response: The model's understanding/description of the video

+    Examples:
+        understand_video("demo.mp4")
+        → Returns detailed description of the video content
+
+        understand_video("tutorial.mp4", "What steps are shown in this tutorial?")
+        → Returns analysis of tutorial steps
+
+        understand_video("meeting.mp4", "Summarize the key points discussed in this meeting", num_frames=12)
+        → Returns meeting summary based on 12 key frames
+
+        understand_video("sports.mp4", "What sport is being played and what are the key moments?")
+        → Returns sports analysis
+
+    Security:
+        - Requires valid OpenAI API key
+        - Requires opencv-python package for video processing
+        - Video file must exist and be readable
+        - Supports common video formats (MP4, AVI, MOV, MKV, etc.)
+
+    Note:
+        This tool extracts still frames from the video. Audio content is not analyzed.
+        For audio analysis, use the generate_text_with_input_audio tool.
+    """
+    try:
+        # Convert allowed_paths from strings to Path objects
+        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None
+
+        # Load environment variables
+        script_dir = Path(__file__).parent.parent.parent.parent
+        env_path = script_dir / ".env"
+        if env_path.exists():
+            load_dotenv(env_path)
+        else:
+            load_dotenv()
+
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+
+        if not openai_api_key:
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": "OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Initialize OpenAI client
+        client = OpenAI(api_key=openai_api_key)
+
+        # Resolve video path
+        if Path(video_path).is_absolute():
+            vid_path = Path(video_path).resolve()
+        else:
+            vid_path = (Path.cwd() / video_path).resolve()
+
+        # Validate video path
+        _validate_path_access(vid_path, allowed_paths_list)
+
+        if not vid_path.exists():
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": f"Video file does not exist: {vid_path}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Check if file is likely a video (by extension)
+        video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"]
+        if vid_path.suffix.lower() not in video_extensions:
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": f"File does not appear to be a video file: {vid_path}. Supported formats: {', '.join(video_extensions)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Extract key frames from video
+        try:
+            frames_base64 = _extract_key_frames(vid_path, num_frames)
+        except ImportError as import_error:
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": str(import_error),
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+        except Exception as extract_error:
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": f"Failed to extract frames from video: {str(extract_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        # Build content array with prompt and all frames
+        content = [{"type": "input_text", "text": prompt}]
+
+        for frame_base64 in frames_base64:
+            content.append(
+                {
+                    "type": "input_image",
+                    "image_url": f"data:image/jpeg;base64,{frame_base64}",
+                },
+            )
+
+        try:
+            # Call OpenAI API for video understanding
+            response = client.responses.create(
+                model=model,
+                input=[
+                    {
+                        "role": "user",
+                        "content": content,
+                    },
+                ],
+            )
+
+            # Extract response text
+            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)
+
+            result = {
+                "success": True,
+                "operation": "understand_video",
+                "video_path": str(vid_path),
+                "num_frames_extracted": len(frames_base64),
+                "prompt": prompt,
+                "model": model,
+                "response": response_text,
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+        except Exception as api_error:
+            result = {
+                "success": False,
+                "operation": "understand_video",
+                "error": f"OpenAI API error: {str(api_error)}",
+            }
+            return ExecutionResult(
+                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+            )
+
+    except Exception as e:
+        result = {
+            "success": False,
+            "operation": "understand_video",
+            "error": f"Failed to understand video: {str(e)}",
+        }
+        return ExecutionResult(
+            output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+        )
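
One detail of _extract_key_frames worth noting: the sampled indices are truncated multiples of total_frames / num_frames, so frames are evenly spaced from frame 0 but the clip's final frame is never selected. A standalone sketch of that arithmetic (hypothetical helper name, no OpenCV needed):

def sample_indices(total_frames: int, num_frames: int) -> list[int]:
    # Mirrors the logic in _extract_key_frames: use every frame if the
    # request exceeds the total, otherwise truncate evenly spaced positions.
    if num_frames >= total_frames:
        return list(range(total_frames))
    step = total_frames / num_frames
    return [int(i * step) for i in range(num_frames)]


print(sample_indices(100, 8))  # [0, 12, 25, 37, 50, 62, 75, 87]
print(sample_indices(5, 8))    # [0, 1, 2, 3, 4] -- short clip: all frames used

Raising num_frames tightens the spacing at linear cost: each extra frame becomes one more input_image block in the API request, which is why the docstring recommends staying in the 4-16 range.
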