massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82):
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,316 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Understand and analyze images using OpenAI's gpt-4.1 API.
4
+ """
5
+
6
+ import base64
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from typing import List, Optional
11
+
12
+ from dotenv import load_dotenv
13
+ from openai import OpenAI
14
+
15
+ from massgen.logger_config import logger
16
+ from massgen.tool._result import ExecutionResult, TextContent
17
+
18
+
19
+ def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
20
+ """
21
+ Validate that a path is within allowed directories.
22
+
23
+ Args:
24
+ path: Path to validate
25
+ allowed_paths: List of allowed base paths (optional)
26
+
27
+ Raises:
28
+ ValueError: If path is not within allowed directories
29
+ """
30
+ if not allowed_paths:
31
+ return # No restrictions
32
+
33
+ for allowed_path in allowed_paths:
34
+ try:
35
+ path.relative_to(allowed_path)
36
+ return # Path is within this allowed directory
37
+ except ValueError:
38
+ continue
39
+
40
+ raise ValueError(f"Path not in allowed directories: {path}")
41
+
42
+
43
async def understand_image(
    image_path: str,
    prompt: str = "What's in this image? Please describe it in detail.",
    model: str = "gpt-4.1",
    allowed_paths: Optional[List[str]] = None,
    agent_cwd: Optional[str] = None,
) -> ExecutionResult:
    """
    Understand and analyze an image using OpenAI's gpt-4.1 API.

    The image is validated, downscaled/recompressed if it exceeds OpenAI
    Vision limits, then sent inline (base64 data URL) with the prompt.

    Args:
        image_path: Path to the image file (PNG/JPEG/JPG)
            - Relative path: Resolved relative to workspace
            - Absolute path: Must be within allowed directories
        prompt: Question or instruction about the image
        model: Model to use (default: "gpt-4.1")
        allowed_paths: List of allowed base paths for validation (optional)
        agent_cwd: Agent's current working directory (automatically injected)

    Returns:
        ExecutionResult whose JSON payload contains: success, operation
        ("understand_image"), image_path, prompt, model, and the model's
        response (or an error message on failure).

    Security:
        - Requires valid OpenAI API key (OPENAI_API_KEY)
        - Image file must exist, be readable, and be within allowed_paths
        - Only supports PNG, JPEG, and JPG formats
    """

    def _fail(error: str) -> ExecutionResult:
        # Every failure path returns the same JSON payload shape; build it in one place.
        payload = {"success": False, "operation": "understand_image", "error": error}
        return ExecutionResult(output_blocks=[TextContent(data=json.dumps(payload, indent=2))])

    try:
        # Convert allowed_paths from strings to Path objects.
        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer the repo-root .env, fall back to default lookup.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _fail("OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.")

        # Initialize OpenAI client.
        client = OpenAI(api_key=openai_api_key)

        # Resolve image path relative to the agent workspace when not absolute.
        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
        if Path(image_path).is_absolute():
            img_path = Path(image_path).resolve()
        else:
            img_path = (base_dir / image_path).resolve()

        # Raises ValueError if outside allowed directories; caught by the outer handler.
        _validate_path_access(img_path, allowed_paths_list)

        if not img_path.exists():
            return _fail(f"Image file does not exist: {img_path}")

        if img_path.suffix.lower() not in [".png", ".jpg", ".jpeg"]:
            return _fail(f"Image must be PNG, JPEG, or JPG format: {img_path}")

        # Read the image, downscaling/recompressing if it exceeds API limits.
        try:
            # OpenAI Vision API limits:
            # - Up to 20MB per image
            # - High-resolution: 768px (short side) x 2000px (long side)
            file_size = img_path.stat().st_size
            max_size = 18 * 1024 * 1024  # 18MB (conservative buffer under OpenAI's 20MB limit)
            max_short_side = 768  # Maximum pixels for short side
            max_long_side = 2000  # Maximum pixels for long side

            try:
                import io

                from PIL import Image
            except ImportError:
                # PIL unavailable: we cannot resize, so only images already
                # under the size cap can be sent as-is.
                if file_size > max_size:
                    return _fail(
                        f"Image too large ({file_size/1024/1024:.1f}MB > {max_size/1024/1024:.0f}MB) and PIL not available for resizing. Install with: pip install pillow",
                    )
                image_data = img_path.read_bytes()
                base64_image = base64.b64encode(image_data).decode("utf-8")
                mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
                logger.info(f"Read image without dimension check (PIL not available): {img_path.name} ({file_size/1024/1024:.1f}MB)")
            else:
                # PIL available: check both file size and pixel dimensions.
                # Context manager ensures the image file handle is closed
                # promptly instead of lingering until garbage collection.
                with Image.open(img_path) as img:
                    original_width, original_height = img.size

                    short_side = min(original_width, original_height)
                    long_side = max(original_width, original_height)

                    # Decide whether a resize is required, and why (for logging).
                    needs_resize = False
                    resize_reason = []
                    if file_size > max_size:
                        needs_resize = True
                        resize_reason.append(f"file size {file_size/1024/1024:.1f}MB > {max_size/1024/1024:.0f}MB")
                    if short_side > max_short_side or long_side > max_long_side:
                        needs_resize = True
                        resize_reason.append(f"dimensions {original_width}x{original_height} exceed {max_short_side}x{max_long_side}")

                    if needs_resize:
                        # Pick the most restrictive of the size- and dimension-driven scale factors.
                        scale_factors = []
                        if file_size > max_size:
                            # Pixel count scales roughly with file size, so shrink by sqrt of the ratio.
                            size_scale = (max_size / file_size) ** 0.5 * 0.8  # 0.8 for safety margin
                            scale_factors.append(size_scale)
                        if short_side > max_short_side or long_side > max_long_side:
                            short_scale = max_short_side / short_side if short_side > max_short_side else 1.0
                            long_scale = max_long_side / long_side if long_side > max_long_side else 1.0
                            scale_factors.append(min(short_scale, long_scale) * 0.95)  # 0.95 for safety margin

                        scale_factor = min(scale_factors)
                        new_width = int(original_width * scale_factor)
                        new_height = int(original_height * scale_factor)

                        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

                        # Re-encode as JPEG for better compression.
                        img_byte_arr = io.BytesIO()
                        img_resized.convert("RGB").save(img_byte_arr, format="JPEG", quality=85, optimize=True)
                        image_data = img_byte_arr.getvalue()

                        base64_image = base64.b64encode(image_data).decode("utf-8")
                        mime_type = "image/jpeg"

                        logger.info(
                            f"Resized image ({', '.join(resize_reason)}): "
                            f"{original_width}x{original_height} ({file_size/1024/1024:.1f}MB) -> "
                            f"{new_width}x{new_height} ({len(image_data)/1024/1024:.1f}MB)",
                        )
                    else:
                        # No resize needed - read the original bytes unchanged.
                        image_data = img_path.read_bytes()
                        base64_image = base64.b64encode(image_data).decode("utf-8")
                        mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
                        logger.info(f"Image within limits: {original_width}x{original_height} ({file_size/1024/1024:.1f}MB)")

        except Exception as read_error:
            return _fail(f"Failed to read image file: {str(read_error)}")

        try:
            # Call OpenAI Responses API with the prompt and the inline image.
            response = client.responses.create(
                model=model,
                input=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_text", "text": prompt},
                            {
                                "type": "input_image",
                                "image_url": f"data:{mime_type};base64,{base64_image}",
                            },
                        ],
                    },
                ],
            )

            # Responses API exposes output_text; fall back to raw output otherwise.
            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)

            result = {
                "success": True,
                "operation": "understand_image",
                "image_path": str(img_path),
                "prompt": prompt,
                "model": model,
                "response": response_text,
            }
            return ExecutionResult(
                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
            )

        except Exception as api_error:
            return _fail(f"OpenAI API error: {str(api_error)}")

    except Exception as e:
        return _fail(f"Failed to understand image: {str(e)}")
@@ -0,0 +1,340 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Understand and analyze videos by extracting key frames and using OpenAI's gpt-4.1 API.
4
+ """
5
+
6
+ import base64
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from typing import List, Optional
11
+
12
+ from dotenv import load_dotenv
13
+ from openai import OpenAI
14
+
15
+ from massgen.tool._result import ExecutionResult, TextContent
16
+
17
+
18
+ def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None) -> None:
19
+ """
20
+ Validate that a path is within allowed directories.
21
+
22
+ Args:
23
+ path: Path to validate
24
+ allowed_paths: List of allowed base paths (optional)
25
+
26
+ Raises:
27
+ ValueError: If path is not within allowed directories
28
+ """
29
+ if not allowed_paths:
30
+ return # No restrictions
31
+
32
+ for allowed_path in allowed_paths:
33
+ try:
34
+ path.relative_to(allowed_path)
35
+ return # Path is within this allowed directory
36
+ except ValueError:
37
+ continue
38
+
39
+ raise ValueError(f"Path not in allowed directories: {path}")
40
+
41
+
42
+ def _extract_key_frames(video_path: Path, num_frames: int = 8) -> List[str]:
43
+ """
44
+ Extract key frames from a video file and resize them to fit OpenAI Vision API limits.
45
+
46
+ Args:
47
+ video_path: Path to the video file
48
+ num_frames: Number of key frames to extract
49
+
50
+ Returns:
51
+ List of base64-encoded frame images (resized to fit 768px x 2000px limits)
52
+
53
+ Raises:
54
+ ImportError: If opencv-python is not installed
55
+ Exception: If frame extraction fails
56
+ """
57
+ try:
58
+ import cv2
59
+ except ImportError:
60
+ raise ImportError(
61
+ "opencv-python is required for video frame extraction. " "Please install it with: pip install opencv-python",
62
+ )
63
+
64
+ # OpenAI Vision API limits for images (same as understand_image)
65
+ max_short_side = 768 # Maximum pixels for short side
66
+ max_long_side = 2000 # Maximum pixels for long side
67
+
68
+ # Open the video file
69
+ video = cv2.VideoCapture(str(video_path))
70
+
71
+ if not video.isOpened():
72
+ raise Exception(f"Failed to open video file: {video_path}")
73
+
74
+ try:
75
+ # Get total number of frames
76
+ total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
77
+
78
+ if total_frames == 0:
79
+ raise Exception(f"Video file has no frames: {video_path}")
80
+
81
+ # Calculate frame indices to extract (evenly spaced)
82
+ frame_indices = []
83
+ if num_frames >= total_frames:
84
+ # If requesting more frames than available, use all frames
85
+ frame_indices = list(range(total_frames))
86
+ else:
87
+ # Extract evenly spaced frames
88
+ step = total_frames / num_frames
89
+ frame_indices = [int(i * step) for i in range(num_frames)]
90
+
91
+ # Extract frames
92
+ frames_base64 = []
93
+ for frame_idx in frame_indices:
94
+ # Set video position to the frame
95
+ video.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
96
+
97
+ # Read the frame
98
+ ret, frame = video.read()
99
+
100
+ if not ret:
101
+ continue
102
+
103
+ # Check and resize frame if needed to fit OpenAI Vision API limits
104
+ height, width = frame.shape[:2]
105
+ short_side = min(width, height)
106
+ long_side = max(width, height)
107
+
108
+ if short_side > max_short_side or long_side > max_long_side:
109
+ # Calculate scale factor to fit within dimension constraints
110
+ short_scale = max_short_side / short_side if short_side > max_short_side else 1.0
111
+ long_scale = max_long_side / long_side if long_side > max_long_side else 1.0
112
+ scale_factor = min(short_scale, long_scale) * 0.95 # 0.95 for safety margin
113
+
114
+ new_width = int(width * scale_factor)
115
+ new_height = int(height * scale_factor)
116
+
117
+ # Resize frame using LANCZOS (high quality)
118
+ frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
119
+
120
+ # Encode frame to JPEG with quality=85 (same as understand_image)
121
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
122
+ ret, buffer = cv2.imencode(".jpg", frame, encode_param)
123
+
124
+ if not ret:
125
+ continue
126
+
127
+ # Convert to base64
128
+ frame_base64 = base64.b64encode(buffer).decode("utf-8")
129
+ frames_base64.append(frame_base64)
130
+
131
+ if not frames_base64:
132
+ raise Exception("Failed to extract any frames from video")
133
+
134
+ return frames_base64
135
+
136
+ finally:
137
+ # Release the video capture object
138
+ video.release()
139
+
140
+
141
async def understand_video(
    video_path: str,
    prompt: str = "What's happening in this video? Please describe the content, actions, and any important details you observe across these frames.",
    num_frames: int = 8,
    model: str = "gpt-4.1",
    allowed_paths: Optional[List[str]] = None,
    agent_cwd: Optional[str] = None,
) -> ExecutionResult:
    """
    Understand and analyze a video by extracting key frames and using OpenAI's gpt-4.1 API.

    Key frames are sampled evenly across the video and sent as inline images
    together with the prompt. Audio content is not analyzed; for audio, use
    the generate_text_with_input_audio tool.

    Args:
        video_path: Path to the video file (MP4, AVI, MOV, etc.)
            - Relative path: Resolved relative to workspace
            - Absolute path: Must be within allowed directories
        prompt: Question or instruction about the video (default: asks for general description)
        num_frames: Number of key frames to extract (default: 8, must be >= 1;
            recommended range 4-16 — more frames give more detail but cost more)
        model: Model to use (default: "gpt-4.1")
        allowed_paths: List of allowed base paths for validation (optional)
        agent_cwd: Agent's current working directory (automatically injected, optional)

    Returns:
        ExecutionResult whose JSON payload contains: success, operation
        ("understand_video"), video_path, num_frames_extracted, prompt, model,
        and the model's response (or an error message on failure).

    Security:
        - Requires valid OpenAI API key (OPENAI_API_KEY)
        - Requires opencv-python package for video processing
        - Video file must exist, be readable, and be within allowed_paths
    """

    def _fail(error: str) -> ExecutionResult:
        # Every failure path returns the same JSON payload shape; build it in one place.
        payload = {"success": False, "operation": "understand_video", "error": error}
        return ExecutionResult(output_blocks=[TextContent(data=json.dumps(payload, indent=2))])

    try:
        # Convert allowed_paths from strings to Path objects.
        allowed_paths_list = [Path(p) for p in allowed_paths] if allowed_paths else None

        # Load environment variables: prefer the repo-root .env, fall back to default lookup.
        script_dir = Path(__file__).parent.parent.parent.parent
        env_path = script_dir / ".env"
        if env_path.exists():
            load_dotenv(env_path)
        else:
            load_dotenv()

        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            return _fail("OpenAI API key not found. Please set OPENAI_API_KEY in .env file or environment variable.")

        # Initialize OpenAI client.
        client = OpenAI(api_key=openai_api_key)

        # Reject invalid frame counts up front with a structured error rather
        # than letting frame sampling fail with an opaque exception.
        if num_frames < 1:
            return _fail(f"num_frames must be at least 1, got {num_frames}")

        # Resolve video path relative to the agent workspace when not absolute.
        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
        if Path(video_path).is_absolute():
            vid_path = Path(video_path).resolve()
        else:
            vid_path = (base_dir / video_path).resolve()

        # Raises ValueError if outside allowed directories; caught by the outer handler.
        _validate_path_access(vid_path, allowed_paths_list)

        if not vid_path.exists():
            return _fail(f"Video file does not exist: {vid_path}")

        # Cheap sanity check by extension before handing the file to OpenCV.
        video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"]
        if vid_path.suffix.lower() not in video_extensions:
            return _fail(f"File does not appear to be a video file: {vid_path}. Supported formats: {', '.join(video_extensions)}")

        # Extract key frames from the video (requires opencv-python).
        try:
            frames_base64 = _extract_key_frames(vid_path, num_frames)
        except ImportError as import_error:
            return _fail(str(import_error))
        except Exception as extract_error:
            return _fail(f"Failed to extract frames from video: {str(extract_error)}")

        # Build the multimodal message: the prompt followed by every frame as an inline image.
        content = [{"type": "input_text", "text": prompt}]
        for frame_base64 in frames_base64:
            content.append(
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{frame_base64}",
                },
            )

        try:
            # Call OpenAI Responses API for video understanding.
            response = client.responses.create(
                model=model,
                input=[
                    {
                        "role": "user",
                        "content": content,
                    },
                ],
            )

            # Responses API exposes output_text; fall back to raw output otherwise.
            response_text = response.output_text if hasattr(response, "output_text") else str(response.output)

            result = {
                "success": True,
                "operation": "understand_video",
                "video_path": str(vid_path),
                "num_frames_extracted": len(frames_base64),
                "prompt": prompt,
                "model": model,
                "response": response_text,
            }
            return ExecutionResult(
                output_blocks=[TextContent(data=json.dumps(result, indent=2))],
            )

        except Exception as api_error:
            return _fail(f"OpenAI API error: {str(api_error)}")

    except Exception as e:
        return _fail(f"Failed to understand video: {str(e)}")