PyPI - massgen - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of massgen might be problematic. Click here for more details.

Files changed (58) hide show

massgen/tool/_multimodal_tools/understand_audio.py CHANGED Viewed

@@ -42,6 +42,7 @@ async def understand_audio(
     audio_paths: List[str],
     model: str = "gpt-4o-transcribe",
     allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
 ) -> ExecutionResult:
     """
     Transcribe audio file(s) to text using OpenAI's Transcription API.
@@ -55,6 +56,7 @@ async def understand_audio(
                     - Absolute path: Must be within allowed directories
         model: Model to use (default: "gpt-4o-transcribe")
         allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Current working directory of the agent (optional)
     Returns:
         ExecutionResult containing:
@@ -108,10 +110,13 @@ async def understand_audio(
         for audio_path_str in audio_paths:
             # Resolve audio path
+            # Use agent_cwd if available, otherwise fall back to Path.cwd()
+            base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
             if Path(audio_path_str).is_absolute():
                 audio_path = Path(audio_path_str).resolve()
             else:
-                audio_path = (Path.cwd() / audio_path_str).resolve()
+                audio_path = (base_dir / audio_path_str).resolve()
             # Validate audio path
             _validate_path_access(audio_path, allowed_paths_list)
@@ -137,6 +142,19 @@ async def understand_audio(
                     output_blocks=[TextContent(data=json.dumps(result, indent=2))],
                 )
+            # Check file size (OpenAI Whisper API has 25MB limit)
+            file_size = audio_path.stat().st_size
+            max_size = 25 * 1024 * 1024  # 25MB
+            if file_size > max_size:
+                result = {
+                    "success": False,
+                    "operation": "generate_text_with_input_audio",
+                    "error": f"Audio file too large: {audio_path} ({file_size/1024/1024:.1f}MB > 25MB). " "Please use a smaller file or compress the audio.",
+                }
+                return ExecutionResult(
+                    output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+                )
             validated_audio_paths.append(audio_path)
         # Process each audio file separately using OpenAI Transcription API

massgen/tool/_multimodal_tools/understand_file.py CHANGED Viewed

@@ -199,6 +199,7 @@ async def understand_file(
     model: str = "gpt-4.1",
     max_chars: int = 50000,
     allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
 ) -> ExecutionResult:
     """
     Understand and analyze file contents using OpenAI's gpt-4.1 API.
@@ -216,6 +217,7 @@ async def understand_file(
                   - Prevents processing extremely large files
                   - Applies to both text files and extracted content from documents
         allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected, optional)
     Returns:
         ExecutionResult containing:
@@ -306,10 +308,13 @@ async def understand_file(
         client = OpenAI(api_key=openai_api_key)
         # Resolve file path
+        # Use agent_cwd if available, otherwise fall back to Path.cwd()
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
         if Path(file_path).is_absolute():
             f_path = Path(file_path).resolve()
         else:
-            f_path = (Path.cwd() / file_path).resolve()
+            f_path = (base_dir / file_path).resolve()
         # Validate file path
         _validate_path_access(f_path, allowed_paths_list)

massgen/tool/_multimodal_tools/understand_image.py CHANGED Viewed

@@ -12,6 +12,7 @@ from typing import List, Optional
 from dotenv import load_dotenv
 from openai import OpenAI
+from massgen.logger_config import logger
 from massgen.tool._result import ExecutionResult, TextContent
@@ -44,6 +45,7 @@ async def understand_image(
     prompt: str = "What's in this image? Please describe it in detail.",
     model: str = "gpt-4.1",
     allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
 ) -> ExecutionResult:
     """
     Understand and analyze an image using OpenAI's gpt-4.1 API.
@@ -58,6 +60,7 @@ async def understand_image(
         prompt: Question or instruction about the image (default: "What's in this image? Please describe it in detail.")
         model: Model to use (default: "gpt-4.1")
         allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected)
     Returns:
         ExecutionResult containing:
@@ -111,10 +114,13 @@ async def understand_image(
         client = OpenAI(api_key=openai_api_key)
         # Resolve image path
+        # Use agent_cwd if available, otherwise fall back to Path.cwd()
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
         if Path(image_path).is_absolute():
             img_path = Path(image_path).resolve()
         else:
-            img_path = (Path.cwd() / image_path).resolve()
+            img_path = (base_dir / image_path).resolve()
         # Validate image path
         _validate_path_access(img_path, allowed_paths_list)
@@ -140,11 +146,112 @@ async def understand_image(
                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
             )
-        # Read and encode image to base64
+        # Read image and check size and dimensions
         try:
-            with open(img_path, "rb") as image_file:
-                image_data = image_file.read()
-            base64_image = base64.b64encode(image_data).decode("utf-8")
+            # OpenAI Vision API limits:
+            # - Up to 20MB per image
+            # - High-resolution: 768px (short side) x 2000px (long side)
+            file_size = img_path.stat().st_size
+            max_size = 18 * 1024 * 1024  # 18MB (conservative buffer under OpenAI's 20MB limit)
+            max_short_side = 768  # Maximum pixels for short side
+            max_long_side = 2000  # Maximum pixels for long side
+            # Try to import PIL for dimension/size checking
+            try:
+                import io
+                from PIL import Image
+            except ImportError:
+                # PIL not available - fall back to simple file reading
+                # This will work for small images but may fail for large ones
+                if file_size > max_size:
+                    result = {
+                        "success": False,
+                        "operation": "understand_image",
+                        "error": f"Image too large ({file_size/1024/1024:.1f}MB > {max_size/1024/1024:.0f}MB) and PIL not available for resizing. Install with: pip install pillow",
+                    }
+                    return ExecutionResult(
+                        output_blocks=[TextContent(data=json.dumps(result, indent=2))],
+                    )
+                # Read without resizing
+                with open(img_path, "rb") as image_file:
+                    image_data = image_file.read()
+                base64_image = base64.b64encode(image_data).decode("utf-8")
+                mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
+                logger.info(f"Read image without dimension check (PIL not available): {img_path.name} ({file_size/1024/1024:.1f}MB)")
+            else:
+                # PIL available - check both file size and dimensions
+                img = Image.open(img_path)
+                img.size
+                original_width, original_height = img.size
+                # Determine short and long sides
+                short_side = min(original_width, original_height)
+                long_side = max(original_width, original_height)
+                # Check if we need to resize
+                needs_resize = False
+                resize_reason = []
+                if file_size > max_size:
+                    needs_resize = True
+                    resize_reason.append(f"file size {file_size/1024/1024:.1f}MB > {max_size/1024/1024:.0f}MB")
+                if short_side > max_short_side or long_side > max_long_side:
+                    needs_resize = True
+                    resize_reason.append(f"dimensions {original_width}x{original_height} exceed {max_short_side}x{max_long_side}")
+                if needs_resize:
+                    # Calculate scale factor based on both size and dimensions
+                    scale_factors = []
+                    # Scale for file size (if needed)
+                    if file_size > max_size:
+                        # Estimate: reduce dimensions by sqrt of size ratio
+                        size_scale = (max_size / file_size) ** 0.5 * 0.8  # 0.8 for safety margin
+                        scale_factors.append(size_scale)
+                    # Scale for dimensions (if needed)
+                    if short_side > max_short_side or long_side > max_long_side:
+                        # Calculate scale needed to fit within dimension constraints
+                        short_scale = max_short_side / short_side if short_side > max_short_side else 1.0
+                        long_scale = max_long_side / long_side if long_side > max_long_side else 1.0
+                        dimension_scale = min(short_scale, long_scale) * 0.95  # 0.95 for safety margin
+                        scale_factors.append(dimension_scale)
+                    # Use the most restrictive scale factor
+                    scale_factor = min(scale_factors)
+                    new_width = int(original_width * scale_factor)
+                    new_height = int(original_height * scale_factor)
+                    # Resize image
+                    img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+                    # Convert to bytes
+                    img_byte_arr = io.BytesIO()
+                    # Save as JPEG for better compression
+                    img_resized.convert("RGB").save(img_byte_arr, format="JPEG", quality=85, optimize=True)
+                    image_data = img_byte_arr.getvalue()
+                    base64_image = base64.b64encode(image_data).decode("utf-8")
+                    mime_type = "image/jpeg"
+                    logger.info(
+                        f"Resized image ({', '.join(resize_reason)}): "
+                        f"{original_width}x{original_height} ({file_size/1024/1024:.1f}MB) -> "
+                        f"{new_width}x{new_height} ({len(image_data)/1024/1024:.1f}MB)",
+                    )
+                else:
+                    # No resize needed - read normally
+                    with open(img_path, "rb") as image_file:
+                        image_data = image_file.read()
+                    base64_image = base64.b64encode(image_data).decode("utf-8")
+                    # Determine MIME type
+                    mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
+                    logger.info(f"Image within limits: {original_width}x{original_height} ({file_size/1024/1024:.1f}MB)")
         except Exception as read_error:
             result = {
                 "success": False,
@@ -155,9 +262,6 @@ async def understand_image(
                 output_blocks=[TextContent(data=json.dumps(result, indent=2))],
             )
-        # Determine MIME type
-        mime_type = "image/jpeg" if img_path.suffix.lower() in [".jpg", ".jpeg"] else "image/png"
         try:
             # Call OpenAI API for image understanding
             response = client.responses.create(

massgen/tool/_multimodal_tools/understand_video.py CHANGED Viewed

@@ -41,14 +41,14 @@ def _validate_path_access(path: Path, allowed_paths: Optional[List[Path]] = None
 def _extract_key_frames(video_path: Path, num_frames: int = 8) -> List[str]:
     """
-    Extract key frames from a video file.
+    Extract key frames from a video file and resize them to fit OpenAI Vision API limits.
     Args:
         video_path: Path to the video file
         num_frames: Number of key frames to extract
     Returns:
-        List of base64-encoded frame images
+        List of base64-encoded frame images (resized to fit 768px x 2000px limits)
     Raises:
         ImportError: If opencv-python is not installed
@@ -61,6 +61,10 @@ def _extract_key_frames(video_path: Path, num_frames: int = 8) -> List[str]:
             "opencv-python is required for video frame extraction. " "Please install it with: pip install opencv-python",
         )
+    # OpenAI Vision API limits for images (same as understand_image)
+    max_short_side = 768  # Maximum pixels for short side
+    max_long_side = 2000  # Maximum pixels for long side
     # Open the video file
     video = cv2.VideoCapture(str(video_path))
@@ -96,8 +100,26 @@ def _extract_key_frames(video_path: Path, num_frames: int = 8) -> List[str]:
             if not ret:
                 continue
-            # Encode frame to JPEG
-            ret, buffer = cv2.imencode(".jpg", frame)
+            # Check and resize frame if needed to fit OpenAI Vision API limits
+            height, width = frame.shape[:2]
+            short_side = min(width, height)
+            long_side = max(width, height)
+            if short_side > max_short_side or long_side > max_long_side:
+                # Calculate scale factor to fit within dimension constraints
+                short_scale = max_short_side / short_side if short_side > max_short_side else 1.0
+                long_scale = max_long_side / long_side if long_side > max_long_side else 1.0
+                scale_factor = min(short_scale, long_scale) * 0.95  # 0.95 for safety margin
+                new_width = int(width * scale_factor)
+                new_height = int(height * scale_factor)
+                # Resize frame using LANCZOS (high quality)
+                frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
+            # Encode frame to JPEG with quality=85 (same as understand_image)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+            ret, buffer = cv2.imencode(".jpg", frame, encode_param)
             if not ret:
                 continue
@@ -122,6 +144,7 @@ async def understand_video(
     num_frames: int = 8,
     model: str = "gpt-4.1",
     allowed_paths: Optional[List[str]] = None,
+    agent_cwd: Optional[str] = None,
 ) -> ExecutionResult:
     """
     Understand and analyze a video by extracting key frames and using OpenAI's gpt-4.1 API.
@@ -139,6 +162,7 @@ async def understand_video(
                    - Recommended range: 4-16 frames
         model: Model to use (default: "gpt-4.1")
         allowed_paths: List of allowed base paths for validation (optional)
+        agent_cwd: Agent's current working directory (automatically injected, optional)
     Returns:
         ExecutionResult containing:
@@ -201,10 +225,13 @@ async def understand_video(
         client = OpenAI(api_key=openai_api_key)
         # Resolve video path
+        # Use agent_cwd if available, otherwise fall back to Path.cwd()
+        base_dir = Path(agent_cwd) if agent_cwd else Path.cwd()
         if Path(video_path).is_absolute():
             vid_path = Path(video_path).resolve()
         else:
-            vid_path = (Path.cwd() / video_path).resolve()
+            vid_path = (base_dir / video_path).resolve()
         # Validate video path
         _validate_path_access(vid_path, allowed_paths_list)

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

Potentially problematic release.

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl