PyPI - massgen - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of massgen might be problematic. Click here for more details.

Files changed (58) hide show

massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+# MassGen Configuration: Text to Speech Continue Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
+agents:
+  - id: "text_to_speech_continue_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,47 @@
+# MassGen Configuration: Text to Video Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml "Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
+agents:
+  - id: "text_to_video_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+  - id: "text_to_video_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# MassGen Configuration: Text to Video Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml "Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
+agents:
+  - id: "text_to_video_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Audio Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
 agents:
   - id: "understand_audio_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand File Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
 agents:
   - id: "understand_file_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Image Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
 agents:
   - id: "understand_image_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Video Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
 agents:
   - id: "understand_video_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml CHANGED Viewed

@@ -51,7 +51,7 @@ orchestrator:
   snapshot_storage: "snapshots"
   agent_temporary_workspace: "temp_workspaces"
   context_paths:
-    - path: "docs/case_studies"
+    - path: "docs/source/examples/case_studies"
       permission: "read"
 ui:

massgen/filesystem_manager/_filesystem_manager.py CHANGED Viewed

@@ -57,6 +57,7 @@ class FilesystemManager:
         command_line_docker_network_mode: str = "none",
         command_line_docker_enable_sudo: bool = False,
         enable_audio_generation: bool = False,
+        enable_file_generation: bool = False,
     ):
         """
         Initialize FilesystemManager.

massgen/filesystem_manager/_path_permission_manager.py CHANGED Viewed

@@ -90,6 +90,68 @@ class PathPermissionManager:
         "massgen_logs",
     ]
+    # Binary file extensions that should not be read by text-based tools
+    # These files should be handled by specialized tools (understand_image, understand_video, etc.)
+    BINARY_FILE_EXTENSIONS = {
+        # Images
+        ".jpg",
+        ".jpeg",
+        ".png",
+        ".gif",
+        ".bmp",
+        ".ico",
+        ".svg",
+        ".webp",
+        ".tiff",
+        ".tif",
+        # Videos
+        ".mp4",
+        ".avi",
+        ".mov",
+        ".mkv",
+        ".flv",
+        ".wmv",
+        ".webm",
+        ".m4v",
+        ".mpg",
+        ".mpeg",
+        # Audio
+        ".mp3",
+        ".wav",
+        ".ogg",
+        ".flac",
+        ".aac",
+        ".m4a",
+        ".wma",
+        # Archives
+        ".zip",
+        ".tar",
+        ".gz",
+        ".bz2",
+        ".7z",
+        ".rar",
+        ".xz",
+        # Executables and binaries
+        ".exe",
+        ".bin",
+        ".dll",
+        ".so",
+        ".dylib",
+        ".o",
+        ".a",
+        ".pyc",
+        ".class",
+        ".jar",
+        # Office documents and PDFs (binary formats; understand_file handles only the ones marked as supported below)
+        ".doc",  # Old Word (not supported by understand_file)
+        ".xls",  # Old Excel (not supported by understand_file)
+        ".ppt",  # Old PowerPoint (not supported by understand_file)
+        ".pdf",  # PDF (supported by understand_file with PyPDF2)
+        ".docx",  # Word (supported by understand_file with python-docx)
+        ".xlsx",  # Excel (supported by understand_file with openpyxl)
+        ".pptx",  # PowerPoint (supported by understand_file with python-pptx)
+    }
     def __init__(
         self,
         context_write_access_enabled: bool = False,
@@ -440,6 +502,12 @@ class PathPermissionManager:
             - allowed: Whether the tool call should proceed
             - reason: Explanation if blocked (None if allowed)
         """
+        # Check if read tool is trying to read binary files (images, videos, etc.)
+        if self._is_text_read_tool(tool_name):
+            binary_check_result = self._validate_binary_file_access(tool_name, tool_args)
+            if not binary_check_result[0]:
+                return binary_check_result
         # Track read operations for read-before-delete enforcement
         if self._is_read_tool(tool_name):
             self._track_read_operation(tool_name, tool_args)
@@ -495,6 +563,33 @@ class PathPermissionManager:
         return False
+    def _is_text_read_tool(self, tool_name: str) -> bool:
+        """
+        Check if a tool is a text-based read operation that should not access binary files.
+        These tools are designed for reading text files and should be blocked from
+        reading binary files (images, videos, audio, etc.) to prevent context pollution.
+        Tools that read text file contents:
+        - Read: Claude Code read tool
+        - read_text_file: MCP filesystem read tool
+        - read_file: Generic read operations
+        """
+        # Use lowercase for case-insensitive matching
+        tool_lower = tool_name.lower()
+        # Check if tool name contains any text read operation keywords
+        text_read_keywords = [
+            "read_text_file",  # MCP filesystem: read_text_file
+            "read_file",  # Generic read operations
+        ]
+        # Also check for exact "Read" match (Claude Code tool)
+        if tool_name == "Read":
+            return True
+        return any(keyword in tool_lower for keyword in text_read_keywords)
     def _is_read_tool(self, tool_name: str) -> bool:
         """
         Check if a tool is a read operation that should be tracked.
@@ -518,6 +613,59 @@ class PathPermissionManager:
         return any(keyword in tool_lower for keyword in read_keywords)
+    def _validate_binary_file_access(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
+        """
+        Validate that text-based read tools are not trying to read binary files.
+        Binary files (images, videos, audio, etc.) should be handled by specialized tools
+        to prevent context pollution with binary data.
+        Args:
+            tool_name: Name of the tool being called
+            tool_args: Arguments passed to the tool
+        Returns:
+            Tuple of (allowed: bool, reason: Optional[str])
+            - allowed: False if trying to read binary file, True otherwise
+            - reason: Explanation if blocked (None if allowed)
+        """
+        # Extract file path from arguments
+        file_path = self._extract_file_path(tool_args)
+        if not file_path:
+            # Can't determine path - allow (tool may not access files)
+            return (True, None)
+        # Resolve path
+        try:
+            file_path_str = self._resolve_path_against_workspace(file_path)
+            path = Path(file_path_str)
+        except Exception:
+            # If path resolution fails, allow (will fail elsewhere if invalid)
+            return (True, None)
+        # Check file extension
+        file_extension = path.suffix.lower()
+        if file_extension in self.BINARY_FILE_EXTENSIONS:
+            # Determine appropriate tool suggestion based on file type
+            if file_extension in {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"}:
+                suggestion = "For images, use understand_image tool"
+            elif file_extension in {".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"}:
+                suggestion = "For videos, use understand_video tool"
+            elif file_extension in {".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"}:
+                suggestion = "For audio files, use generate_text_with_input_audio tool"
+            elif file_extension in {".pdf"}:
+                suggestion = "For PDF files, use understand_file tool"
+            elif file_extension in {".docx", ".xlsx", ".pptx"}:
+                suggestion = "For Office documents, use understand_file tool"
+            else:
+                suggestion = "Use appropriate specialized tool for this file type"
+            reason = f"Cannot read binary file '{path.name}' with {tool_name}. {suggestion}."
+            logger.warning(f"[PathPermissionManager] Blocked {tool_name} from reading binary file: {path}")
+            return (False, reason)
+        return (True, None)
     def _is_delete_tool(self, tool_name: str) -> bool:
         """
         Check if a tool is a delete operation.

massgen/message_templates.py CHANGED Viewed

@@ -302,6 +302,8 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
         original_system_message: Optional[str] = None,
         enable_image_generation: bool = False,
         enable_audio_generation: bool = False,
+        enable_file_generation: bool = False,
+        enable_video_generation: bool = False,
         has_irreversible_actions: bool = False,
         enable_command_execution: bool = False,
     ) -> str:
@@ -311,6 +313,8 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
             original_system_message: The agent's original system message to preserve
             enable_image_generation: Whether image generation is enabled
             enable_audio_generation: Whether audio generation is enabled
+            enable_file_generation: Whether file generation is enabled
+            enable_video_generation: Whether video generation is enabled
             has_irreversible_actions: Whether agent has write access to context paths (requires actual file delivery)
             enable_command_execution: Whether command execution is enabled for this agent
         """
@@ -335,21 +339,165 @@ Present the best possible coordinated answer by combining the strengths from all
         # Add image generation instructions only if enabled
         if enable_image_generation:
             presentation_instructions += """For image generation tasks:
-- Extract image paths from the existing answer and resolve them in the shared reference.
-- Gather all agent-produced images (ignore non-existent files).
-- MUST call the generate-image tool with these input images to synthesize one final image combining their strengths.
-- MUST save the final outputand output the saved path.
-"""
+  **MANDATORY WORKFLOW - You MUST follow these steps in order:**
+  Step 1: **Check for existing images (REQUIRED)**
+  - First, list all files in the Shared Reference directory (temp_workspaces) to find ALL images from EVERY agent
+  - Look for image files (.png, .jpg, .jpeg, .gif, .webp, etc.) in each agent's workspace subdirectory
+  Step 2: **Understand ALL existing images (REQUIRED if images exist)**
+  - For EACH image file you found, you MUST call the **understand_image** tool to extract its key visual elements, composition, style, and quality
+  - Do this for images from yourself AND from other agents - analyze ALL images found
+  - DO NOT skip this step even if you think you know the content
+  Step 3: **Synthesize and generate final image (REQUIRED)**
+  - If existing images were found and analyzed:
+    * Synthesize ALL image analyses into a single, detailed, combined prompt
+    * The combined prompt should capture the best visual elements, composition, style, and quality from all analyzed images
+    * Call **image_to_image_generation** with this synthesized prompt and ALL images to create the final unified image
+  - If NO existing images were found:
+    * Generate a new image based directly on the original task requirements
+    * Call **text_to_image_generation** with a prompt derived from the original task
+  Step 4: **Save and report (REQUIRED)**
+  - Save the final generated image in your workspace
+  - Report the saved path in your final answer
+  **CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing images. Do not skip calling
+  understand_image on found images. This is a mandatory synthesis workflow.
+  """
+        #             presentation_instructions += """For image generation tasks:
+        # - Extract image paths from the existing answer and resolve them in the shared reference.
+        # - Gather all agent-produced images (ignore non-existent files).
+        # - IMPORTANT: If you find ANY existing images (from yourself or other agents), you MUST call the understand_image tool
+        #   to analyze EACH image and extract their key visual elements, composition, style, and quality.
+        # - IMPORTANT: Synthesize insights from all analyzed images into a detailed, combined prompt that captures the best elements.
+        # - IMPORTANT: Call text_to_image_generation with this synthesized prompt to generate the final image.
+        # - IMPORTANT: Save the final output in your workspace and output the saved path.
+        # - If no existing images are found, generate based on the original task requirements.
+        # """
         # Add audio generation instructions only if enabled
         if enable_audio_generation:
             presentation_instructions += """For audio generation tasks:
-- Extract audio paths from the existing answer and resolve them in the shared reference.
-- Gather ALL audio files produced by EVERY agent (ignore non-existent files).
-  IMPORTANT: You MUST call the generate_text_with_input_audio tool to obtain transcriptions
-  for EACH AND EVERY audio file from ALL agents - no audio should be skipped or overlooked.
-- MUST combine the strengths of all transcriptions into one final detailed transcription that captures the best elements from each.
-- MUST use the convert_text_to_audio tool to convert this final transcription to a new audio file and save it, then output the saved path.
-"""
+  **MANDATORY WORKFLOW - You MUST follow these steps in order:**
+  Step 1: **Check for existing audios (REQUIRED)**
+  - First, list all files in the Shared Reference directory (temp_workspaces) to find ALL audio files from EVERY agent
+  - Look for audio files (.mp3, .wav, .flac, etc.) in each agent's workspace subdirectory
+  Step 2: **Understand ALL existing audios (REQUIRED if audios exist)**
+  - For EACH audio file you found, you MUST call the **understand_audio** tool to extract its transcription
+  - Do this for audios from yourself AND from other agents - analyze ALL audios found
+  - DO NOT skip this step even if you think you know the content
+  Step 3: **Synthesize and generate final audio (REQUIRED)**
+  - If existing audios were found and analyzed:
+    * Synthesize ALL audio transcriptions into a single, detailed, combined transcription
+    * The combined transcription should capture the best content from all analyzed audios
+    * Call **text_to_speech_transcription_generation** with this synthesized transcription to create the final unified audio
+  - If NO existing audios were found:
+    * Generate a new audio based directly on the original task requirements
+    * Call **text_to_speech_transcription_generation** with a transcription derived from the original task
+  Step 4: **Save and report (REQUIRED)**
+  - Save the final generated audio in your workspace
+  - Report the saved path in your final answer
+  **CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing audios. Do not skip calling
+  understand_audio on found audios. This is a mandatory synthesis workflow.
+  """
+        #                         presentation_instructions += """For audio generation tasks:
+        # - Extract audio paths from the existing answer and resolve them in the shared reference.
+        # - Gather ALL audio files produced by EVERY agent (ignore non-existent files).
+        # - IMPORTANT: If you find ANY existing audios (from yourself or other agents), you MUST call the **understand_audio** tool to extract each audio's transcription.
+        # - IMPORTANT: Synthesize transcriptions from all audios into a detailed, combined transcription.
+        # - IMPORTANT: You MUST call the **text_to_speech_transcription_generation** tool with this synthesized transcription to generate the final audio.
+        # - IMPORTANT: Save the final output in your workspace and output the saved path.
+        # - If no existing audios are found, generate based on the original task requirements.
+        # """
+        # Add file generation instructions only if enabled
+        if enable_file_generation:
+            presentation_instructions += """For file generation tasks:
+  **MANDATORY WORKFLOW - You MUST follow these steps in order:**
+  Step 1: **Check for existing files (REQUIRED)**
+  - First, list all files in the Shared Reference directory (temp_workspaces) to find ALL files from EVERY agent
+  - Look for files of the requested type in each agent's workspace subdirectory
+  Step 2: **Understand ALL existing files (REQUIRED if files exist)**
+  - For EACH file you found, you MUST call the **understand_file** tool to extract its content, structure, and key elements
+  - Do this for files from yourself AND from other agents - analyze ALL files found
+  - DO NOT skip this step even if you think you know the content
+  Step 3: **Synthesize and generate final file (REQUIRED)**
+  - If existing files were found and analyzed:
+    * Synthesize ALL file contents into a single, detailed, combined content
+    * The combined content should capture the best elements, structure, and information from all analyzed files
+    * Call **text_to_file_generation** with this synthesized content to generate the final unified file
+  - If NO existing files were found:
+    * Generate a new file based directly on the original task requirements
+    * Call **text_to_file_generation** with content derived from the original task
+  Step 4: **Save and report (REQUIRED)**
+  - Save the final generated file in your workspace
+  - Report the saved path in your final answer
+  **CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing files. Do not skip calling
+  understand_file on found files. This is a mandatory synthesis workflow.
+  """
+        #             presentation_instructions += """For file generation tasks:
+        # - Extract file paths from the existing answer and resolve them in the shared reference.
+        # - Gather ALL files produced by EVERY agent (ignore non-existent files).
+        # - IMPORTANT: If you find ANY existing files (from yourself or other agents), you MUST call the **understand_file** tool to extract each file's content.
+        # - IMPORTANT: Synthesize contents from all files into a detailed, combined content.
+        # - IMPORTANT: You MUST call the **text_to_file_generation** tool with this synthesized content to generate the final file.
+        # - IMPORTANT: Save the final output in your workspace and output the saved path.
+        # - If no existing files are found, generate based on the original task requirements.
+        # """
+        # Add video generation instructions only if enabled
+        if enable_video_generation:
+            presentation_instructions += """For video generation tasks:
+  **MANDATORY WORKFLOW - You MUST follow these steps in order:**
+  Step 1: **Check for existing videos (REQUIRED)**
+  - First, list all files in the Shared Reference directory (temp_workspaces) to find ALL videos from EVERY agent
+  - Look for video files (.mp4, .avi, .mov, etc.) in each agent's workspace subdirectory
+  Step 2: **Understand ALL existing videos (REQUIRED if videos exist)**
+  - For EACH video file you found, you MUST call the **understand_video** tool to extract its description, visual features, and
+  key elements
+  - Do this for videos from yourself AND from other agents - analyze ALL videos found
+  - DO NOT skip this step even if you think you know the content
+  Step 3: **Synthesize and generate final video (REQUIRED)**
+  - If existing videos were found and analyzed:
+    * Synthesize ALL video descriptions into a single, detailed, combined prompt
+    * The combined prompt should capture the best visual elements, composition, motion, and style from all analyzed videos
+    * Call **text_to_video_generation** with this synthesized prompt to create the final unified video
+  - If NO existing videos were found:
+    * Generate a new video based directly on the original task requirements
+    * Call **text_to_video_generation** with a prompt derived from the original task
+  Step 4: **Save and report (REQUIRED)**
+  - Save the final generated video in your workspace
+  - Report the saved path in your final answer
+  **CRITICAL**: You MUST complete Steps 1-4 in order. Do not skip checking for existing videos. Do not skip calling
+  understand_video on found videos. This is a mandatory synthesis workflow.
+  """
+        #             presentation_instructions += """For video generation tasks:
+        # - Extract video paths from the existing answer and resolve them in the shared reference.
+        # - Gather ALL videos produced by EVERY agent (ignore non-existent files).
+        # - IMPORTANT: If you find ANY existing videos (from yourself or other agents), you MUST call the **understand_video** tool to extract each video's description and key features.
+        # - IMPORTANT: Synthesize descriptions from all videos into a detailed, combined prompt capturing the best elements.
+        # - IMPORTANT: You MUST call the **text_to_video_generation** tool with this synthesized prompt to generate the final video.
+        # - IMPORTANT: Save the final output in your workspace and output the saved path.
+        # - If no existing videos are found, generate based on the original task requirements.
+        # """
         # Add irreversible actions reminder if needed
         # TODO: Integrate more general irreversible actions handling in future (i.e., not just for context file delivery)

massgen/orchestrator.py CHANGED Viewed

@@ -2513,6 +2513,20 @@ INSTRUCTIONS FOR NEXT ATTEMPT:
         elif hasattr(agent, "backend") and hasattr(agent.backend, "backend_params"):
             enable_audio_generation = agent.backend.backend_params.get("enable_audio_generation", False)
+        # Check if file generation is enabled for this agent
+        enable_file_generation = False
+        if hasattr(agent, "config") and agent.config:
+            enable_file_generation = agent.config.backend_params.get("enable_file_generation", False)
+        elif hasattr(agent, "backend") and hasattr(agent.backend, "backend_params"):
+            enable_file_generation = agent.backend.backend_params.get("enable_file_generation", False)
+        # Check if video generation is enabled for this agent
+        enable_video_generation = False
+        if hasattr(agent, "config") and agent.config:
+            enable_video_generation = agent.config.backend_params.get("enable_video_generation", False)
+        elif hasattr(agent, "backend") and hasattr(agent.backend, "backend_params"):
+            enable_video_generation = agent.backend.backend_params.get("enable_video_generation", False)
         # Check if agent has write access to context paths (requires file delivery)
         has_irreversible_actions = False
         if agent.backend.filesystem_manager:
@@ -2525,6 +2539,8 @@ INSTRUCTIONS FOR NEXT ATTEMPT:
             agent_system_message,
             enable_image_generation,
             enable_audio_generation,
+            enable_file_generation,
+            enable_video_generation,
             has_irreversible_actions,
             enable_command_execution,
         )

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

Potentially problematic release.

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl