PyPI - massgen - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

massgen 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of massgen might be problematic. Click here for more details.

Files changed (90) hide show

massgen/__init__.py +1 -1
massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
massgen/api_params_handler/_claude_api_params_handler.py +4 -0
massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
massgen/api_params_handler/_response_api_params_handler.py +4 -0
massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
massgen/backend/docs/permissions_and_context_files.md +2 -2
massgen/backend/response.py +2 -0
massgen/chat_agent.py +340 -20
massgen/cli.py +326 -19
massgen/configs/README.md +92 -41
massgen/configs/memory/gpt5mini_gemini_baseline_research_to_implementation.yaml +94 -0
massgen/configs/memory/gpt5mini_gemini_context_window_management.yaml +187 -0
massgen/configs/memory/gpt5mini_gemini_research_to_implementation.yaml +127 -0
massgen/configs/memory/gpt5mini_high_reasoning_gemini.yaml +107 -0
massgen/configs/memory/single_agent_compression_test.yaml +64 -0
massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
massgen/filesystem_manager/_filesystem_manager.py +1 -0
massgen/filesystem_manager/_path_permission_manager.py +148 -0
massgen/memory/README.md +277 -0
massgen/memory/__init__.py +26 -0
massgen/memory/_base.py +193 -0
massgen/memory/_compression.py +237 -0
massgen/memory/_context_monitor.py +211 -0
massgen/memory/_conversation.py +255 -0
massgen/memory/_fact_extraction_prompts.py +333 -0
massgen/memory/_mem0_adapters.py +257 -0
massgen/memory/_persistent.py +687 -0
massgen/memory/docker-compose.qdrant.yml +36 -0
massgen/memory/docs/DESIGN.md +388 -0
massgen/memory/docs/QUICKSTART.md +409 -0
massgen/memory/docs/SUMMARY.md +319 -0
massgen/memory/docs/agent_use_memory.md +408 -0
massgen/memory/docs/orchestrator_use_memory.md +586 -0
massgen/memory/examples.py +237 -0
massgen/message_templates.py +160 -12
massgen/orchestrator.py +223 -7
massgen/tests/memory/test_agent_compression.py +174 -0
massgen/{configs/tools → tests}/memory/test_context_window_management.py +30 -30
massgen/tests/memory/test_force_compression.py +154 -0
massgen/tests/memory/test_simple_compression.py +147 -0
massgen/tests/test_agent_memory.py +534 -0
massgen/tests/test_binary_file_blocking.py +274 -0
massgen/tests/test_case_studies.md +12 -12
massgen/tests/test_conversation_memory.py +382 -0
massgen/tests/test_multimodal_size_limits.py +407 -0
massgen/tests/test_orchestrator_memory.py +620 -0
massgen/tests/test_persistent_memory.py +435 -0
massgen/token_manager/token_manager.py +6 -0
massgen/tool/_manager.py +7 -2
massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
massgen/tool/_multimodal_tools/understand_audio.py +19 -1
massgen/tool/_multimodal_tools/understand_file.py +6 -1
massgen/tool/_multimodal_tools/understand_image.py +112 -8
massgen/tool/_multimodal_tools/understand_video.py +32 -5
massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
massgen/tool/docs/multimodal_tools.md +589 -0
massgen/tools/__init__.py +8 -0
massgen/tools/_planning_mcp_server.py +520 -0
massgen/tools/planning_dataclasses.py +434 -0
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/METADATA +142 -82
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/RECORD +84 -41
massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
massgen/configs/tools/memory/README.md +0 -199
massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/WHEEL +0 -0
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/entry_points.txt +0 -0
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/licenses/LICENSE +0 -0
{massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/top_level.txt +0 -0

massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,61 @@
+# MassGen Configuration: Text to File Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
+agents:
+  - id: "text_to_file_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+      When generating PPTX presentations, format your content with:
+      - Use "# Title" or "## Title" for slide titles
+      - Use "---" to separate slides
+      - Use "- Item" for bullet points
+      - Use "  - Subitem" for sub-bullets (two spaces indent)
+      - Structure content in a slide-friendly format with clear, concise points
+  - id: "text_to_file_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+      When generating PPTX presentations, format your content with:
+      - Use "# Title" or "## Title" for slide titles
+      - Use "---" to separate slides
+      - Use "- Item" for bullet points
+      - Use "  - Subitem" for sub-bullets (two spaces indent)
+      - Structure content in a slide-friendly format with clear, concise points
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# MassGen Configuration: Text to File Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
+agents:
+  - id: "text_to_file_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+# MassGen Configuration: Text to Image Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
+agents:
+  - id: "text_to_image_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+        - name: ["image_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
+          function: ["image_to_image_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+  - id: "text_to_image_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+# MassGen Configuration: Text to Image Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
+agents:
+  - id: "text_to_image_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+        - name: ["image_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
+          function: ["image_to_image_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,55 @@
+# MassGen Configuration: Text to Speech Continue Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
+agents:
+  - id: "text_to_speech_continue_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+  - id: "text_to_speech_continue_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+# MassGen Configuration: Text to Speech Continue Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
+agents:
+  - id: "text_to_speech_continue_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,47 @@
+# MassGen Configuration: Text to Video Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
+agents:
+  - id: "text_to_video_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+  - id: "text_to_video_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# MassGen Configuration: Text to Video Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
+agents:
+  - id: "text_to_video_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_video_generation: true
+      custom_tools:
+        - name: ["understand_video"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_video.py"
+          function: ["understand_video"]
+        - name: ["text_to_video_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
+          function: ["text_to_video_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-video generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Audio Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
 agents:
   - id: "understand_audio_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand File Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
 agents:
   - id: "understand_file_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Image Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
 agents:
   - id: "understand_image_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 # MassGen Configuration: Understand Video Tool
 # Usage:
-#   massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
 agents:
   - id: "understand_video_tool"
     backend:

massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml CHANGED Viewed

@@ -51,7 +51,7 @@ orchestrator:
   snapshot_storage: "snapshots"
   agent_temporary_workspace: "temp_workspaces"
   context_paths:
-    - path: "docs/case_studies"
+    - path: "docs/source/examples/case_studies"
       permission: "read"
 ui:

massgen/filesystem_manager/_filesystem_manager.py CHANGED Viewed

@@ -57,6 +57,7 @@ class FilesystemManager:
         command_line_docker_network_mode: str = "none",
         command_line_docker_enable_sudo: bool = False,
         enable_audio_generation: bool = False,
+        enable_file_generation: bool = False,
     ):
         """
         Initialize FilesystemManager.

massgen/filesystem_manager/_path_permission_manager.py CHANGED Viewed

@@ -90,6 +90,68 @@ class PathPermissionManager:
         "massgen_logs",
     ]
+    # Binary file extensions (lowercase, with leading dot) that should not be read by text-based tools.
+    # _validate_binary_file_access compares the lowercased final suffix of the requested path
+    # against this set, so only the last extension counts (e.g. "archive.tar.gz" matches ".gz").
+    # These files should be handled by specialized tools (understand_image, understand_video, etc.)
+    BINARY_FILE_EXTENSIONS = {
+        # Images
+        ".jpg",
+        ".jpeg",
+        ".png",
+        ".gif",
+        ".bmp",
+        ".ico",
+        ".svg",
+        ".webp",
+        ".tiff",
+        ".tif",
+        # Videos
+        ".mp4",
+        ".avi",
+        ".mov",
+        ".mkv",
+        ".flv",
+        ".wmv",
+        ".webm",
+        ".m4v",
+        ".mpg",
+        ".mpeg",
+        # Audio
+        ".mp3",
+        ".wav",
+        ".ogg",
+        ".flac",
+        ".aac",
+        ".m4a",
+        ".wma",
+        # Archives
+        ".zip",
+        ".tar",
+        ".gz",
+        ".bz2",
+        ".7z",
+        ".rar",
+        ".xz",
+        # Executables and binaries
+        ".exe",
+        ".bin",
+        ".dll",
+        ".so",
+        ".dylib",
+        ".o",
+        ".a",
+        ".pyc",
+        ".class",
+        ".jar",
+        # Documents in binary formats. Only the entries marked "supported" below can be
+        # handed to the understand_file tool; the legacy .doc/.xls/.ppt formats are
+        # blocked here as well even though understand_file cannot read them.
+        ".doc",  # Old Word (not supported by understand_file)
+        ".xls",  # Old Excel (not supported by understand_file)
+        ".ppt",  # Old PowerPoint (not supported by understand_file)
+        ".pdf",  # PDF (supported by understand_file with PyPDF2)
+        ".docx",  # Word (supported by understand_file with python-docx)
+        ".xlsx",  # Excel (supported by understand_file with openpyxl)
+        ".pptx",  # PowerPoint (supported by understand_file with python-pptx)
+    }
     def __init__(
         self,
         context_write_access_enabled: bool = False,
@@ -440,6 +502,12 @@ class PathPermissionManager:
             - allowed: Whether the tool call should proceed
             - reason: Explanation if blocked (None if allowed)
         """
+        # Check if read tool is trying to read binary files (images, videos, etc.)
+        if self._is_text_read_tool(tool_name):
+            binary_check_result = self._validate_binary_file_access(tool_name, tool_args)
+            if not binary_check_result[0]:
+                return binary_check_result
         # Track read operations for read-before-delete enforcement
         if self._is_read_tool(tool_name):
             self._track_read_operation(tool_name, tool_args)
@@ -495,6 +563,33 @@ class PathPermissionManager:
         return False
+    def _is_text_read_tool(self, tool_name: str) -> bool:
+        """
+        Report whether a tool reads files as text and must therefore stay away from binary files.
+        Text readers are kept off images, videos, audio and similar files so that
+        binary data does not pollute the context.
+        Recognised text readers:
+        - Read: Claude Code read tool (exact, case-sensitive name)
+        - read_text_file: MCP filesystem read tool (case-insensitive substring)
+        - read_file: Generic read operations (case-insensitive substring)
+        """
+        # Lowercase once so the substring checks below are case-insensitive
+        normalized_name = tool_name.lower()
+        # Claude Code's tool is matched by its exact name only
+        if tool_name == "Read":
+            return True
+        # Any other tool qualifies when its name embeds one of the text-read fragments
+        for fragment in ("read_text_file", "read_file"):
+            if fragment in normalized_name:
+                return True
+        return False
     def _is_read_tool(self, tool_name: str) -> bool:
         """
         Check if a tool is a read operation that should be tracked.
@@ -518,6 +613,59 @@ class PathPermissionManager:
         return any(keyword in tool_lower for keyword in read_keywords)
+    def _validate_binary_file_access(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
+        """
+        Validate that text-based read tools are not trying to read binary files.
+        Binary files (images, videos, audio, etc.) should be handled by specialized tools
+        to prevent context pollution with binary data. The decision is based only on the
+        lowercased final suffix of the resolved path; file contents are never inspected.
+        Args:
+            tool_name: Name of the tool being called (echoed in the block reason)
+            tool_args: Arguments passed to the tool
+        Returns:
+            Tuple of (allowed: bool, reason: Optional[str])
+            - allowed: False if the extension is in BINARY_FILE_EXTENSIONS, True otherwise
+            - reason: Explanation with a tool suggestion if blocked (None if allowed)
+        """
+        # Extract file path from arguments
+        file_path = self._extract_file_path(tool_args)
+        if not file_path:
+            # Can't determine path - allow (tool may not access files)
+            return (True, None)
+        # Resolve path
+        try:
+            file_path_str = self._resolve_path_against_workspace(file_path)
+            path = Path(file_path_str)
+        except Exception:
+            # Deliberate best-effort: if path resolution fails, allow (will fail elsewhere if invalid)
+            return (True, None)
+        # Check file extension
+        file_extension = path.suffix.lower()
+        if file_extension not in self.BINARY_FILE_EXTENSIONS:
+            return (True, None)
+        # Determine appropriate tool suggestion based on file type
+        if file_extension in {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"}:
+            suggestion = "For images, use understand_image tool"
+        elif file_extension in {".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"}:
+            suggestion = "For videos, use understand_video tool"
+        elif file_extension in {".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"}:
+            # Point at understand_audio, the audio member of the understand_* tool family,
+            # so the suggestion names a tool the agent can actually find
+            suggestion = "For audio files, use understand_audio tool"
+        elif file_extension in {".pdf"}:
+            suggestion = "For PDF files, use understand_file tool"
+        elif file_extension in {".docx", ".xlsx", ".pptx"}:
+            suggestion = "For Office documents, use understand_file tool"
+        else:
+            # Archives, executables and legacy Office formats have no dedicated tool suggestion
+            suggestion = "Use appropriate specialized tool for this file type"
+        reason = f"Cannot read binary file '{path.name}' with {tool_name}. {suggestion}."
+        logger.warning(f"[PathPermissionManager] Blocked {tool_name} from reading binary file: {path}")
+        return (False, reason)
     def _is_delete_tool(self, tool_name: str) -> bool:
         """
         Check if a tool is a delete operation.

massgen 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

Potentially problematic release.

massgen 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl