PyPI - massgen - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of massgen might be problematic. Click here for more details.

Files changed (58) hide show

massgen/__init__.py CHANGED Viewed

@@ -68,7 +68,7 @@ from .chat_agent import (
 from .message_templates import MessageTemplates, get_templates
 from .orchestrator import Orchestrator, create_orchestrator
-__version__ = "0.1.3"
+__version__ = "0.1.4"
 __author__ = "MassGen Contributors"

massgen/api_params_handler/_chat_completions_api_params_handler.py CHANGED Viewed

@@ -24,6 +24,10 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
                 "allowed_tools",
                 "exclude_tools",
                 "custom_tools",  # Custom tools configuration (processed separately)
+                "enable_file_generation",  # Internal flag for file generation (used in system messages only)
+                "enable_image_generation",  # Internal flag for image generation (used in system messages only)
+                "enable_audio_generation",  # Internal flag for audio generation (used in system messages only)
+                "enable_video_generation",  # Internal flag for video generation (used in system messages only)
             },
         )

massgen/api_params_handler/_claude_api_params_handler.py CHANGED Viewed

@@ -24,6 +24,10 @@ class ClaudeAPIParamsHandler(APIParamsHandlerBase):
                 "exclude_tools",
                 "custom_tools",  # Custom tools configuration (processed separately)
                 "_has_files_api_files",
+                "enable_file_generation",  # Internal flag for file generation (used in system messages only)
+                "enable_image_generation",  # Internal flag for image generation (used in system messages only)
+                "enable_audio_generation",  # Internal flag for audio generation (used in system messages only)
+                "enable_video_generation",  # Internal flag for video generation (used in system messages only)
             },
         )

massgen/api_params_handler/_gemini_api_params_handler.py CHANGED Viewed

@@ -19,6 +19,10 @@ class GeminiAPIParamsHandler(APIParamsHandlerBase):
             "allowed_tools",
             "exclude_tools",
             "custom_tools",
+            "enable_file_generation",  # Internal flag for file generation (used in system messages only)
+            "enable_image_generation",  # Internal flag for image generation (used in system messages only)
+            "enable_audio_generation",  # Internal flag for audio generation (used in system messages only)
+            "enable_video_generation",  # Internal flag for video generation (used in system messages only)
         }
         return set(base) | extra

massgen/api_params_handler/_response_api_params_handler.py CHANGED Viewed

@@ -24,6 +24,10 @@ class ResponseAPIParamsHandler(APIParamsHandlerBase):
                 "exclude_tools",
                 "custom_tools",  # Custom tools configuration (processed separately)
                 "_has_file_search_files",  # Internal flag for file search tracking
+                "enable_file_generation",  # Internal flag for file generation (used in system messages only)
+                "enable_image_generation",  # Internal flag for image generation (used in system messages only)
+                "enable_audio_generation",  # Internal flag for audio generation (used in system messages only)
+                "enable_video_generation",  # Internal flag for video generation (used in system messages only)
             },
         )

massgen/backend/base_with_custom_tool_and_mcp.py CHANGED Viewed

@@ -284,9 +284,19 @@ class CustomToolAndMCPBackend(LLMBackend):
                     # Register each function with its corresponding values
                     for i, func in enumerate(functions):
+                        # Inject agent_cwd into preset_args if filesystem_manager is available
+                        final_preset_args = preset_args_list[i].copy() if preset_args_list[i] else {}
+                        if self.filesystem_manager and self.filesystem_manager.cwd:
+                            final_preset_args["agent_cwd"] = self.filesystem_manager.cwd
+                            logger.info(f"Injecting agent_cwd for {func}: {self.filesystem_manager.cwd}")
+                        elif self.filesystem_manager:
+                            logger.warning(f"filesystem_manager exists but cwd is None for {func}")
+                        else:
+                            logger.warning(f"No filesystem_manager available for {func}")
                         # Load the function first if custom name is needed
                         if names[i] and names[i] != func:
-                            # Need to load function and apply custom name
+                            # Load function to apply custom name
                             if path:
                                 loaded_func = self.custom_tool_manager._load_function_from_path(path, func)
                             else:
@@ -296,7 +306,6 @@ class CustomToolAndMCPBackend(LLMBackend):
                                 logger.error(f"Could not load function '{func}' from path: {path}")
                                 continue
-                            # Apply custom name by modifying __name__ attribute
                             loaded_func.__name__ = names[i]
                             # Register with loaded function (no path needed)
@@ -304,7 +313,7 @@ class CustomToolAndMCPBackend(LLMBackend):
                                 path=None,
                                 func=loaded_func,
                                 category=category,
-                                preset_args=preset_args_list[i],
+                                preset_args=final_preset_args,
                                 description=descriptions[i],
                             )
                         else:
@@ -313,7 +322,7 @@ class CustomToolAndMCPBackend(LLMBackend):
                                 path=path,
                                 func=func,
                                 category=category,
-                                preset_args=preset_args_list[i],
+                                preset_args=final_preset_args,
                                 description=descriptions[i],
                             )
@@ -404,9 +413,19 @@ class CustomToolAndMCPBackend(LLMBackend):
         """
         import json
+        # Parse arguments
+        arguments = json.loads(call["arguments"]) if isinstance(call["arguments"], str) else call["arguments"]
+        # Ensure agent_cwd is always injected if filesystem_manager is available
+        # This provides a fallback in case preset_args didn't work during registration
+        if self.filesystem_manager and self.filesystem_manager.cwd:
+            if "agent_cwd" not in arguments or arguments.get("agent_cwd") is None:
+                arguments["agent_cwd"] = self.filesystem_manager.cwd
+                logger.info(f"Dynamically injected agent_cwd at execution time: {self.filesystem_manager.cwd}")
         tool_request = {
             "name": call["name"],
-            "input": json.loads(call["arguments"]) if isinstance(call["arguments"], str) else call["arguments"],
+            "input": arguments,
         }
         result_text = ""
@@ -1120,6 +1139,7 @@ class CustomToolAndMCPBackend(LLMBackend):
         **kwargs,
     ) -> AsyncGenerator[StreamChunk, None]:
         """Simple passthrough streaming without MCP processing."""
         agent_id = kwargs.get("agent_id", None)
         all_params = {**self.config, **kwargs}
         processed_messages = await self._process_upload_files(messages, all_params)

massgen/backend/docs/permissions_and_context_files.md CHANGED Viewed

@@ -1067,8 +1067,8 @@ Files delivered:
 - **Multi-Turn Design**: `docs/dev_notes/multi_turn_filesystem_design.md` - Detailed architecture for session persistence and turn-based workflows
 - **MCP Integration**: `docs/dev_notes/gemini_filesystem_mcp_design.md` - How filesystem access works through Model Context Protocol
 - **Context Sharing**: `docs/dev_notes/v0.0.14-context.md` - Original context sharing design
-- **User Context Paths**: `docs/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
-- **Claude Code Workspace**: `docs/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
+- **User Context Paths**: `docs/source/examples/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
+- **Claude Code Workspace**: `docs/source/examples/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
 ## Conclusion

massgen/backend/response.py CHANGED Viewed

@@ -57,6 +57,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
         Wraps parent implementation to ensure File Search cleanup happens after streaming completes.
         """
         try:
             async for chunk in super().stream_with_tools(messages, tools, **kwargs):
                 yield chunk
@@ -145,6 +146,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
         **kwargs,
     ) -> AsyncGenerator[StreamChunk, None]:
         """Recursively stream MCP responses, executing function calls as needed."""
         agent_id = kwargs.get("agent_id")
         # Build API params for this iteration

massgen/configs/README.md CHANGED Viewed

@@ -227,53 +227,62 @@ Most configurations use environment variables for API keys:so
 ## Release History & Examples
-### v0.1.3 - Latest
-**New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
+### v0.1.4 - Latest
+**New Features:** Multimodal Generation Tools, Binary File Protection, Crawl4AI Integration
 **Configuration Files:**
-- `configs/tools/custom_tools/multimodal_tools/understand_image.yaml` - Image analysis configuration
-- `configs/tools/custom_tools/multimodal_tools/understand_audio.yaml` - Audio transcription configuration
-- `configs/tools/custom_tools/multimodal_tools/understand_video.yaml` - Video analysis configuration
-- `configs/tools/custom_tools/multimodal_tools/understand_file.yaml` - Document processing configuration
+- `text_to_image_generation_single.yaml` / `text_to_image_generation_multi.yaml` - Image generation
+- `text_to_video_generation_single.yaml` / `text_to_video_generation_multi.yaml` - Video generation
+- `text_to_speech_generation_single.yaml` / `text_to_speech_generation_multi.yaml` - Audio generation
+- `text_to_file_generation_single.yaml` / `text_to_file_generation_multi.yaml` - Document generation
+- `crawl4ai_example.yaml` - Web scraping configuration
 **Documentation:**
-- `massgen/tool/docs/multimodal_tools.md` - Complete 779-line multimodal tools guide
-- `docs/source/user_guide/multimodal.rst` - Updated multimodal documentation with custom tools
-- `docs/source/user_guide/code_execution.rst` - Enhanced with 98 lines documenting sudo mode
-- `massgen/docker/README.md` - Updated Docker documentation with sudo mode instructions
-**Case Study:**
-- [Multimodal Video Understanding](../../docs/case_studies/multimodal-case-study-video-analysis.md)
-**Example Resources:**
-- `configs/resources/v0.1.3-example/multimodality.jpg` - Image example
-- `configs/resources/v0.1.3-example/Sherlock_Holmes.mp3` - Audio example
-- `configs/resources/v0.1.3-example/oppenheimer_trailer_1920.mp4` - Video example
-- `configs/resources/v0.1.3-example/TUMIX.pdf` - PDF document example
+- `README_PYPI.md` - Standalone PyPI package documentation
+- `docs/dev_notes/release_checklist.md` - Release workflow guide
+- `docs/source/user_guide/protected_paths.rst` - Binary file protection documentation
+- `.github/workflows/docs-automation.yml` - Documentation CI/CD automation
 **Key Features:**
-- **Post-Evaluation Tools**: Submit and restart capabilities for winning agents with confidence assessments
-- **Multimodal Understanding**: Analyze images, audio, video, and documents using GPT-4.1
-- **Docker Sudo Mode**: Execute privileged commands in containerized environments
-- **Config Builder**: Improved workflow with auto-detection and better provider handling
+- **Generation Tools**: Create images, videos, audio, and documents using OpenAI APIs
+- **Binary File Protection**: Automatic blocking prevents text tools from reading 40+ binary file types
+- **Web Scraping**: Crawl4AI integration for intelligent content extraction
+- **Enhanced Security**: Smart tool suggestions guide users to appropriate specialized tools
 **Try it:**
 ```bash
 # Install or upgrade
 pip install --upgrade massgen
+# Generate an image from text
+massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_image_generation_single \
+  "Please generate an image of a cat in space."
+# Generate a video from text
+massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_video_generation_single \
+  "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
+# Generate documents (PDF, DOCX, etc.)
+massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_file_generation_single \
+  "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs)."
+```
+### v0.1.3
+**New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
+**Configuration Files:**
+- `understand_image.yaml`, `understand_audio.yaml`, `understand_video.yaml`, `understand_file.yaml`
+**Key Features:**
+- **Post-Evaluation Tools**: Submit and restart capabilities for winning agents
+- **Multimodal Understanding**: Analyze images, audio, video, and documents
+- **Docker Sudo Mode**: Execute privileged commands in containers
+**Try it:**
+```bash
 # Try multimodal image understanding
-# (Requires OPENAI_API_KEY in .env)
 massgen --config @examples/tools/custom_tools/multimodal_tools/understand_image \
   "Please summarize the content in this image."
-# Try multimodal audio understanding
-massgen --config @examples/tools/custom_tools/multimodal_tools/understand_audio \
-  "Please summarize the content in this audio."
-# Try multimodal video understanding
-massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video \
-  "What's happening in this video?"
 ```
 ### v0.1.2
@@ -284,7 +293,7 @@ massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video
 - `configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
 **Documentation:**
-- `docs/case_studies/INTELLIGENT_PLANNING_MODE.md` - Complete intelligent planning mode guide
+- `docs/dev_notes/intelligent_planning_mode.md` - Complete intelligent planning mode guide
 **Key Features:**
 - **Intelligent Planning Mode**: Automatic analysis of question irreversibility for dynamic MCP tool blocking
@@ -392,7 +401,7 @@ massgen --config @examples/tools/code-execution/docker_with_resource_limits \
 - `massgen/configs/basic/single/single_gpt4o_video_generation.yaml` - Video generation with OpenAI Sora-2
 **Case Study:**
-- [Universal Code Execution via MCP](../../docs/case_studies/universal-code-execution-mcp.md)
+- [Universal Code Execution via MCP](../../docs/source/examples/case_studies/universal-code-execution-mcp.md)
 **Key Features:**
 - Universal `execute_command` tool works across Claude, Gemini, OpenAI (Response API), and Chat Completions providers (Grok, ZAI, etc.)
@@ -465,7 +474,7 @@ massgen --config @examples/tools/filesystem/cc_gpt5_gemini_filesystem \
 - New `FileOperationTracker` class for read-before-delete enforcement
 - Enhanced PathPermissionManager with operation tracking methods
-**Case Study:** [MCP Planning Mode](../../docs/case_studies/mcp-planning-mode.md)
+**Case Study:** [MCP Planning Mode](../../docs/source/examples/case_studies/mcp-planning-mode.md)
 **Try it:**
 ```bash
@@ -492,7 +501,7 @@ massgen --config @examples/tools/planning/five_agents_twitter_mcp_planning_mode
 - New `ExternalAgentBackend` class bridging MassGen with external frameworks
 - Multiple code executor types: LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor, JupyterCodeExecutor, YepCodeCodeExecutor
-**Case Study:** [AG2 Framework Integration](../../docs/case_studies/ag2-framework-integration.md)
+**Case Study:** [AG2 Framework Integration](../../docs/source/examples/case_studies/ag2-framework-integration.md)
 **Try it:**
 ```bash
@@ -561,7 +570,7 @@ massgen --config @examples/tools/filesystem/gemini_gpt5nano_file_context_path \
 - Automatic `.massgen` directory management for persistent conversation context
 - Enhanced path permissions with `will_be_writable` flag and smart exclusion patterns
-**Case Study:** [Multi-Turn Filesystem Support](../../docs/case_studies/multi-turn-filesystem-support.md)
+**Case Study:** [Multi-Turn Filesystem Support](../../docs/source/examples/case_studies/multi-turn-filesystem-support.md)
 ```bash
 # Turn 1 - Initial creation
 Turn 1: Make a website about Bob Dylan
@@ -599,7 +608,7 @@ massgen --config @examples/basic/multi/two_qwen_vllm \
 - All configs now organized by provider & use case (basic/, providers/, tools/, teams/)
 - Use same configs as v0.0.21 for compatibility, but now with improved performance
-**Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
+**Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/source/examples/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
 ```bash
 # Multi-agent collaboration with granular filesystem permissions
 massgen --config @examples/tools/filesystem/gpt5mini_cc_fs_context_path "Enhance the website in massgen/configs/resources with: 1) A dark/light theme toggle with smooth transitions, 2) An interactive feature that helps users engage with the blog content (your choice - could be search, filtering by topic, reading time estimates, social sharing, reactions, etc.), and 3) Visual polish with CSS animations or transitions that make the site feel more modern and responsive. Use vanilla JavaScript and be creative with the implementation details."
@@ -645,7 +654,7 @@ massgen --config @examples/tools/mcp/gpt5_nano_mcp_example \
 ### v0.0.16
 **New Features:** Unified Filesystem Support with MCP Integration
-**Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/case_studies/unified-filesystem-mcp-integration.md)
+**Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/source/examples/case_studies/unified-filesystem-mcp-integration.md)
 ```bash
 # Gemini and Claude Code agents with unified filesystem via MCP
 massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code "Create a presentation that teaches a reinforcement learning algorithm and output it in LaTeX Beamer format. No figures should be added."
@@ -658,7 +667,7 @@ massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code
 ### v0.0.12 - v0.0.14
 **New Features:** Enhanced Logging and Workspace Management
-**Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/case_studies/claude-code-workspace-management.md)
+**Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/source/examples/case_studies/claude-code-workspace-management.md)
 ```bash
 # Multi-agent Claude Code collaboration with enhanced workspace isolation
 massgen --config @examples/tools/filesystem/claude_code_context_sharing "Create a website about a diverse set of fun facts about LLMs, placing the output in one index.html file"

massgen/configs/tools/custom_tools/crawl4ai_example.yaml ADDED Viewed

@@ -0,0 +1,55 @@
+# MassGen Configuration: Crawl4AI Web Scraping via Custom Tools
+#
+# Prerequisites:
+#   1. Start crawl4ai Docker container (one-time setup):
+#      docker pull unclecode/crawl4ai:latest
+#      docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
+#
+#   2. Verify container is running:
+#      docker ps | grep crawl4ai
+#
+#   3. Test REST API endpoint (optional):
+#      curl -X POST http://localhost:11235/md -H "Content-Type: application/json" -d '{"url": "https://example.com", "f": "fit"}'
+#
+# Usage:
+#   massgen --config @examples/tools/custom_tools/crawl4ai_example.yaml "Please search for the MassGen docs, take a screenshot of the website, and explain that screenshot"
+#
+# Available Tools (via Custom Tools):
+#   - crawl4ai_md: Generate markdown from web content
+#   - crawl4ai_html: Extract preprocessed HTML
+#   - crawl4ai_screenshot: Capture webpage screenshots
+#   - crawl4ai_pdf: Generate PDF documents
+#   - crawl4ai_execute_js: Run JavaScript on web pages
+#   - crawl4ai_crawl: Perform multi-URL crawling
+#   - crawl4ai_ask: Query the Crawl4AI library context (not registered in this config's custom_tools list)
+#
+# Note: Multiple agents can use these tools concurrently.
+#       The server handles up to 5 concurrent crawls by default.
+agents:
+  - id: "web_scraper_agent"
+    backend:
+      type: "openai"  # Works with any backend: openai, gemini, claude_code, etc.
+      model: "gpt-5-mini"
+      cwd: "workspace1"
+      # Register crawl4ai custom tools
+      custom_tools:
+        - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
+          category: "web_scraping"
+          path: "massgen/tool/_web_tools/crawl4ai_tool.py"
+          function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+  coordination:
+    max_orchestration_restarts: 2  # Default is 0; a value of 2 allows 3 total attempts (initial + 2 restarts)
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,61 @@
+# MassGen Configuration: Text to File Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
+agents:
+  - id: "text_to_file_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+      When generating PPTX presentations, format your content with:
+      - Use "# Title" or "## Title" for slide titles
+      - Use "---" to separate slides
+      - Use "- Item" for bullet points
+      - Use "  - Subitem" for sub-bullets (two spaces indent)
+      - Structure content in a slide-friendly format with clear, concise points
+  - id: "text_to_file_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+      When generating PPTX presentations, format your content with:
+      - Use "# Title" or "## Title" for slide titles
+      - Use "---" to separate slides
+      - Use "- Item" for bullet points
+      - Use "  - Subitem" for sub-bullets (two spaces indent)
+      - Structure content in a slide-friendly format with clear, concise points
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# MassGen Configuration: Text to File Generation Tool
+# Usage:
+#   uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
+agents:
+  - id: "text_to_file_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_file_generation: true
+      custom_tools:
+        - name: ["text_to_file_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
+          function: ["text_to_file_generation"]
+        - name: ["understand_file"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_file.py"
+          function: ["understand_file"]
+    system_message: |
+      You are an AI assistant with access to text-to-file generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+# MassGen Configuration: Text to Image Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
+agents:
+  - id: "text_to_image_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+        - name: ["image_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
+          function: ["image_to_image_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+  - id: "text_to_image_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+# MassGen Configuration: Text to Image Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
+agents:
+  - id: "text_to_image_generation_tool"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_image_generation: true
+      custom_tools:
+        - name: ["text_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
+          function: ["text_to_image_generation"]
+        - name: ["understand_image"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_image.py"
+          function: ["understand_image"]
+        - name: ["image_to_image_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
+          function: ["image_to_image_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-image generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "simple"
+  logging_enabled: true

massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml ADDED Viewed

@@ -0,0 +1,55 @@
+# MassGen Configuration: Text to Speech Continue Generation Tool
+# Usage:
+#    uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
+agents:
+  - id: "text_to_speech_continue_generation_tool1"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace1"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+  - id: "text_to_speech_continue_generation_tool2"
+    backend:
+      type: "openai"
+      model: "gpt-4o"
+      cwd: "workspace2"
+      enable_audio_generation: true
+      custom_tools:
+        - name: ["text_to_speech_transcription_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
+          function: ["text_to_speech_transcription_generation"]
+        - name: ["understand_audio"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/understand_audio.py"
+          function: ["understand_audio"]
+        - name: ["text_to_speech_continue_generation"]
+          category: "multimodal"
+          path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
+          function: ["text_to_speech_continue_generation"]
+    system_message: |
+      You are an AI assistant with access to text-to-speech generation capabilities.
+orchestrator:
+  snapshot_storage: "snapshots"
+  agent_temporary_workspace: "temp_workspaces"
+ui:
+  display_type: "rich_terminal"
+  logging_enabled: true

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

Potentially problematic release.

massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl