massgen 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (46) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/chat_agent.py +340 -20
  3. massgen/cli.py +326 -19
  4. massgen/configs/README.md +52 -10
  5. massgen/configs/memory/gpt5mini_gemini_baseline_research_to_implementation.yaml +94 -0
  6. massgen/configs/memory/gpt5mini_gemini_context_window_management.yaml +187 -0
  7. massgen/configs/memory/gpt5mini_gemini_research_to_implementation.yaml +127 -0
  8. massgen/configs/memory/gpt5mini_high_reasoning_gemini.yaml +107 -0
  9. massgen/configs/memory/single_agent_compression_test.yaml +64 -0
  10. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  11. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  12. massgen/memory/README.md +277 -0
  13. massgen/memory/__init__.py +26 -0
  14. massgen/memory/_base.py +193 -0
  15. massgen/memory/_compression.py +237 -0
  16. massgen/memory/_context_monitor.py +211 -0
  17. massgen/memory/_conversation.py +255 -0
  18. massgen/memory/_fact_extraction_prompts.py +333 -0
  19. massgen/memory/_mem0_adapters.py +257 -0
  20. massgen/memory/_persistent.py +687 -0
  21. massgen/memory/docker-compose.qdrant.yml +36 -0
  22. massgen/memory/docs/DESIGN.md +388 -0
  23. massgen/memory/docs/QUICKSTART.md +409 -0
  24. massgen/memory/docs/SUMMARY.md +319 -0
  25. massgen/memory/docs/agent_use_memory.md +408 -0
  26. massgen/memory/docs/orchestrator_use_memory.md +586 -0
  27. massgen/memory/examples.py +237 -0
  28. massgen/orchestrator.py +207 -7
  29. massgen/tests/memory/test_agent_compression.py +174 -0
  30. massgen/tests/memory/test_context_window_management.py +286 -0
  31. massgen/tests/memory/test_force_compression.py +154 -0
  32. massgen/tests/memory/test_simple_compression.py +147 -0
  33. massgen/tests/test_agent_memory.py +534 -0
  34. massgen/tests/test_conversation_memory.py +382 -0
  35. massgen/tests/test_orchestrator_memory.py +620 -0
  36. massgen/tests/test_persistent_memory.py +435 -0
  37. massgen/token_manager/token_manager.py +6 -0
  38. massgen/tools/__init__.py +8 -0
  39. massgen/tools/_planning_mcp_server.py +520 -0
  40. massgen/tools/planning_dataclasses.py +434 -0
  41. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/METADATA +109 -76
  42. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/RECORD +46 -12
  43. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/WHEEL +0 -0
  44. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/entry_points.txt +0 -0
  45. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/licenses/LICENSE +0 -0
  46. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,187 @@
1
+ # Example Configuration: Context Window Management with Memory
2
+ #
3
+ # Use Case: Demonstrates automatic context compression when approaching token limits
4
+ #
5
+ # This configuration demonstrates:
6
+ # - Automatic context window monitoring and compression
7
+ # - Token-aware conversation management (25% trigger threshold, 10% compression target in this config)
8
+ # - Persistent memory integration for long-term knowledge retention
9
+ # - Graceful handling when context window fills up
10
+ # - Multi-agent collaboration with shared context management
11
+ #
12
+ # Run with:
13
+ # uv run massgen --config @examples/memory/gpt5mini_gemini_context_window_management.yaml "Analyze the MassGen codebase comprehensively. Create an architecture document that explains: (1) Core components and their responsibilities, (2) How different modules interact, (3) Key design patterns used, (4) Main entry points and request flows. Read > 30 files to build a complete understanding."
14
+
15
+ # ====================
16
+ # AGENT DEFINITIONS
17
+ # ====================
18
+ agents:
19
+ - id: "agent_a"
20
+ backend:
21
+ # Use GPT-5-mini with medium reasoning
22
+ type: "openai"
23
+ model: "gpt-5-mini"
24
+ text:
25
+ verbosity: "medium"
26
+ reasoning:
27
+ effort: "medium"
28
+ summary: "auto"
29
+ cwd: "workspace1"
30
+
31
+ - id: "agent_b"
32
+ backend:
33
+ # Use Gemini 2.5 Flash for cost-effective testing
34
+ type: "gemini"
35
+ model: "gemini-2.5-flash"
36
+ cwd: "workspace2"
37
+
38
+ # ====================
39
+ # MEMORY CONFIGURATION
40
+ # ====================
41
+ memory:
42
+ # Enable/disable persistent memory (default: true)
43
+ enabled: true
44
+
45
+ # Memory configuration
46
+ conversation_memory:
47
+ enabled: true # Short-term conversation tracking (recommended: always true)
48
+
49
+ persistent_memory:
50
+ enabled: true # Long-term knowledge storage (set to false to disable)
51
+ on_disk: true # Persist across restarts
52
+ # session_name: "test_session" # Optional - if not specified, auto-generates unique ID
53
+ # Format: agent_storyteller_20251023_143022_a1b2c3
54
+ # Specify to continue a specific session
55
+
56
+ # Vector store backend (default: qdrant)
57
+ vector_store: "qdrant"
58
+
59
+ # LLM configuration for memory operations (fact extraction)
60
+ # RECOMMENDED: Use mem0's native LLMs (no adapter overhead, no async complexity)
61
+ llm:
62
+ provider: "openai" # Options: openai, anthropic, groq, together, etc.
63
+ model: "gpt-4.1-nano-2025-04-14" # Fast and cheap model for memory ops (mem0's default)
64
+
65
+ # Embedding configuration (uses mem0's native embedders)
66
+ # RECOMMENDED: Specify provider and model for clarity
67
+ embedding:
68
+ provider: "openai" # Options: openai, together, azure_openai, gemini, huggingface, etc.
69
+ model: "text-embedding-3-small" # OpenAI's efficient embedding model
70
+
71
+ # Qdrant client configuration
72
+ # IMPORTANT: For multi-agent setups, use server mode to avoid concurrent access errors
73
+ qdrant:
74
+ mode: "server" # Options: "server" (recommended for multi-agent) or "local" (single agent only)
75
+ host: "localhost" # Qdrant server host (default: localhost)
76
+ port: 6333 # Qdrant server port (default: 6333)
77
+ # For local mode (single agent only):
78
+ # mode: "local"
79
+ # path: ".massgen/qdrant" # Local storage path
80
+
81
+ # Context window management thresholds
82
+ compression:
83
+ trigger_threshold: 0.25 # Compress when context usage exceeds 25%
84
+ target_ratio: 0.10 # Target 10% of context after compression
85
+
86
+ # Memory retrieval configuration
87
+ retrieval:
88
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
89
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
90
+
91
+ # Memory system behavior when enabled:
92
+ # - ConversationMemory: Tracks short-term conversation history (verbatim messages)
93
+ # - PersistentMemory: Stores long-term knowledge in vector database (extracted facts via mem0)
94
+ # - Automatic compression: Triggers at threshold, removes old messages from conversation_memory
95
+ # - Smart retrieval: Only retrieves from persistent_memory AFTER compression
96
+ # - Before compression: All context in conversation_memory, no retrieval (avoids duplicates)
97
+ # - After compression: Retrieves relevant facts from compressed messages
98
+ # - Each agent gets separate memory: agent_name defaults to agent ID (agent_a, agent_b)
99
+ #
100
+ # How mem0 works:
101
+ # - When recording: mem0's LLM extracts key facts from conversations
102
+ # - When retrieving: Returns extracted facts (e.g., "User explored Mars", not full conversation)
103
+ # - retrieval.limit controls how many facts to retrieve (each fact is ~1 sentence)
104
+ #
105
+ # Session management (UNIFIED):
106
+ # - Each agent gets separate memory (agent_name = agent ID: agent_a, agent_b)
107
+ # - Session ID is unified between orchestrator and memory system:
108
+ # - Interactive mode: session_YYYYMMDD_HHMMSS (created at start, shared by all turns)
109
+ # - Single question: temp_YYYYMMDD_HHMMSS (created per run, isolated)
110
+ # - Memories are isolated per session: agent_a in session_1 can't access session_2 memories
111
+ # - To continue a previous session: Specify session_name in YAML (overrides auto-generation)
112
+ # - For cross-session memory: Remove session_name from YAML or set to null
113
+ # - Qdrant database: Shared at .massgen/qdrant, filtered by agent_id + session_id
114
+ #
115
+ # To disable persistent memory for testing, set:
116
+ # memory.persistent_memory.enabled: false
117
+ #
118
+ # See massgen/memory/docs/ for detailed documentation.
119
+
120
+ # ====================
121
+ # ORCHESTRATOR CONFIGURATION
122
+ # ====================
123
+ orchestrator:
124
+ # Session storage enabling multi-turn interactive runs
125
+ session_storage: "memory_test_sessions"
126
+
127
+ # Agent workspace for any file operations
128
+ agent_temporary_workspace: "memory_test_workspaces"
129
+ snapshot_storage: "memory_test_snapshots"
130
+
131
+ # Additional context paths
132
+ context_paths:
133
+ - path: "massgen"
134
+ permission: "read"
135
+
136
+ # ====================
137
+ # UI CONFIGURATION
138
+ # ====================
139
+ ui:
140
+ display_type: "rich_terminal"
141
+ logging_enabled: true
142
+
143
+ # ====================
144
+ # EXECUTION FLOW
145
+ # ====================
146
+ # What happens:
147
+ # 1. User starts an interactive story with the agent
148
+ # 2. Agent responds with detailed narrative (400-600 words per turn)
149
+ # 3. As conversation continues, token usage is monitored automatically
150
+ # 4. When context usage reaches the configured trigger threshold (25% in this config; the example logs below illustrate the 75%/40% defaults):
151
+ # - System logs: "📊 Context usage: X / Y tokens (Z%) - compressing old context"
152
+ # - Old messages are compressed into persistent memory (if configured)
153
+ # - Recent messages (fitting in 40% of context window) are kept
154
+ # - Compression details logged: "📦 Compressed N messages (X tokens) into long-term memory"
155
+ # 5. Agent continues seamlessly with compressed context
156
+ # 6. Story maintains consistency by referencing persistent memories
157
+ # 7. Process repeats as needed for very long conversations
158
+ #
159
+ # Expected logs with persistent memory:
160
+ #
161
+ # Turn 1-10 (Before compression):
162
+ # 📊 Context Window (Turn 5): 45,000 / 128,000 tokens (35%)
163
+ # ⏭️ Skipping retrieval (no compression yet, all context in conversation_memory)
164
+ #
165
+ # Turn 11 (Compression triggers):
166
+ # ⚠️ Context Window (Turn 11): 96,000 / 128,000 tokens (75%) - Approaching limit!
167
+ # 🔄 Attempting compression (96,000 → 51,200 tokens)
168
+ # 📦 Context compressed: Removed 15 old messages (44,800 tokens).
169
+ # Kept 8 recent messages (51,200 tokens).
170
+ # Old messages remain accessible via semantic search.
171
+ # ✅ Conversation history updated after compression: 8 messages
172
+ #
173
+ # Turn 12+ (After compression):
174
+ # 🔍 Retrieving compressed memories (limit=5, compressed=True)...
175
+ # 💭 Retrieved 3 memory fact(s) from mem0
176
+ # [Agent sees: retrieved facts + recent 8 messages - no duplication!]
177
+ #
178
+ # Expected output WITHOUT persistent memory:
179
+ # 📦 Context compressed: Removed 15 messages (44,800 tokens).
180
+ # No persistent memory - old messages NOT retrievable.
181
+ #
182
+ # Token Budget Allocation (after compression):
183
+ # - Conversation history: 40% (kept in active context)
184
+ # - Retrieved memories: ~5 facts (~100-250 tokens)
185
+ # - New user messages: varies
186
+ # - System prompt overhead: varies
187
+ # - Response generation: varies
@@ -0,0 +1,127 @@
1
+ # Example Configuration: Memory-Enhanced Research-to-Implementation Workflow
2
+ #
3
+ # Use Case: Demonstrates how memory enables strategic self-improvement
4
+ #
5
+ # This configuration demonstrates MassGen's self-evolution capabilities:
6
+ # - Session 1: Research multi-agent AI papers using crawl4ai
7
+ # - Session 2: Apply research findings to determine how to improve MassGen itself
8
+ # - Memory: Bridges research phase to implementation analysis
9
+ # - Self-improvement: Agents use external research to enhance their own architecture
10
+ #
11
+ # Prerequisites:
12
+ # 1. Start Qdrant server:
13
+ # docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/.massgen/qdrant_storage:/qdrant/storage:z qdrant/qdrant
14
+ #
15
+ # 2. Start crawl4ai Docker container:
16
+ # docker pull unclecode/crawl4ai:latest
17
+ # docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
18
+ #
19
+ # 3. Verify both containers are running:
20
+ # docker ps | grep -E "qdrant|crawl4ai"
21
+ #
22
+ # Run Session 1 (Research Phase):
23
+ # uv run massgen --config @examples/memory/gpt5mini_gemini_research_to_implementation.yaml "Use crawl4ai to research the latest multi-agent AI papers and techniques from 2025. Focus on: coordination mechanisms, voting strategies, tool-use patterns, and architectural innovations."
24
+ #
25
+ # Run Session 2 (Implementation Analysis) - Same session:
26
+ # "Based on the multi-agent research from earlier, which techniques should we implement in MassGen to make it more state-of-the-art? Consider MassGen's current architecture and what would be most impactful."
27
+ #
28
+
29
+ # ====================
30
+ # AGENT DEFINITIONS
31
+ # ====================
32
+ agents:
33
+ - id: "agent_a"
34
+ backend:
35
+ type: "openai"
36
+ model: "gpt-5-mini"
37
+ text:
38
+ verbosity: "medium"
39
+ reasoning:
40
+ effort: "medium"
41
+ summary: "auto"
42
+ cwd: "workspace1"
43
+
44
+ # Register crawl4ai custom tools for web scraping
45
+ custom_tools:
46
+ - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
47
+ category: "web_scraping"
48
+ path: "massgen/tool/_web_tools/crawl4ai_tool.py"
49
+ function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
50
+
51
+ - id: "agent_b"
52
+ backend:
53
+ type: "gemini"
54
+ model: "gemini-2.5-flash"
55
+ cwd: "workspace2"
56
+
57
+ # Register crawl4ai custom tools for web scraping
58
+ custom_tools:
59
+ - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
60
+ category: "web_scraping"
61
+ path: "massgen/tool/_web_tools/crawl4ai_tool.py"
62
+ function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
63
+
64
+
65
+ # ====================
66
+ # MEMORY CONFIGURATION
67
+ # ====================
68
+ memory:
69
+ enabled: true
70
+
71
+ conversation_memory:
72
+ enabled: true
73
+
74
+ persistent_memory:
75
+ enabled: true
76
+ on_disk: true
77
+ session_name: "research_to_implementation" # Same session for both phases
78
+ vector_store: "qdrant"
79
+
80
+ # LLM configuration for memory operations
81
+ llm:
82
+ provider: "openai"
83
+ model: "gpt-4.1-nano-2025-04-14"
84
+
85
+ # Embedding configuration
86
+ embedding:
87
+ provider: "openai"
88
+ model: "text-embedding-3-small"
89
+
90
+ # Qdrant server configuration (required for multi-agent)
91
+ qdrant:
92
+ mode: "server"
93
+ host: "localhost"
94
+ port: 6333
95
+
96
+ # Context window management
97
+ compression:
98
+ trigger_threshold: 0.75 # Compress at 75%
99
+ target_ratio: 0.40 # Keep 40% after compression
100
+
101
+ # Memory retrieval configuration
102
+ retrieval:
103
+ limit: 10 # Get more facts for cross-phase synthesis
104
+ exclude_recent: true # Only retrieve after compression
105
+
106
+ # ====================
107
+ # ORCHESTRATOR CONFIGURATION
108
+ # ====================
109
+ orchestrator:
110
+ # Multi-turn mode for interactive research sessions
111
+ session_storage: "research_sessions"
112
+ agent_temporary_workspace: "research_workspaces"
113
+ snapshot_storage: "research_snapshots"
114
+
115
+ # Give agents read access to MassGen codebase for Phase 2
116
+ context_paths:
117
+ - path: "massgen"
118
+ permission: "read"
119
+ - path: "docs"
120
+ permission: "read"
121
+
122
+ # ====================
123
+ # UI CONFIGURATION
124
+ # ====================
125
+ ui:
126
+ display_type: "rich_terminal"
127
+ logging_enabled: true
@@ -0,0 +1,107 @@
1
+ # Example Configuration: Context Window Management with Memory
2
+ #
3
+ # Use Case: Demonstrates automatic context compression when approaching token limits
4
+ #
5
+ # This configuration demonstrates:
6
+ # - Automatic context window monitoring and compression
7
+ # - Token-aware conversation management (25% trigger threshold, 10% compression target in this config)
8
+ # - Persistent memory integration for long-term knowledge retention
9
+ # - Graceful handling when context window fills up
10
+ # - Multi-agent collaboration with shared context management
11
+ #
12
+ # Run with:
13
+ # uv run massgen --config @examples/memory/gpt5mini_high_reasoning_gemini.yaml "Analyze the pros and cons of using LLMs in commercial applications."
14
+
15
+ # ====================
16
+ # AGENT DEFINITIONS
17
+ # ====================
18
+ agents:
19
+ - id: "agent_a"
20
+ backend:
21
+ # Use GPT-5-mini with medium reasoning
22
+ type: "openai"
23
+ model: "gpt-5-mini"
24
+ text:
25
+ verbosity: "medium"
26
+ reasoning:
27
+ effort: "high"
28
+ summary: "auto"
29
+ enable_web_search: true
30
+
31
+ - id: "agent_b"
32
+ backend:
33
+ # Use Gemini 2.5 Flash for cost-effective testing
34
+ type: "gemini"
35
+ model: "gemini-2.5-flash"
36
+ enable_web_search: true
37
+
38
+ # ====================
39
+ # MEMORY CONFIGURATION
40
+ # ====================
41
+ memory:
42
+ # Enable/disable persistent memory (default: true)
43
+ enabled: true
44
+
45
+ # Memory configuration
46
+ conversation_memory:
47
+ enabled: true # Short-term conversation tracking (recommended: always true)
48
+
49
+ persistent_memory:
50
+ enabled: true # Long-term knowledge storage (set to false to disable)
51
+ on_disk: true # Persist across restarts
52
+ # session_name: "test_session" # Optional - if not specified, auto-generates unique ID
53
+ # Format: agent_storyteller_20251023_143022_a1b2c3
54
+ # Specify to continue a specific session
55
+
56
+ # Vector store backend (default: qdrant)
57
+ vector_store: "qdrant"
58
+
59
+ # LLM configuration for memory operations (fact extraction)
60
+ # RECOMMENDED: Use mem0's native LLMs (no adapter overhead, no async complexity)
61
+ llm:
62
+ provider: "openai" # Options: openai, anthropic, groq, together, etc.
63
+ model: "gpt-4.1-nano-2025-04-14" # Fast and cheap model for memory ops (mem0's default)
64
+
65
+ # Embedding configuration (uses mem0's native embedders)
66
+ # RECOMMENDED: Specify provider and model for clarity
67
+ embedding:
68
+ provider: "openai" # Options: openai, together, azure_openai, gemini, huggingface, etc.
69
+ model: "text-embedding-3-small" # OpenAI's efficient embedding model
70
+
71
+ # Qdrant client configuration
72
+ # IMPORTANT: For multi-agent setups, use server mode to avoid concurrent access errors
73
+ qdrant:
74
+ mode: "server" # Options: "server" (recommended for multi-agent) or "local" (single agent only)
75
+ host: "localhost" # Qdrant server host (default: localhost)
76
+ port: 6333 # Qdrant server port (default: 6333)
77
+ # For local mode (single agent only):
78
+ # mode: "local"
79
+ # path: ".massgen/qdrant" # Local storage path
80
+
81
+ # Context window management thresholds
82
+ compression:
83
+ trigger_threshold: 0.25 # Compress when context usage exceeds 25%
84
+ target_ratio: 0.10 # Target 10% of context after compression
85
+
86
+ # Memory retrieval configuration
87
+ retrieval:
88
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
89
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
90
+
91
+ # ====================
92
+ # ORCHESTRATOR CONFIGURATION
93
+ # ====================
94
+ orchestrator:
95
+ # Session storage enabling multi-turn interactive runs
96
+ session_storage: "memory_test_sessions"
97
+
98
+ # Agent workspace for any file operations
99
+ agent_temporary_workspace: "memory_test_workspaces"
100
+ snapshot_storage: "memory_test_snapshots"
101
+
102
+ # ====================
103
+ # UI CONFIGURATION
104
+ # ====================
105
+ ui:
106
+ display_type: "rich_terminal"
107
+ logging_enabled: true
@@ -0,0 +1,64 @@
1
+ # Single Agent Compression Test
2
+ #
3
+ # Simple config to test per-agent memory and compression with ONE agent.
4
+ # Set very low thresholds to trigger compression quickly.
5
+ #
6
+ # Run with:
7
+ # uv run massgen --config massgen/configs/memory/single_agent_compression_test.yaml "Analyze the MassGen codebase comprehensively. Create an architecture document that explains: (1) Core components and their responsibilities, (2) How different modules interact, (3) Key design patterns used, (4) Main entry points and request flows. Read > 30 files to build a complete understanding."
8
+
9
+ agents:
10
+ - id: "test_agent"
11
+ backend:
12
+ type: "openai"
13
+ model: "gpt-5-nano"
14
+ cwd: "workspace1"
15
+
16
+ # Global memory configuration
17
+ memory:
18
+ enabled: true
19
+
20
+ conversation_memory:
21
+ enabled: true
22
+
23
+ persistent_memory:
24
+ enabled: true
25
+
26
+ # LLM for memory operations
27
+ llm:
28
+ provider: "openai"
29
+ model: "gpt-4.1-nano-2025-04-14"
30
+
31
+ # Embedding for vector search
32
+ embedding:
33
+ provider: "openai"
34
+ model: "text-embedding-3-small"
35
+
36
+ # Qdrant configuration (use server mode)
37
+ qdrant:
38
+ mode: "server"
39
+ host: "localhost"
40
+ port: 6333
41
+
42
+ compression:
43
+ trigger_threshold: 0.02 # Compress at 2% (very low for quick testing)
44
+ target_ratio: 0.01 # Keep 1% after compression
45
+
46
+ # Memory retrieval configuration
47
+ retrieval:
48
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
49
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
50
+ # When true: retrieval skipped until compression occurs
51
+ # When false: always retrieve (may duplicate recent context)
52
+
53
+ # Orchestrator config
54
+ orchestrator:
55
+ snapshot_storage: "test_compression_sessions"
56
+ agent_temporary_workspace: "test_compression_workspace"
57
+ context_paths:
58
+ - path: "massgen"
59
+ permission: "read"
60
+
61
+ # UI
62
+ ui:
63
+ display_type: "rich_terminal"
64
+ logging_enabled: true
@@ -0,0 +1,98 @@
1
+ # massgen --config massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml "Analyze docs.massgen.ai and tell me how to improve its design."
2
+
3
+ agents:
4
+ - id: agent_a
5
+ backend:
6
+ type: openai
7
+ model: gpt-5-codex
8
+ text:
9
+ verbosity: medium
10
+ reasoning:
11
+ effort: medium
12
+ summary: auto
13
+ cwd: workspace1
14
+ enable_mcp_command_line: true
15
+ command_line_execution_mode: docker
16
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
17
+ enable_web_search: true
18
+ custom_tools:
19
+ - name: ["understand_image"]
20
+ category: "multimodal"
21
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
22
+ function: ["understand_image"]
23
+ mcp_servers:
24
+ playwright:
25
+ type: "stdio"
26
+ command: "npx"
27
+ args: [
28
+ "@playwright/mcp@latest",
29
+ "--browser=chrome", # Use Chrome browser
30
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
31
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
32
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
33
+ # "--save-trace" # Save Playwright traces for debugging
34
+ ]
35
+
36
+ - id: agent_b
37
+ backend:
38
+ type: claude_code
39
+ model: claude-sonnet-4-5-20250929
40
+ cwd: workspace2
41
+ enable_mcp_command_line: true
42
+ command_line_execution_mode: docker
43
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
44
+ custom_tools:
45
+ - name: ["understand_image"]
46
+ category: "multimodal"
47
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
48
+ function: ["understand_image"]
49
+ mcp_servers:
50
+ playwright:
51
+ type: "stdio"
52
+ command: "npx"
53
+ args: [
54
+ "@playwright/mcp@latest",
55
+ "--browser=chrome", # Use Chrome browser
56
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
57
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
58
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
59
+ # "--save-trace" # Save Playwright traces for debugging
60
+ ]
61
+
62
+ - id: agent_c
63
+ backend:
64
+ type: chatcompletion
65
+ base_url: "https://openrouter.ai/api/v1"
66
+ model: qwen/qwen3-coder
67
+ cwd: workspace3
68
+ enable_mcp_command_line: true
69
+ command_line_execution_mode: docker
70
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
71
+ custom_tools:
72
+ - name: ["understand_image"]
73
+ category: "multimodal"
74
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
75
+ function: ["understand_image"]
76
+ mcp_servers:
77
+ playwright:
78
+ type: "stdio"
79
+ command: "npx"
80
+ args: [
81
+ "@playwright/mcp@latest",
82
+ "--browser=chrome", # Use Chrome browser
83
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
84
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
85
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
86
+ # "--save-trace" # Save Playwright traces for debugging
87
+ ]
88
+
89
+ ui:
90
+ display_type: rich_terminal
91
+ logging_enabled: true
92
+ orchestrator:
93
+ snapshot_storage: snapshots
94
+ agent_temporary_workspace: temp_workspaces
95
+ session_storage: sessions
96
+ # voting_sensitivity: balanced
97
+ max_new_answers_per_agent: 5
98
+ # answer_novelty_requirement: balanced
@@ -0,0 +1,54 @@
1
+ # MassGen Configuration: Understand Video Example
2
+ #
3
+ # Use Case: Analyze a specific video file using the understand_video tool
4
+ #
5
+ # This demonstrates direct video analysis without needing to download.
6
+ # The video file is provided as a context path for agents to analyze.
7
+ #
8
+ # Run with:
9
+ # uv run massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml "What is shown in this video?"
10
+
11
+ agents:
12
+ - id: "agent_a"
13
+ backend:
14
+ type: "openai"
15
+ model: "gpt-5-mini"
16
+ text:
17
+ verbosity: "medium"
18
+ reasoning:
19
+ effort: "medium"
20
+ summary: "auto"
21
+ custom_tools:
22
+ - name: ["understand_video"]
23
+ category: "multimodal"
24
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
25
+ function: ["understand_video"]
26
+ cwd: "workspace1"
27
+
28
+ - id: "agent_b"
29
+ backend:
30
+ type: "gemini"
31
+ model: "gemini-2.5-pro"
32
+ custom_tools:
33
+ - name: ["understand_video"]
34
+ category: "multimodal"
35
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
36
+ function: ["understand_video"]
37
+ cwd: "workspace2"
38
+
39
+ orchestrator:
40
+ snapshot_storage: "snapshots"
41
+ agent_temporary_workspace: "temp_workspaces"
42
+ context_paths:
43
+ - path: "massgen/configs/resources/v0.1.3-example/case-study-videos/Dp2oldJJImw.mp4"
44
+ permission: "read"
45
+
46
+ ui:
47
+ display_type: "rich_terminal"
48
+ logging_enabled: true
49
+
50
+ # What happens:
51
+ # 1. Agents have read access to the video file
52
+ # 2. They can use understand_video tool to analyze it
53
+ # 3. Tool extracts 8 frames and analyzes with GPT-4.1
54
+ # 4. Agents collaborate to provide comprehensive insights