massgen 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (46) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/chat_agent.py +340 -20
  3. massgen/cli.py +326 -19
  4. massgen/configs/README.md +52 -10
  5. massgen/configs/memory/gpt5mini_gemini_baseline_research_to_implementation.yaml +94 -0
  6. massgen/configs/memory/gpt5mini_gemini_context_window_management.yaml +187 -0
  7. massgen/configs/memory/gpt5mini_gemini_research_to_implementation.yaml +127 -0
  8. massgen/configs/memory/gpt5mini_high_reasoning_gemini.yaml +107 -0
  9. massgen/configs/memory/single_agent_compression_test.yaml +64 -0
  10. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  11. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  12. massgen/memory/README.md +277 -0
  13. massgen/memory/__init__.py +26 -0
  14. massgen/memory/_base.py +193 -0
  15. massgen/memory/_compression.py +237 -0
  16. massgen/memory/_context_monitor.py +211 -0
  17. massgen/memory/_conversation.py +255 -0
  18. massgen/memory/_fact_extraction_prompts.py +333 -0
  19. massgen/memory/_mem0_adapters.py +257 -0
  20. massgen/memory/_persistent.py +687 -0
  21. massgen/memory/docker-compose.qdrant.yml +36 -0
  22. massgen/memory/docs/DESIGN.md +388 -0
  23. massgen/memory/docs/QUICKSTART.md +409 -0
  24. massgen/memory/docs/SUMMARY.md +319 -0
  25. massgen/memory/docs/agent_use_memory.md +408 -0
  26. massgen/memory/docs/orchestrator_use_memory.md +586 -0
  27. massgen/memory/examples.py +237 -0
  28. massgen/orchestrator.py +207 -7
  29. massgen/tests/memory/test_agent_compression.py +174 -0
  30. massgen/tests/memory/test_context_window_management.py +286 -0
  31. massgen/tests/memory/test_force_compression.py +154 -0
  32. massgen/tests/memory/test_simple_compression.py +147 -0
  33. massgen/tests/test_agent_memory.py +534 -0
  34. massgen/tests/test_conversation_memory.py +382 -0
  35. massgen/tests/test_orchestrator_memory.py +620 -0
  36. massgen/tests/test_persistent_memory.py +435 -0
  37. massgen/token_manager/token_manager.py +6 -0
  38. massgen/tools/__init__.py +8 -0
  39. massgen/tools/_planning_mcp_server.py +520 -0
  40. massgen/tools/planning_dataclasses.py +434 -0
  41. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/METADATA +109 -76
  42. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/RECORD +46 -12
  43. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/WHEEL +0 -0
  44. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/entry_points.txt +0 -0
  45. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/licenses/LICENSE +0 -0
  46. {massgen-0.1.4.dist-info → massgen-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,187 @@
1
+ # Example Configuration: Context Window Management with Memory
2
+ #
3
+ # Use Case: Demonstrates automatic context compression when approaching token limits
4
+ #
5
+ # This configuration demonstrates:
6
+ # - Automatic context window monitoring and compression
7
+ # - Token-aware conversation management (25% trigger threshold, 10% compression target in this config)
8
+ # - Persistent memory integration for long-term knowledge retention
9
+ # - Graceful handling when context window fills up
10
+ # - Multi-agent collaboration with shared context management
11
+ #
12
+ # Run with:
13
+ # uv run massgen --config @examples/memory/gpt5mini_gemini_context_window_management.yaml "Analyze the MassGen codebase comprehensively. Create an architecture document that explains: (1) Core components and their responsibilities, (2) How different modules interact, (3) Key design patterns used, (4) Main entry points and request flows. Read > 30 files to build a complete understanding."
14
+
15
+ # ====================
16
+ # AGENT DEFINITIONS
17
+ # ====================
18
+ agents:
19
+ - id: "agent_a"
20
+ backend:
21
+ # Use GPT-5-mini with medium reasoning
22
+ type: "openai"
23
+ model: "gpt-5-mini"
24
+ text:
25
+ verbosity: "medium"
26
+ reasoning:
27
+ effort: "medium"
28
+ summary: "auto"
29
+ cwd: "workspace1"
30
+
31
+ - id: "agent_b"
32
+ backend:
33
+ # Use Gemini 2.5 Flash for cost-effective testing
34
+ type: "gemini"
35
+ model: "gemini-2.5-flash"
36
+ cwd: "workspace2"
37
+
38
+ # ====================
39
+ # MEMORY CONFIGURATION
40
+ # ====================
41
+ memory:
42
+ # Enable/disable persistent memory (default: true)
43
+ enabled: true
44
+
45
+ # Memory configuration
46
+ conversation_memory:
47
+ enabled: true # Short-term conversation tracking (recommended: always true)
48
+
49
+ persistent_memory:
50
+ enabled: true # Long-term knowledge storage (set to false to disable)
51
+ on_disk: true # Persist across restarts
52
+ # session_name: "test_session" # Optional - if not specified, auto-generates unique ID
53
+ # Format: agent_storyteller_20251023_143022_a1b2c3
54
+ # Specify to continue a specific session
55
+
56
+ # Vector store backend (default: qdrant)
57
+ vector_store: "qdrant"
58
+
59
+ # LLM configuration for memory operations (fact extraction)
60
+ # RECOMMENDED: Use mem0's native LLMs (no adapter overhead, no async complexity)
61
+ llm:
62
+ provider: "openai" # Options: openai, anthropic, groq, together, etc.
63
+ model: "gpt-4.1-nano-2025-04-14" # Fast and cheap model for memory ops (mem0's default)
64
+
65
+ # Embedding configuration (uses mem0's native embedders)
66
+ # RECOMMENDED: Specify provider and model for clarity
67
+ embedding:
68
+ provider: "openai" # Options: openai, together, azure_openai, gemini, huggingface, etc.
69
+ model: "text-embedding-3-small" # OpenAI's efficient embedding model
70
+
71
+ # Qdrant client configuration
72
+ # IMPORTANT: For multi-agent setups, use server mode to avoid concurrent access errors
73
+ qdrant:
74
+ mode: "server" # Options: "server" (recommended for multi-agent) or "local" (single agent only)
75
+ host: "localhost" # Qdrant server host (default: localhost)
76
+ port: 6333 # Qdrant server port (default: 6333)
77
+ # For local mode (single agent only):
78
+ # mode: "local"
79
+ # path: ".massgen/qdrant" # Local storage path
80
+
81
+ # Context window management thresholds
82
+ compression:
83
+ trigger_threshold: 0.25 # Compress when context usage exceeds 25%
84
+ target_ratio: 0.10 # Target 10% of context after compression
85
+
86
+ # Memory retrieval configuration
87
+ retrieval:
88
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
89
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
90
+
91
+ # Memory system behavior when enabled:
92
+ # - ConversationMemory: Tracks short-term conversation history (verbatim messages)
93
+ # - PersistentMemory: Stores long-term knowledge in vector database (extracted facts via mem0)
94
+ # - Automatic compression: Triggers at threshold, removes old messages from conversation_memory
95
+ # - Smart retrieval: Only retrieves from persistent_memory AFTER compression
96
+ # - Before compression: All context in conversation_memory, no retrieval (avoids duplicates)
97
+ # - After compression: Retrieves relevant facts from compressed messages
98
+ # - Each agent gets separate memory: agent_name defaults to agent ID (agent_a, agent_b)
99
+ #
100
+ # How mem0 works:
101
+ # - When recording: mem0's LLM extracts key facts from conversations
102
+ # - When retrieving: Returns extracted facts (e.g., "User explored Mars", not full conversation)
103
+ # - retrieval.limit controls how many facts to retrieve (each fact is ~1 sentence)
104
+ #
105
+ # Session management (UNIFIED):
106
+ # - Each agent gets separate memory (agent_name = agent ID: agent_a, agent_b)
107
+ # - Session ID is unified between orchestrator and memory system:
108
+ # - Interactive mode: session_YYYYMMDD_HHMMSS (created at start, shared by all turns)
109
+ # - Single question: temp_YYYYMMDD_HHMMSS (created per run, isolated)
110
+ # - Memories are isolated per session: agent_a in session_1 can't access session_2 memories
111
+ # - To continue a previous session: Specify session_name in YAML (overrides auto-generation)
112
+ # - For cross-session memory: Remove session_name from YAML or set to null
113
+ # - Qdrant database: Shared at .massgen/qdrant, filtered by agent_id + session_id
114
+ #
115
+ # To disable persistent memory for testing, set:
116
+ # memory.persistent_memory.enabled: false
117
+ #
118
+ # See massgen/memory/docs/ for detailed documentation.
119
+
120
+ # ====================
121
+ # ORCHESTRATOR CONFIGURATION
122
+ # ====================
123
+ orchestrator:
124
+ # Session storage enabling multi-turn interactive runs
125
+ session_storage: "memory_test_sessions"
126
+
127
+ # Agent workspace for any file operations
128
+ agent_temporary_workspace: "memory_test_workspaces"
129
+ snapshot_storage: "memory_test_snapshots"
130
+
131
+ # Additional context paths
132
+ context_paths:
133
+ - path: "massgen"
134
+ permission: "read"
135
+
136
+ # ====================
137
+ # UI CONFIGURATION
138
+ # ====================
139
+ ui:
140
+ display_type: "rich_terminal"
141
+ logging_enabled: true
142
+
143
+ # ====================
144
+ # EXECUTION FLOW
145
+ # ====================
146
+ # What happens:
147
+ # 1. User starts an interactive story with the agent
148
+ # 2. Agent responds with detailed narrative (400-600 words per turn)
149
+ # 3. As conversation continues, token usage is monitored automatically
150
+ # 4. When context usage reaches the configured trigger threshold (25% in this config; the example logs below illustrate the 75%/40% defaults):
151
+ # - System logs: "📊 Context usage: X / Y tokens (Z%) - compressing old context"
152
+ # - Old messages are compressed into persistent memory (if configured)
153
+ # - Recent messages (fitting in 40% of context window) are kept
154
+ # - Compression details logged: "📦 Compressed N messages (X tokens) into long-term memory"
155
+ # 5. Agent continues seamlessly with compressed context
156
+ # 6. Story maintains consistency by referencing persistent memories
157
+ # 7. Process repeats as needed for very long conversations
158
+ #
159
+ # Expected logs with persistent memory:
160
+ #
161
+ # Turn 1-10 (Before compression):
162
+ # 📊 Context Window (Turn 5): 45,000 / 128,000 tokens (35%)
163
+ # ⏭️ Skipping retrieval (no compression yet, all context in conversation_memory)
164
+ #
165
+ # Turn 11 (Compression triggers):
166
+ # ⚠️ Context Window (Turn 11): 96,000 / 128,000 tokens (75%) - Approaching limit!
167
+ # 🔄 Attempting compression (96,000 → 51,200 tokens)
168
+ # 📦 Context compressed: Removed 15 old messages (44,800 tokens).
169
+ # Kept 8 recent messages (51,200 tokens).
170
+ # Old messages remain accessible via semantic search.
171
+ # ✅ Conversation history updated after compression: 8 messages
172
+ #
173
+ # Turn 12+ (After compression):
174
+ # 🔍 Retrieving compressed memories (limit=5, compressed=True)...
175
+ # 💭 Retrieved 3 memory fact(s) from mem0
176
+ # [Agent sees: retrieved facts + recent 8 messages - no duplication!]
177
+ #
178
+ # Expected output WITHOUT persistent memory:
179
+ # 📦 Context compressed: Removed 15 messages (44,800 tokens).
180
+ # No persistent memory - old messages NOT retrievable.
181
+ #
182
+ # Token Budget Allocation (after compression):
183
+ # - Conversation history: 40% (kept in active context)
184
+ # - Retrieved memories: ~5 facts (~100-250 tokens)
185
+ # - New user messages: varies
186
+ # - System prompt overhead: varies
187
+ # - Response generation: varies
@@ -0,0 +1,127 @@
1
+ # Example Configuration: Memory-Enhanced Research-to-Implementation Workflow
2
+ #
3
+ # Use Case: Demonstrates how memory enables strategic self-improvement
4
+ #
5
+ # This configuration demonstrates MassGen's self-evolution capabilities:
6
+ # - Session 1: Research multi-agent AI papers using crawl4ai
7
+ # - Session 2: Apply research findings to determine how to improve MassGen itself
8
+ # - Memory: Bridges research phase to implementation analysis
9
+ # - Self-improvement: Agents use external research to enhance their own architecture
10
+ #
11
+ # Prerequisites:
12
+ # 1. Start Qdrant server:
13
+ # docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/.massgen/qdrant_storage:/qdrant/storage:z qdrant/qdrant
14
+ #
15
+ # 2. Start crawl4ai Docker container:
16
+ # docker pull unclecode/crawl4ai:latest
17
+ # docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
18
+ #
19
+ # 3. Verify both containers are running:
20
+ # docker ps | grep -E "qdrant|crawl4ai"
21
+ #
22
+ # Run Session 1 (Research Phase):
23
+ # uv run massgen --config @examples/memory/gpt5mini_gemini_research_to_implementation.yaml "Use crawl4ai to research the latest multi-agent AI papers and techniques from 2025. Focus on: coordination mechanisms, voting strategies, tool-use patterns, and architectural innovations."
24
+ #
25
+ # Run Session 2 (Implementation Analysis) - Same session:
26
+ # "Based on the multi-agent research from earlier, which techniques should we implement in MassGen to make it more state-of-the-art? Consider MassGen's current architecture and what would be most impactful."
27
+ #
28
+
29
+ # ====================
30
+ # AGENT DEFINITIONS
31
+ # ====================
32
+ agents:
33
+ - id: "agent_a"
34
+ backend:
35
+ type: "openai"
36
+ model: "gpt-5-mini"
37
+ text:
38
+ verbosity: "medium"
39
+ reasoning:
40
+ effort: "medium"
41
+ summary: "auto"
42
+ cwd: "workspace1"
43
+
44
+ # Register crawl4ai custom tools for web scraping
45
+ custom_tools:
46
+ - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
47
+ category: "web_scraping"
48
+ path: "massgen/tool/_web_tools/crawl4ai_tool.py"
49
+ function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
50
+
51
+ - id: "agent_b"
52
+ backend:
53
+ type: "gemini"
54
+ model: "gemini-2.5-flash"
55
+ cwd: "workspace2"
56
+
57
+ # Register crawl4ai custom tools for web scraping
58
+ custom_tools:
59
+ - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
60
+ category: "web_scraping"
61
+ path: "massgen/tool/_web_tools/crawl4ai_tool.py"
62
+ function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
63
+
64
+
65
+ # ====================
66
+ # MEMORY CONFIGURATION
67
+ # ====================
68
+ memory:
69
+ enabled: true
70
+
71
+ conversation_memory:
72
+ enabled: true
73
+
74
+ persistent_memory:
75
+ enabled: true
76
+ on_disk: true
77
+ session_name: "research_to_implementation" # Same session for both phases
78
+ vector_store: "qdrant"
79
+
80
+ # LLM configuration for memory operations
81
+ llm:
82
+ provider: "openai"
83
+ model: "gpt-4.1-nano-2025-04-14"
84
+
85
+ # Embedding configuration
86
+ embedding:
87
+ provider: "openai"
88
+ model: "text-embedding-3-small"
89
+
90
+ # Qdrant server configuration (required for multi-agent)
91
+ qdrant:
92
+ mode: "server"
93
+ host: "localhost"
94
+ port: 6333
95
+
96
+ # Context window management
97
+ compression:
98
+ trigger_threshold: 0.75 # Compress at 75%
99
+ target_ratio: 0.40 # Keep 40% after compression
100
+
101
+ # Memory retrieval configuration
102
+ retrieval:
103
+ limit: 10 # Get more facts for cross-phase synthesis
104
+ exclude_recent: true # Only retrieve after compression
105
+
106
+ # ====================
107
+ # ORCHESTRATOR CONFIGURATION
108
+ # ====================
109
+ orchestrator:
110
+ # Multi-turn mode for interactive research sessions
111
+ session_storage: "research_sessions"
112
+ agent_temporary_workspace: "research_workspaces"
113
+ snapshot_storage: "research_snapshots"
114
+
115
+ # Give agents read access to MassGen codebase for Phase 2
116
+ context_paths:
117
+ - path: "massgen"
118
+ permission: "read"
119
+ - path: "docs"
120
+ permission: "read"
121
+
122
+ # ====================
123
+ # UI CONFIGURATION
124
+ # ====================
125
+ ui:
126
+ display_type: "rich_terminal"
127
+ logging_enabled: true
@@ -0,0 +1,107 @@
1
+ # Example Configuration: Context Window Management with Memory
2
+ #
3
+ # Use Case: Demonstrates automatic context compression when approaching token limits
4
+ #
5
+ # This configuration demonstrates:
6
+ # - Automatic context window monitoring and compression
7
+ # - Token-aware conversation management (25% trigger threshold, 10% compression target in this config)
8
+ # - Persistent memory integration for long-term knowledge retention
9
+ # - Graceful handling when context window fills up
10
+ # - Multi-agent collaboration with shared context management
11
+ #
12
+ # Run with:
13
+ # uv run massgen --config @examples/memory/gpt5mini_high_reasoning_gemini.yaml "Analyze the pros and cons of using LLMs in commercial applications."
14
+
15
+ # ====================
16
+ # AGENT DEFINITIONS
17
+ # ====================
18
+ agents:
19
+ - id: "agent_a"
20
+ backend:
21
+ # Use GPT-5-mini with medium reasoning
22
+ type: "openai"
23
+ model: "gpt-5-mini"
24
+ text:
25
+ verbosity: "medium"
26
+ reasoning:
27
+ effort: "high"
28
+ summary: "auto"
29
+ enable_web_search: true
30
+
31
+ - id: "agent_b"
32
+ backend:
33
+ # Use Gemini 2.5 Flash for cost-effective testing
34
+ type: "gemini"
35
+ model: "gemini-2.5-flash"
36
+ enable_web_search: true
37
+
38
+ # ====================
39
+ # MEMORY CONFIGURATION
40
+ # ====================
41
+ memory:
42
+ # Enable/disable persistent memory (default: true)
43
+ enabled: true
44
+
45
+ # Memory configuration
46
+ conversation_memory:
47
+ enabled: true # Short-term conversation tracking (recommended: always true)
48
+
49
+ persistent_memory:
50
+ enabled: true # Long-term knowledge storage (set to false to disable)
51
+ on_disk: true # Persist across restarts
52
+ # session_name: "test_session" # Optional - if not specified, auto-generates unique ID
53
+ # Format: agent_storyteller_20251023_143022_a1b2c3
54
+ # Specify to continue a specific session
55
+
56
+ # Vector store backend (default: qdrant)
57
+ vector_store: "qdrant"
58
+
59
+ # LLM configuration for memory operations (fact extraction)
60
+ # RECOMMENDED: Use mem0's native LLMs (no adapter overhead, no async complexity)
61
+ llm:
62
+ provider: "openai" # Options: openai, anthropic, groq, together, etc.
63
+ model: "gpt-4.1-nano-2025-04-14" # Fast and cheap model for memory ops (mem0's default)
64
+
65
+ # Embedding configuration (uses mem0's native embedders)
66
+ # RECOMMENDED: Specify provider and model for clarity
67
+ embedding:
68
+ provider: "openai" # Options: openai, together, azure_openai, gemini, huggingface, etc.
69
+ model: "text-embedding-3-small" # OpenAI's efficient embedding model
70
+
71
+ # Qdrant client configuration
72
+ # IMPORTANT: For multi-agent setups, use server mode to avoid concurrent access errors
73
+ qdrant:
74
+ mode: "server" # Options: "server" (recommended for multi-agent) or "local" (single agent only)
75
+ host: "localhost" # Qdrant server host (default: localhost)
76
+ port: 6333 # Qdrant server port (default: 6333)
77
+ # For local mode (single agent only):
78
+ # mode: "local"
79
+ # path: ".massgen/qdrant" # Local storage path
80
+
81
+ # Context window management thresholds
82
+ compression:
83
+ trigger_threshold: 0.25 # Compress when context usage exceeds 25%
84
+ target_ratio: 0.10 # Target 10% of context after compression
85
+
86
+ # Memory retrieval configuration
87
+ retrieval:
88
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
89
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
90
+
91
+ # ====================
92
+ # ORCHESTRATOR CONFIGURATION
93
+ # ====================
94
+ orchestrator:
95
+ # Session storage enabling multi-turn interactive runs
96
+ session_storage: "memory_test_sessions"
97
+
98
+ # Agent workspace for any file operations
99
+ agent_temporary_workspace: "memory_test_workspaces"
100
+ snapshot_storage: "memory_test_snapshots"
101
+
102
+ # ====================
103
+ # UI CONFIGURATION
104
+ # ====================
105
+ ui:
106
+ display_type: "rich_terminal"
107
+ logging_enabled: true
@@ -0,0 +1,64 @@
1
+ # Single Agent Compression Test
2
+ #
3
+ # Simple config to test per-agent memory and compression with ONE agent.
4
+ # Set very low thresholds to trigger compression quickly.
5
+ #
6
+ # Run with:
7
+ # uv run massgen --config massgen/configs/memory/single_agent_compression_test.yaml "Analyze the MassGen codebase comprehensively. Create an architecture document that explains: (1) Core components and their responsibilities, (2) How different modules interact, (3) Key design patterns used, (4) Main entry points and request flows. Read > 30 files to build a complete understanding."
8
+
9
+ agents:
10
+ - id: "test_agent"
11
+ backend:
12
+ type: "openai"
13
+ model: "gpt-5-nano"
14
+ cwd: "workspace1"
15
+
16
+ # Global memory configuration
17
+ memory:
18
+ enabled: true
19
+
20
+ conversation_memory:
21
+ enabled: true
22
+
23
+ persistent_memory:
24
+ enabled: true
25
+
26
+ # LLM for memory operations
27
+ llm:
28
+ provider: "openai"
29
+ model: "gpt-4.1-nano-2025-04-14"
30
+
31
+ # Embedding for vector search
32
+ embedding:
33
+ provider: "openai"
34
+ model: "text-embedding-3-small"
35
+
36
+ # Qdrant configuration (use server mode)
37
+ qdrant:
38
+ mode: "server"
39
+ host: "localhost"
40
+ port: 6333
41
+
42
+ compression:
43
+ trigger_threshold: 0.02 # Compress at 2% (very low for quick testing)
44
+ target_ratio: 0.01 # Keep 1% after compression
45
+
46
+ # Memory retrieval configuration
47
+ retrieval:
48
+ limit: 5 # Number of memory facts to retrieve from mem0 (default: 5)
49
+ exclude_recent: true # Only retrieve after compression to avoid duplicates (default: true)
50
+ # When true: retrieval skipped until compression occurs
51
+ # When false: always retrieve (may duplicate recent context)
52
+
53
+ # Orchestrator config
54
+ orchestrator:
55
+ snapshot_storage: "test_compression_sessions"
56
+ agent_temporary_workspace: "test_compression_workspace"
57
+ context_paths:
58
+ - path: "massgen"
59
+ permission: "read"
60
+
61
+ # UI
62
+ ui:
63
+ display_type: "rich_terminal"
64
+ logging_enabled: true
@@ -0,0 +1,98 @@
1
+ # massgen --config massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml "Analyze docs.massgen.ai and tell me how to improve its design."
2
+
3
+ agents:
4
+ - id: agent_a
5
+ backend:
6
+ type: openai
7
+ model: gpt-5-codex
8
+ text:
9
+ verbosity: medium
10
+ reasoning:
11
+ effort: medium
12
+ summary: auto
13
+ cwd: workspace1
14
+ enable_mcp_command_line: true
15
+ command_line_execution_mode: docker
16
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
17
+ enable_web_search: true
18
+ custom_tools:
19
+ - name: ["understand_image"]
20
+ category: "multimodal"
21
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
22
+ function: ["understand_image"]
23
+ mcp_servers:
24
+ playwright:
25
+ type: "stdio"
26
+ command: "npx"
27
+ args: [
28
+ "@playwright/mcp@latest",
29
+ "--browser=chrome", # Use Chrome browser
30
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
31
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
32
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
33
+ # "--save-trace" # Save Playwright traces for debugging
34
+ ]
35
+
36
+ - id: agent_b
37
+ backend:
38
+ type: claude_code
39
+ model: claude-sonnet-4-5-20250929
40
+ cwd: workspace2
41
+ enable_mcp_command_line: true
42
+ command_line_execution_mode: docker
43
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
44
+ custom_tools:
45
+ - name: ["understand_image"]
46
+ category: "multimodal"
47
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
48
+ function: ["understand_image"]
49
+ mcp_servers:
50
+ playwright:
51
+ type: "stdio"
52
+ command: "npx"
53
+ args: [
54
+ "@playwright/mcp@latest",
55
+ "--browser=chrome", # Use Chrome browser
56
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
57
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
58
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
59
+ # "--save-trace" # Save Playwright traces for debugging
60
+ ]
61
+
62
+ - id: agent_c
63
+ backend:
64
+ type: chatcompletion
65
+ base_url: "https://openrouter.ai/api/v1"
66
+ model: qwen/qwen3-coder
67
+ cwd: workspace3
68
+ enable_mcp_command_line: true
69
+ command_line_execution_mode: docker
70
+ command_line_docker_network_mode: "bridge" # Enable network access (default: none)
71
+ custom_tools:
72
+ - name: ["understand_image"]
73
+ category: "multimodal"
74
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
75
+ function: ["understand_image"]
76
+ mcp_servers:
77
+ playwright:
78
+ type: "stdio"
79
+ command: "npx"
80
+ args: [
81
+ "@playwright/mcp@latest",
82
+ "--browser=chrome", # Use Chrome browser
83
+ "--caps=vision,pdf", # Enable vision and PDF capabilities
84
+ "--user-data-dir=${cwd}/playwright-profile", # Persistent browser profile within workspace
85
+ "--output-dir=${cwd}", # Save screenshots/PDFs directly to workspace
86
+ # "--save-trace" # Save Playwright traces for debugging
87
+ ]
88
+
89
+ ui:
90
+ display_type: rich_terminal
91
+ logging_enabled: true
92
+ orchestrator:
93
+ snapshot_storage: snapshots
94
+ agent_temporary_workspace: temp_workspaces
95
+ session_storage: sessions
96
+ # voting_sensitivity: balanced
97
+ max_new_answers_per_agent: 5
98
+ # answer_novelty_requirement: balanced
@@ -0,0 +1,54 @@
1
+ # MassGen Configuration: Understand Video Example
2
+ #
3
+ # Use Case: Analyze a specific video file using the understand_video tool
4
+ #
5
+ # This demonstrates direct video analysis without needing to download.
6
+ # The video file is provided as a context path for agents to analyze.
7
+ #
8
+ # Run with:
9
+ # uv run massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml "What is shown in this video?"
10
+
11
+ agents:
12
+ - id: "agent_a"
13
+ backend:
14
+ type: "openai"
15
+ model: "gpt-5-mini"
16
+ text:
17
+ verbosity: "medium"
18
+ reasoning:
19
+ effort: "medium"
20
+ summary: "auto"
21
+ custom_tools:
22
+ - name: ["understand_video"]
23
+ category: "multimodal"
24
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
25
+ function: ["understand_video"]
26
+ cwd: "workspace1"
27
+
28
+ - id: "agent_b"
29
+ backend:
30
+ type: "gemini"
31
+ model: "gemini-2.5-pro"
32
+ custom_tools:
33
+ - name: ["understand_video"]
34
+ category: "multimodal"
35
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
36
+ function: ["understand_video"]
37
+ cwd: "workspace2"
38
+
39
+ orchestrator:
40
+ snapshot_storage: "snapshots"
41
+ agent_temporary_workspace: "temp_workspaces"
42
+ context_paths:
43
+ - path: "massgen/configs/resources/v0.1.3-example/case-study-videos/Dp2oldJJImw.mp4"
44
+ permission: "read"
45
+
46
+ ui:
47
+ display_type: "rich_terminal"
48
+ logging_enabled: true
49
+
50
+ # What happens:
51
+ # 1. Agents have read access to the video file
52
+ # 2. They can use understand_video tool to analyze it
53
+ # 3. Tool extracts 8 frames and analyzes with GPT-4.1
54
+ # 4. Agents collaborate to provide comprehensive insights