massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
  5. massgen/backend/azure_openai.py +9 -1
  6. massgen/backend/base.py +56 -0
  7. massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
  8. massgen/backend/capabilities.py +6 -6
  9. massgen/backend/chat_completions.py +18 -11
  10. massgen/backend/claude_code.py +9 -1
  11. massgen/backend/gemini.py +71 -6
  12. massgen/backend/gemini_utils.py +30 -0
  13. massgen/backend/grok.py +39 -6
  14. massgen/backend/response.py +18 -11
  15. massgen/chat_agent.py +9 -3
  16. massgen/cli.py +319 -43
  17. massgen/config_builder.py +163 -18
  18. massgen/configs/README.md +78 -20
  19. massgen/configs/basic/multi/three_agents_default.yaml +2 -2
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  27. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  35. massgen/configs/tools/memory/README.md +199 -0
  36. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  37. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  38. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  39. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  40. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
  41. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
  42. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
  43. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
  44. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
  45. massgen/docker/README.md +83 -0
  46. massgen/filesystem_manager/_code_execution_server.py +22 -7
  47. massgen/filesystem_manager/_docker_manager.py +21 -1
  48. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  49. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  50. massgen/formatter/_gemini_formatter.py +73 -0
  51. massgen/frontend/coordination_ui.py +175 -257
  52. massgen/frontend/displays/base_display.py +29 -0
  53. massgen/frontend/displays/rich_terminal_display.py +155 -9
  54. massgen/frontend/displays/simple_display.py +21 -0
  55. massgen/frontend/displays/terminal_display.py +22 -2
  56. massgen/logger_config.py +50 -6
  57. massgen/message_templates.py +123 -3
  58. massgen/orchestrator.py +652 -44
  59. massgen/tests/test_code_execution.py +178 -0
  60. massgen/tests/test_intelligent_planning_mode.py +643 -0
  61. massgen/tests/test_orchestration_restart.py +204 -0
  62. massgen/token_manager/token_manager.py +13 -4
  63. massgen/tool/__init__.py +4 -0
  64. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  65. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  66. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  67. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  68. massgen/tool/docs/multimodal_tools.md +779 -0
  69. massgen/tool/workflow_toolkits/__init__.py +26 -0
  70. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  71. massgen/utils.py +1 -0
  72. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
  73. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
  74. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  75. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  76. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  77. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,286 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Test script for Context Window Management with Memory.
5
+
6
+ This script demonstrates how to configure and test the context window
7
+ management feature with persistent memory integration.
8
+
9
+ Usage:
10
+ python massgen/configs/tools/memory/test_context_window_management.py
11
+
12
+ # Or specify a custom config:
13
+ python massgen/configs/tools/memory/test_context_window_management.py --config path/to/config.yaml
14
+ """
15
+
16
+ import asyncio
17
+ import os
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ # Add parent directory to path for imports
22
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
23
+
24
+ import yaml
25
+ from dotenv import load_dotenv
26
+
27
+ from massgen.backend.chat_completions import ChatCompletionsBackend
28
+ from massgen.chat_agent import SingleAgent
29
+ from massgen.memory import ConversationMemory, PersistentMemory
30
+
31
+ # Load environment variables from .env file
32
+ load_dotenv()
33
+
34
+
35
def load_config(config_path: str = None) -> dict:
    """Load the test configuration from a YAML file.

    Args:
        config_path: Path to the YAML config file. When ``None``, the file
            ``gpt5mini_gemini_context_window_management.yaml`` located in the
            same directory as this script is used.

    Returns:
        The parsed YAML document. An empty file yields ``{}`` instead of
        ``None`` so callers can use ``.get`` on the result unconditionally.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    if config_path is None:
        # Default to the config that ships next to this script.
        config_path = Path(__file__).parent / "gpt5mini_gemini_context_window_management.yaml"

    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) can fail on, or silently mangle, non-ASCII characters.
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; normalise to a dict.
        return yaml.safe_load(f) or {}
43
+
44
+
45
async def test_with_persistent_memory(config: dict):
    """Drive a long storytelling chat with conversation AND persistent memory.

    The scenario is skipped (with a printed notice) when ``memory.enabled`` or
    ``memory.persistent_memory.enabled`` is false in ``config``; both default
    to true when absent.

    Otherwise a ``gpt-4o-mini`` backend and a ``text-embedding-3-small``
    backend are created, a ``SingleAgent`` is wired up with
    ``ConversationMemory`` and ``PersistentMemory``, and ten story prompts are
    sent one after another. For every turn the first 200 characters of the
    reply, its total length, and the conversation-memory size are printed.

    Args:
        config: Parsed YAML configuration; only the ``memory`` section is read.
    """
    memory_settings = config.get('memory', {})
    if not memory_settings.get('enabled', True):
        print("\n⚠️ Skipping: memory.enabled is false in config")
        return

    persistent_settings = memory_settings.get('persistent_memory', {})
    if not persistent_settings.get('enabled', True):
        print("\n⚠️ Skipping: memory.persistent_memory.enabled is false in config")
        return

    rule = "=" * 70
    print("\n" + rule)
    print("TEST 1: Context Window Management WITH Persistent Memory")
    print(rule + "\n")

    # Memory settings taken from the config, with the script's own fallbacks.
    agent_name = persistent_settings.get('agent_name', 'storyteller_agent')
    session_name = persistent_settings.get('session_name', 'test_session')
    on_disk = persistent_settings.get('on_disk', True)

    # One chat backend is shared by the agent and the persistent memory.
    llm_backend = ChatCompletionsBackend(
        type="openai",
        model="gpt-4o-mini",  # Use smaller model for faster testing
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Separate backend used by persistent memory for embeddings.
    embedding_backend = ChatCompletionsBackend(
        type="openai",
        model="text-embedding-3-small",
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    conversation_memory = ConversationMemory()
    persistent_memory = PersistentMemory(
        agent_name=agent_name,
        session_name=session_name,
        llm_backend=llm_backend,
        embedding_backend=embedding_backend,
        on_disk=on_disk,
    )

    agent = SingleAgent(
        backend=llm_backend,
        agent_id="storyteller",
        system_message=(
            "You are a creative storyteller. Create detailed, "
            "immersive narratives with rich descriptions."
        ),
        conversation_memory=conversation_memory,
        persistent_memory=persistent_memory,
    )

    # NOTE: the token figures below are fixed informational text; they are not
    # read from the config or from the backend.
    print("✅ Agent initialized with memory")
    print(" - ConversationMemory: Active")
    print(f" - PersistentMemory: Active (agent={agent_name}, session={session_name}, on_disk={on_disk})")
    print(" - Model context window: 128,000 tokens")
    print(" - Compression triggers at: 96,000 tokens (75%)")
    print(" - Target after compression: 51,200 tokens (40%)\n")

    # Prompts intended to produce long replies so the context fills up.
    story_prompts = [
        "Tell me the beginning of a space exploration story. Include details about the ship, crew, and their mission. (Make it 400+ words)",
        "What happens when they encounter their first alien planet? Describe it in vivid detail.",
        "Describe a tense first contact situation with aliens. What do they look like? How do they communicate?",
        "The mission takes an unexpected turn. What crisis occurs and how does the crew respond?",
        "Show me a dramatic action sequence involving the ship's technology and the alien environment.",
        "Reveal a plot twist about one of the crew members or the mission itself.",
        "Continue the story with escalating tension and more discoveries.",
        "How do cultural differences between humans and aliens create conflicts?",
        "Describe a major decision point for the crew captain. What are the stakes?",
        "Bring the story to a climactic moment with high drama.",
    ]

    for turn, prompt in enumerate(story_prompts, start=1):
        print(f"\n--- Turn {turn} ---")
        print(f"User: {prompt}\n")

        # Collect only non-empty "content" chunks from the streamed reply.
        pieces = []
        async for chunk in agent.chat([{"role": "user", "content": prompt}]):
            if chunk.type == "content" and chunk.content:
                pieces.append(chunk.content)
        response_text = "".join(pieces)

        print(f"Agent: {response_text[:200]}...")
        print(f" [{len(response_text)} chars in response]")

        # Report the conversation size so compression can be spotted by eye.
        if conversation_memory:
            size = await conversation_memory.size()
            print(f" [Conversation memory: {size} messages]\n")

    print("\n✅ Test completed!")
    print(" Check the output above for compression logs:")
    print(" - Look for: '📊 Context usage: ...'")
    print(" - Look for: '📦 Compressed N messages into long-term memory'")
147
+
148
+
149
async def test_without_persistent_memory(config: dict):
    """Drive a short storytelling chat with conversation memory only.

    Runs only when ``memory.persistent_memory.enabled`` is false in ``config``
    (it defaults to true when absent); otherwise a notice is printed and the
    function returns. A ``SingleAgent`` is built with ``persistent_memory=None``
    and five prompts are sent; the first 150 characters of every reply are
    printed.

    Args:
        config: Parsed YAML configuration; only the ``memory`` section is read.
    """
    memory_settings = config.get('memory', {})
    if memory_settings.get('persistent_memory', {}).get('enabled', True):
        # The persistent-memory scenario covers this configuration already.
        print("\n⚠️ Skipping Test 2: persistent memory is enabled in config")
        print(" To test without persistent memory, set memory.persistent_memory.enabled: false")
        return

    rule = "=" * 70
    print("\n" + rule)
    print("TEST 2: Context Window Management WITHOUT Persistent Memory")
    print(rule + "\n")

    llm_backend = ChatCompletionsBackend(
        type="openai",
        model="gpt-4o-mini",
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Conversation memory only -- deliberately no persistent memory.
    conversation_memory = ConversationMemory()

    agent = SingleAgent(
        backend=llm_backend,
        agent_id="storyteller_no_persist",
        system_message="You are a creative storyteller.",
        conversation_memory=conversation_memory,
        persistent_memory=None,  # No persistent memory!
    )

    print("⚠️ Agent initialized WITHOUT persistent memory")
    print(" - ConversationMemory: Active")
    print(" - PersistentMemory: NONE")
    print(" - This will trigger warning messages when context fills\n")

    # Shorter run than the persistent-memory scenario.
    story_prompts = [
        "Tell me a 500-word science fiction story about time travel.",
        "Continue the story with 500 more words about paradoxes.",
        "Add another 500 words with a plot twist.",
        "Continue with 500 words about the resolution.",
        "Write a 500-word epilogue.",
    ]

    for turn, prompt in enumerate(story_prompts, start=1):
        print(f"\n--- Turn {turn} ---")
        print(f"User: {prompt}\n")

        # Collect only non-empty "content" chunks from the streamed reply.
        pieces = []
        async for chunk in agent.chat([{"role": "user", "content": prompt}]):
            if chunk.type == "content" and chunk.content:
                pieces.append(chunk.content)
        response_text = "".join(pieces)

        print(f"Agent: {response_text[:150]}...")

    print("\n✅ Test completed!")
    print(" Check the output above for warning messages:")
    print(" - Look for: '⚠️ Warning: Dropping N messages'")
    print(" - Look for: 'No persistent memory configured'")
215
+
216
+
217
async def main(config_path: str = None):
    """Run both context-window-management scenarios in sequence.

    Loads the YAML config, prints a summary of its ``memory`` section, checks
    that ``OPENAI_API_KEY`` is set, then runs the persistent-memory scenario,
    waits five seconds, and runs the no-persistent-memory scenario. Each
    scenario decides for itself whether to skip based on the config.

    The closing "All tests completed!" banner is printed only when both
    scenarios return without raising, so a failed or interrupted run is not
    reported as completed.

    Args:
        config_path: Optional path to a YAML config; ``None`` lets
            ``load_config`` pick its default file.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("Context Window Management Test Suite")
    print(rule)

    # An empty YAML file parses to None; fall back to an empty mapping.
    config = load_config(config_path) or {}

    # `or {}` also covers sections that are present but null in the YAML.
    memory_config = config.get('memory') or {}
    conversation_config = memory_config.get('conversation_memory') or {}
    pm_config = memory_config.get('persistent_memory') or {}
    compression_config = memory_config.get('compression') or {}

    print("\n📋 Memory Configuration (from YAML):")
    print(f" - Enabled: {memory_config.get('enabled', True)}")
    print(f" - Conversation Memory: {conversation_config.get('enabled', True)}")
    print(f" - Persistent Memory: {pm_config.get('enabled', True)}")

    if pm_config.get('enabled', True):
        print(f" - Agent Name: {pm_config.get('agent_name', 'N/A')}")
        print(f" - Session Name: {pm_config.get('session_name', 'N/A')}")
        print(f" - On Disk: {pm_config.get('on_disk', True)}")

    # The percent format spec avoids float artifacts from multiplying by 100
    # (e.g. 55.00000000000001) while still printing "75.0%" for the defaults.
    trigger = compression_config.get('trigger_threshold', 0.75)
    target = compression_config.get('target_ratio', 0.40)
    print(f" - Compression Trigger: {trigger:.1%}")
    print(f" - Target After Compression: {target:.1%}\n")

    # Both scenarios call the OpenAI API, so bail out early without a key.
    if not os.getenv("OPENAI_API_KEY"):
        print("\n❌ Error: OPENAI_API_KEY environment variable not set")
        print(" Please set your OpenAI API key:")
        print(" export OPENAI_API_KEY='your-key-here'")
        return

    try:
        # Test 1: With persistent memory (if enabled)
        await test_with_persistent_memory(config)

        # Wait between tests
        print("\n" + "-" * 70)
        print("Waiting 5 seconds before next test...")
        print("-" * 70)
        await asyncio.sleep(5)

        # Test 2: Without persistent memory (if disabled in config)
        await test_without_persistent_memory(config)

    except KeyboardInterrupt:
        print("\n\n⚠️ Test interrupted by user")
    except Exception as e:
        # Top-level boundary of the script: report the failure with a traceback.
        print(f"\n\n❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
    else:
        # Only claim completion when nothing failed or was interrupted.
        print("\n" + rule)
        print("All tests completed!")
        print(rule + "\n")
273
+
274
+
275
if __name__ == "__main__":
    import argparse

    # Command-line entry point: the only option is an alternative config file.
    parser = argparse.ArgumentParser(
        description="Test context window management with memory",
    )
    parser.add_argument(
        "--config",
        type=str,
        help="Path to YAML config file (default: gpt5mini_gemini_context_window_management.yaml)",
    )
    args = parser.parse_args()

    # args.config is None when --config is omitted; main() then uses its default.
    asyncio.run(main(args.config))
@@ -0,0 +1,97 @@
1
+ # Example Configuration: Multimodal Self-Evolution Analysis
2
+ #
3
+ # Use Case: MassGen agents analyze their own documentation videos to extract insights
4
+ #
5
+ # This configuration demonstrates MassGen's self-evolution capabilities through multimodal
6
+ # understanding. Agents use understand_video and understand_image tools to analyze case study
7
+ # videos, extract technical insights, and provide recommendations for documentation improvements.
8
+ #
9
+ # Run with:
10
+ # uv run python -m massgen.cli --config massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml "Analyze the MassGen case study video and extract key technical insights about the multi-agent collaboration capabilities demonstrated."
11
+
12
+ agents:
13
+ - id: "agent_a"
14
+ backend:
15
+ type: "openai"
16
+ model: "gpt-5-mini"
17
+ text:
18
+ verbosity: "medium"
19
+ reasoning:
20
+ effort: "medium"
21
+ summary: "auto"
22
+ enable_web_search: true
23
+ custom_tools:
24
+ - name: ["understand_video"]
25
+ category: "multimodal"
26
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
27
+ function: ["understand_video"]
28
+ - name: ["understand_image"]
29
+ category: "multimodal"
30
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
31
+ function: ["understand_image"]
32
+ system_message: |
33
+ You are an AI assistant analyzing MassGen's documentation and case studies to provide
34
+ insights for self-evolution and improvement.
35
+
36
+ You have access to multimodal understanding tools:
37
+ - understand_video: Analyzes video content by extracting key frames
38
+ - understand_image: Analyzes image content in detail
39
+
40
+ Your goal is to extract technical insights, identify documentation quality patterns,
41
+ and provide actionable recommendations for improvement. Focus on understanding
42
+ how MassGen presents itself to users and how the documentation could better
43
+ demonstrate self-evolution capabilities.
44
+
45
+ - id: "agent_b"
46
+ backend:
47
+ type: "openai"
48
+ model: "gpt-5-nano"
49
+ text:
50
+ verbosity: "medium"
51
+ reasoning:
52
+ effort: "medium"
53
+ summary: "auto"
54
+ enable_web_search: true
55
+ custom_tools:
56
+ - name: ["understand_video"]
57
+ category: "multimodal"
58
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
59
+ function: ["understand_video"]
60
+ - name: ["understand_image"]
61
+ category: "multimodal"
62
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
63
+ function: ["understand_image"]
64
+ system_message: |
65
+ You are an AI assistant analyzing MassGen's documentation and case studies to provide
66
+ insights for self-evolution and improvement.
67
+
68
+ You have access to multimodal understanding tools:
69
+ - understand_video: Analyzes video content by extracting key frames
70
+ - understand_image: Analyzes image content in detail
71
+
72
+ Your goal is to extract technical insights, identify documentation quality patterns,
73
+ and provide actionable recommendations for improvement. Focus on understanding
74
+ how MassGen presents itself to users and how the documentation could better
75
+ demonstrate self-evolution capabilities.
76
+
77
+ # Orchestrator-level configuration
78
+ orchestrator:
79
+ snapshot_storage: "snapshots"
80
+ agent_temporary_workspace: "agent_temp"
81
+
82
+ # Context paths at orchestrator level (for read-only source files)
83
+ filesystem:
84
+ context_paths:
85
+ - path: "massgen/configs/resources/v0.1.3-example"
86
+ permission: "read"
87
+
88
+ ui:
89
+ display_type: "rich_terminal"
90
+ logging_enabled: true
91
+
92
+ # What happens:
93
+ # 1. Both agents receive the prompt to analyze a case study video
94
+ # 2. Agents use understand_video to extract key frames and analyze content
95
+ # 3. Agents use understand_image on specific frames for detailed analysis
96
+ # 4. Agents collaborate to synthesize insights about MassGen's capabilities
97
+ # 5. Final output includes technical insights and improvement recommendations
@@ -28,11 +28,9 @@ agents:
28
28
  DISCORD_TOKEN: "${DISCORD_TOKEN}"
29
29
  security:
30
30
  level: "high"
31
- system_message: |
32
- Available Discord Tools:
33
- - Discord server interaction via MCP integration
34
- - Message reading, sending, and management
35
- - Channel and server information access
31
+ exclude_tools:
32
+ - mcp__discord__discord_send_webhook_message
33
+ - mcp__discord__discord_edit_webhook_message
36
34
 
37
35
  - id: "openai_discord_agent"
38
36
  backend:
@@ -50,11 +48,6 @@ agents:
50
48
  exclude_tools:
51
49
  - mcp__discord__discord_send_webhook_message
52
50
  - mcp__discord__discord_edit_webhook_message
53
- system_message: |
54
- Available Discord Tools:
55
- - Discord server interaction via MCP integration
56
- - Message reading, sending, and management
57
- - Channel and server information access
58
51
 
59
52
  - id: "claude_code_discord_agent"
60
53
  backend:
@@ -68,11 +61,6 @@ agents:
68
61
  args: ["-y", "mcp-discord", "--config", "${DISCORD_TOKEN}"]
69
62
  env:
70
63
  DISCORD_TOKEN: "${DISCORD_TOKEN}"
71
- system_message: |
72
- Available Discord Tools:
73
- - Discord server interaction via MCP integration
74
- - Message reading, sending, and management
75
- - Channel and server information access
76
64
 
77
65
  - id: "claude_discord_agent"
78
66
  backend:
@@ -90,11 +78,6 @@ agents:
90
78
  exclude_tools:
91
79
  - mcp__discord__discord_send_webhook_message
92
80
  - mcp__discord__discord_edit_webhook_message
93
- system_message: |
94
- Available Discord Tools:
95
- - Discord server interaction via MCP integration
96
- - Message reading, sending, and management
97
- - Channel and server information access
98
81
 
99
82
  - id: "grok_discord_agent"
100
83
  backend:
@@ -112,11 +95,6 @@ agents:
112
95
  exclude_tools:
113
96
  - mcp__discord__discord_send_webhook_message
114
97
  - mcp__discord__discord_edit_webhook_message
115
- system_message: |
116
- Available Discord Tools:
117
- - Discord server interaction via MCP integration
118
- - Message reading, sending, and management
119
- - Channel and server information access
120
98
 
121
99
  ui:
122
100
  display_type: "rich_terminal"
@@ -124,8 +102,8 @@ ui:
124
102
 
125
103
  # Orchestrator Settings with Coordination Configuration
126
104
  orchestrator:
127
- snapshot_storage: "massgen_logs/snapshots" # Directory for workspace snapshots
128
- agent_temporary_workspace: "massgen_logs/temp_workspaces" # Directory for temporary agent workspaces
105
+ snapshot_storage: "snapshots" # Directory for workspace snapshots
106
+ agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
129
107
  coordination:
130
108
  enable_planning_mode: true
131
109
  planning_mode_instruction: |
@@ -134,7 +112,7 @@ orchestrator:
134
112
  1. Describe your intended actions and reasoning
135
113
  2. Analyze other agents' proposals
136
114
  3. Use only the 'vote' or 'new_answer' tools for coordination
137
- 4. DO NOT execute any actual Discord commands or API calls
138
- 5. Save tool execution for the final presentation phase when the winning agent will implement the plan
115
+ 4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
116
+ 5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
139
117
 
140
118
  Focus on planning, analysis, and coordination rather than execution.
@@ -134,8 +134,8 @@ ui:
134
134
 
135
135
  # Orchestrator Settings with Coordination Configuration
136
136
  orchestrator:
137
- snapshot_storage: "massgen_logs/snapshots" # Directory for workspace snapshots
138
- agent_temporary_workspace: "massgen_logs/temp_workspaces" # Directory for temporary agent workspaces
137
+ snapshot_storage: "snapshots" # Directory for workspace snapshots
138
+ agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
139
139
  coordination:
140
140
  enable_planning_mode: true
141
141
  planning_mode_instruction: |
@@ -144,8 +144,7 @@ orchestrator:
144
144
  1. Describe your intended file operations and reasoning
145
145
  2. Analyze other agents' proposals for the filesystem tasks
146
146
  3. Use only the 'vote' or 'new_answer' tools for coordination
147
- 4. DO NOT execute any actual filesystem operations, file creation, or directory management
148
- 5. Save all file operations for the final presentation phase when the winning agent will implement the plan
147
+ 4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
148
+ 5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
149
149
 
150
- Focus on planning, analysis, and coordination rather than execution.
151
- Example: "I would create a 'src' directory and write a main.py file..." rather than actually creating them.
150
+ Focus on planning, analysis, and coordination rather than execution.
@@ -135,8 +135,8 @@ ui:
135
135
 
136
136
  # Orchestrator Settings with Coordination Configuration
137
137
  orchestrator:
138
- snapshot_storage: "massgen_logs/snapshots" # Directory for workspace snapshots
139
- agent_temporary_workspace: "massgen_logs/temp_workspaces" # Directory for temporary agent workspaces
138
+ snapshot_storage: "snapshots" # Directory for workspace snapshots
139
+ agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
140
140
  coordination:
141
141
  enable_planning_mode: true
142
142
  planning_mode_instruction: |
@@ -145,7 +145,7 @@ orchestrator:
145
145
  1. Describe your intended actions and reasoning
146
146
  2. Analyze other agents' proposals
147
147
  3. Use only the 'vote' or 'new_answer' tools for coordination
148
- 4. DO NOT execute any actual Notion commands or API calls
149
- 5. Save tool execution for the final presentation phase when the winning agent will implement the plan
148
+ 4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
149
+ 5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
150
150
 
151
151
  Focus on planning, analysis, and coordination rather than execution.
@@ -139,8 +139,8 @@ ui:
139
139
 
140
140
  # Orchestrator Settings with Coordination Configuration
141
141
  orchestrator:
142
- snapshot_storage: "massgen_logs/snapshots" # Directory for workspace snapshots
143
- agent_temporary_workspace: "massgen_logs/temp_workspaces" # Directory for temporary agent workspaces
142
+ snapshot_storage: "snapshots" # Directory for workspace snapshots
143
+ agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
144
144
  coordination:
145
145
  enable_planning_mode: true
146
146
  planning_mode_instruction: |
@@ -149,7 +149,7 @@ orchestrator:
149
149
  1. Describe your intended actions and reasoning
150
150
  2. Analyze other agents' proposals
151
151
  3. Use only the 'vote' or 'new_answer' tools for coordination
152
- 4. DO NOT execute any actual Twitter commands or API calls
153
- 5. Save tool execution for the final presentation phase when the winning agent will implement the plan
152
+ 4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
153
+ 5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
154
154
 
155
155
  Focus on planning, analysis, and coordination rather than execution.
@@ -67,7 +67,7 @@ orchestrator:
67
67
  2. Analyze other agents' proposals
68
68
  3. Use only the 'vote' or 'new_answer' tools for coordination
69
69
  4. You CAN use web search for information gathering
70
- 5. DO NOT execute Discord operations (sending messages, reading channels, etc.) - save these for execution phase
71
- 6. Save tool execution for the final presentation phase when the winning agent will implement the plan
70
+ 5. DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
71
+ 6. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
72
72
 
73
73
  Focus on planning, analysis, and coordination rather than execution.
massgen/docker/README.md CHANGED
@@ -115,6 +115,7 @@ agents:
115
115
  | `command_line_docker_memory_limit` | None | Memory limit (e.g., `"2g"`, `"512m"`) |
116
116
  | `command_line_docker_cpu_limit` | None | CPU cores limit (e.g., `2.0`) |
117
117
  | `command_line_docker_network_mode` | `"none"` | `"none"`, `"bridge"`, or `"host"` |
118
+ | `command_line_docker_enable_sudo` | `false` | Enable sudo in containers (isolated from host) |
118
119
 
119
120
  ## How It Works
120
121
 
@@ -204,6 +205,88 @@ docker build -t my-custom-runtime:latest -f Dockerfile.custom .
204
205
  command_line_docker_image: "my-custom-runtime:latest"
205
206
  ```
206
207
 
208
+ ### Sudo Variant (Runtime Package Installation)
209
+
210
+ The sudo variant allows agents to install system packages at runtime inside their Docker container.
211
+
212
+ **IMPORTANT: Build the image before first use:**
213
+ ```bash
214
+ bash massgen/docker/build.sh --sudo
215
+ ```
216
+
217
+ This builds the `massgen/mcp-runtime-sudo:latest` image locally with sudo enabled. (The image is not published on Docker Hub, so you must build it yourself.)
218
+
219
+ **Enable in config:**
220
+ ```yaml
221
+ agent:
222
+ backend:
223
+ cwd: "workspace"
224
+ enable_mcp_command_line: true
225
+ command_line_execution_mode: "docker"
226
+ command_line_docker_enable_sudo: true # Automatically uses sudo image
227
+ ```
228
+
229
+ **What agents can do with sudo:**
230
+ ```bash
231
+ # Install system packages at runtime
232
+ sudo apt-get update && sudo apt-get install -y ffmpeg
233
+
234
+ # Install additional Python packages
235
+ sudo pip install tensorflow
236
+
237
+ # Install additional system tools (changes stay inside the container)
238
+ sudo apt-get install -y postgresql-client
239
+ ```
240
+
241
+ **Security model - Is this safe?**
242
+
243
+ **YES, it's still safe** because Docker container isolation is the primary security boundary:
244
+
245
+ ✅ **Container is fully isolated from your host:**
246
+ - Sudo inside container ≠ sudo on your computer
247
+ - Agent can only access mounted volumes (workspace, context paths)
248
+ - Cannot access your host filesystem outside mounts
249
+ - Cannot affect host processes or system configuration
250
+ - Docker namespaces/cgroups provide strong isolation
251
+
252
+ ✅ **What sudo can and cannot do:**
253
+ - ✅ Can: Install packages inside the container (apt, pip, npm)
254
+ - ✅ Can: Modify container system configuration
255
+ - ✅ Can: Read/write mounted workspace (same as without sudo)
256
+ - ❌ Cannot: Access your host filesystem outside mounts
257
+ - ❌ Cannot: Affect your host system
258
+ - ❌ Cannot: Break out of the container (unless Docker vulnerability exists)
259
+
260
+ ℹ️ **Note:**
261
+ - Container escape vulnerabilities (CVEs in Docker/kernel) are extremely rare and quickly patched
262
+ - Standard Docker security practices apply
263
+
264
+ ❌ **Don't do this (makes it unsafe):**
265
+ - Enabling privileged mode (not exposed in MassGen, would need code changes)
266
+ - Mounting sensitive host paths like `/`, `/etc`, `/usr`
267
+ - Disabling security features like AppArmor/SELinux
268
+
269
+ **When to use sudo variant vs custom images:**
270
+
271
+ | Approach | Use When | Performance | Security |
272
+ |----------|----------|-------------|----------|
273
+ | **Sudo variant** | Need flexibility, unknown packages upfront, prototyping | Slower (runtime install) | Good (container isolated) |
274
+ | **Custom image** | Know packages needed, production use, performance matters | Fast (pre-installed) | Best (minimal attack surface) |
275
+
276
+ **Custom image example (recommended for production):**
277
+ ```dockerfile
278
+ FROM massgen/mcp-runtime:latest
279
+ USER root
280
+ RUN apt-get update && apt-get install -y ffmpeg postgresql-client
281
+ USER massgen
282
+ ```
283
+
284
+ Build: `docker build -t my-runtime:latest .`
285
+
286
+ Use: `command_line_docker_image: "my-runtime:latest"`
287
+
288
+ **Bottom line:** The sudo variant is safe for most use cases because Docker container isolation is strong. Custom images are preferred for production because they're faster and have a smaller attack surface, but sudo is fine for development and prototyping.
289
+
207
290
  ## Security Features
208
291
 
209
292
  ### Filesystem Isolation