massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +56 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
- massgen/backend/capabilities.py +6 -6
- massgen/backend/chat_completions.py +18 -11
- massgen/backend/claude_code.py +9 -1
- massgen/backend/gemini.py +71 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/grok.py +39 -6
- massgen/backend/response.py +18 -11
- massgen/chat_agent.py +9 -3
- massgen/cli.py +319 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +78 -20
- massgen/configs/basic/multi/three_agents_default.yaml +2 -2
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/configs/tools/memory/README.md +199 -0
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
- massgen/configs/tools/memory/test_context_window_management.py +286 -0
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +8 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +123 -3
- massgen/orchestrator.py +652 -44
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_intelligent_planning_mode.py +643 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/token_manager/token_manager.py +13 -4
- massgen/tool/__init__.py +4 -0
- massgen/tool/_multimodal_tools/understand_audio.py +193 -0
- massgen/tool/_multimodal_tools/understand_file.py +550 -0
- massgen/tool/_multimodal_tools/understand_image.py +212 -0
- massgen/tool/_multimodal_tools/understand_video.py +313 -0
- massgen/tool/docs/multimodal_tools.md +779 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Test script for Context Window Management with Memory.
|
|
5
|
+
|
|
6
|
+
This script demonstrates how to configure and test the context window
|
|
7
|
+
management feature with persistent memory integration.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python massgen/configs/tools/memory/test_context_window_management.py
|
|
11
|
+
|
|
12
|
+
# Or specify a custom config:
|
|
13
|
+
python massgen/configs/tools/memory/test_context_window_management.py --config path/to/config.yaml
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Add parent directory to path for imports
|
|
22
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
|
23
|
+
|
|
24
|
+
import yaml
|
|
25
|
+
from dotenv import load_dotenv
|
|
26
|
+
|
|
27
|
+
from massgen.backend.chat_completions import ChatCompletionsBackend
|
|
28
|
+
from massgen.chat_agent import SingleAgent
|
|
29
|
+
from massgen.memory import ConversationMemory, PersistentMemory
|
|
30
|
+
|
|
31
|
+
# Load environment variables from .env file
|
|
32
|
+
load_dotenv()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_config(config_path: str = None) -> dict:
    """Load the test configuration from a YAML file.

    Args:
        config_path: Path to the YAML config file. When ``None``, the
            ``gpt5mini_gemini_context_window_management.yaml`` file located
            next to this script is used.

    Returns:
        The parsed configuration. An empty dict is returned when the file
        contains an empty YAML document, so callers can chain ``.get()``
        without first checking for ``None``.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    if config_path is None:
        # Default to the config in the same directory as this script
        config_path = Path(__file__).parent / "gpt5mini_gemini_context_window_management.yaml"

    # Read as UTF-8 explicitly: the platform default encoding (for example
    # cp1252 on Windows) can fail on non-ASCII characters in the config.
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load() yields None for an empty document; normalise to {}.
        return yaml.safe_load(f) or {}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def test_with_persistent_memory(config: dict):
    """Run a multi-turn storytelling chat with persistent memory attached.

    The scenario is skipped (with a printed notice) when ``memory.enabled``
    or ``memory.persistent_memory.enabled`` is false in ``config``. Otherwise
    an agent is created with both conversation and persistent memory, ten
    story prompts are sent one after another, and a truncated reply plus the
    conversation-memory size is printed after every turn.

    Nothing is asserted: the function only prints progress so the operator
    can look for compression log lines in the output.

    Args:
        config: Parsed YAML configuration; only the ``memory`` section is read.
    """
    # ``or {}`` also covers YAML keys that are present but empty (parsed as None).
    memory_config = config.get('memory') or {}
    if not memory_config.get('enabled', True):
        print("\n⚠️ Skipping: memory.enabled is false in config")
        return

    persistent_config = memory_config.get('persistent_memory') or {}
    if not persistent_config.get('enabled', True):
        print("\n⚠️ Skipping: memory.persistent_memory.enabled is false in config")
        return

    print("\n" + "=" * 70)
    print("TEST 1: Context Window Management WITH Persistent Memory")
    print("=" * 70 + "\n")

    # Memory settings from config, with defaults for a standalone run
    agent_name = persistent_config.get('agent_name', 'storyteller_agent')
    session_name = persistent_config.get('session_name', 'test_session')
    on_disk = persistent_config.get('on_disk', True)

    # LLM backend shared by the agent and the persistent memory.
    # NOTE: the model is fixed here and is not taken from the YAML config.
    llm_backend = ChatCompletionsBackend(
        type="openai",
        model="gpt-4o-mini",  # Use smaller model for faster testing
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Embedding backend for persistent memory
    embedding_backend = ChatCompletionsBackend(
        type="openai",
        model="text-embedding-3-small",
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Initialize memory systems
    conversation_memory = ConversationMemory()
    persistent_memory = PersistentMemory(
        agent_name=agent_name,
        session_name=session_name,
        llm_backend=llm_backend,
        embedding_backend=embedding_backend,
        on_disk=on_disk,
    )

    # Create agent with memory
    agent = SingleAgent(
        backend=llm_backend,
        agent_id="storyteller",
        system_message="You are a creative storyteller. Create detailed, "
        "immersive narratives with rich descriptions.",
        conversation_memory=conversation_memory,
        persistent_memory=persistent_memory,
    )

    print("✅ Agent initialized with memory")
    print(" - ConversationMemory: Active")
    print(f" - PersistentMemory: Active (agent={agent_name}, session={session_name}, on_disk={on_disk})")
    # NOTE: the three figures below are hard-coded text; they are not read
    # from the config or from the backend.
    print(" - Model context window: 128,000 tokens")
    print(" - Compression triggers at: 96,000 tokens (75%)")
    print(" - Target after compression: 51,200 tokens (40%)\n")

    # Simulate a conversation that will fill context;
    # each turn is meant to add a significant number of tokens.
    story_prompts = [
        "Tell me the beginning of a space exploration story. Include details about the ship, crew, and their mission. (Make it 400+ words)",
        "What happens when they encounter their first alien planet? Describe it in vivid detail.",
        "Describe a tense first contact situation with aliens. What do they look like? How do they communicate?",
        "The mission takes an unexpected turn. What crisis occurs and how does the crew respond?",
        "Show me a dramatic action sequence involving the ship's technology and the alien environment.",
        "Reveal a plot twist about one of the crew members or the mission itself.",
        "Continue the story with escalating tension and more discoveries.",
        "How do cultural differences between humans and aliens create conflicts?",
        "Describe a major decision point for the crew captain. What are the stakes?",
        "Bring the story to a climactic moment with high drama.",
    ]

    for turn, prompt in enumerate(story_prompts, start=1):
        print(f"\n--- Turn {turn} ---")
        print(f"User: {prompt}\n")

        # Collect only non-empty "content" chunks from the streamed reply
        pieces = []
        async for chunk in agent.chat([{"role": "user", "content": prompt}]):
            if chunk.type == "content" and chunk.content:
                pieces.append(chunk.content)
        response_text = "".join(pieces)

        print(f"Agent: {response_text[:200]}...")
        print(f" [{len(response_text)} chars in response]")

        # Report the conversation size so compression can be spotted
        if conversation_memory:
            size = await conversation_memory.size()
            print(f" [Conversation memory: {size} messages]\n")

    print("\n✅ Test completed!")
    print(" Check the output above for compression logs:")
    print(" - Look for: '📊 Context usage: ...'")
    print(" - Look for: '📦 Compressed N messages into long-term memory'")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async def test_without_persistent_memory(config: dict):
    """Run a short chat with conversation memory only (warning case).

    The scenario runs only when ``memory.persistent_memory.enabled`` is false
    in ``config``; otherwise a skip notice is printed. An agent is created
    with ``persistent_memory=None``, five story prompts are sent, and a
    truncated reply is printed after each turn.

    Nothing is asserted: the function only prints progress so the operator
    can look for the warning messages in the output.

    Args:
        config: Parsed YAML configuration; only the ``memory`` section is read.
    """
    # ``or {}`` also covers YAML keys that are present but empty (parsed as None).
    memory_config = config.get('memory') or {}
    persistent_config = memory_config.get('persistent_memory') or {}

    if persistent_config.get('enabled', True):
        # Skip if persistent memory is enabled - that scenario is covered by Test 1
        print("\n⚠️ Skipping Test 2: persistent memory is enabled in config")
        print(" To test without persistent memory, set memory.persistent_memory.enabled: false")
        return

    print("\n" + "=" * 70)
    print("TEST 2: Context Window Management WITHOUT Persistent Memory")
    print("=" * 70 + "\n")

    # Create LLM backend
    # NOTE: the model is fixed here and is not taken from the YAML config.
    llm_backend = ChatCompletionsBackend(
        type="openai",
        model="gpt-4o-mini",
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Only conversation memory, NO persistent memory
    conversation_memory = ConversationMemory()

    # Create agent without persistent memory
    agent = SingleAgent(
        backend=llm_backend,
        agent_id="storyteller_no_persist",
        system_message="You are a creative storyteller.",
        conversation_memory=conversation_memory,
        persistent_memory=None,  # No persistent memory!
    )

    print("⚠️ Agent initialized WITHOUT persistent memory")
    print(" - ConversationMemory: Active")
    print(" - PersistentMemory: NONE")
    print(" - This will trigger warning messages when context fills\n")

    # Shorter test - just trigger compression
    story_prompts = [
        "Tell me a 500-word science fiction story about time travel.",
        "Continue the story with 500 more words about paradoxes.",
        "Add another 500 words with a plot twist.",
        "Continue with 500 words about the resolution.",
        "Write a 500-word epilogue.",
    ]

    for turn, prompt in enumerate(story_prompts, start=1):
        print(f"\n--- Turn {turn} ---")
        print(f"User: {prompt}\n")

        # Collect only non-empty "content" chunks from the streamed reply
        pieces = []
        async for chunk in agent.chat([{"role": "user", "content": prompt}]):
            if chunk.type == "content" and chunk.content:
                pieces.append(chunk.content)
        response_text = "".join(pieces)

        print(f"Agent: {response_text[:150]}...")

    print("\n✅ Test completed!")
    print(" Check the output above for warning messages:")
    print(" - Look for: '⚠️ Warning: Dropping N messages'")
    print(" - Look for: 'No persistent memory configured'")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
async def main(config_path: str = None):
    """Print the memory configuration and run both test scenarios.

    The configuration is loaded and summarised first. If ``OPENAI_API_KEY``
    is not set, an error is printed and the function returns without running
    anything. Otherwise Test 1 (with persistent memory) runs, followed by a
    5-second pause and Test 2 (without persistent memory); each scenario
    decides for itself whether to skip based on the config.

    Errors raised by the scenarios are printed with a traceback instead of
    being propagated.

    Args:
        config_path: Optional path to a YAML config file; passed straight to
            ``load_config``.
    """
    print("\n" + "=" * 70)
    print("Context Window Management Test Suite")
    print("=" * 70)

    # Load configuration. ``or {}`` guards against an empty YAML document and
    # against sections that are present but empty (both parse to None).
    config = load_config(config_path) or {}
    memory_config = config.get('memory') or {}
    conversation_config = memory_config.get('conversation_memory') or {}
    pm_config = memory_config.get('persistent_memory') or {}
    compression_config = memory_config.get('compression') or {}

    # Show memory configuration
    print("\n📋 Memory Configuration (from YAML):")
    print(f" - Enabled: {memory_config.get('enabled', True)}")
    print(f" - Conversation Memory: {conversation_config.get('enabled', True)}")
    print(f" - Persistent Memory: {pm_config.get('enabled', True)}")

    if pm_config.get('enabled', True):
        print(f" - Agent Name: {pm_config.get('agent_name', 'N/A')}")
        print(f" - Session Name: {pm_config.get('session_name', 'N/A')}")
        print(f" - On Disk: {pm_config.get('on_disk', True)}")

    # ``:g`` trims float artefacts such as 55.00000000000001 from ratio * 100
    trigger_pct = compression_config.get('trigger_threshold', 0.75) * 100
    target_pct = compression_config.get('target_ratio', 0.40) * 100
    print(f" - Compression Trigger: {trigger_pct:g}%")
    print(f" - Target After Compression: {target_pct:g}%\n")

    # Check for API key
    if not os.getenv("OPENAI_API_KEY"):
        print("\n❌ Error: OPENAI_API_KEY environment variable not set")
        print(" Please set your OpenAI API key:")
        print(" export OPENAI_API_KEY='your-key-here'")
        return

    try:
        # Test 1: With persistent memory (if enabled)
        await test_with_persistent_memory(config)

        # Wait between tests
        print("\n" + "-" * 70)
        print("Waiting 5 seconds before next test...")
        print("-" * 70)
        await asyncio.sleep(5)

        # Test 2: Without persistent memory (if disabled in config)
        await test_without_persistent_memory(config)

    except KeyboardInterrupt:
        print("\n\n⚠️ Test interrupted by user")
    except Exception as e:
        # Top-level boundary of the script: report the failure and fall
        # through to the closing banner instead of crashing.
        print(f"\n\n❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()

    print("\n" + "=" * 70)
    print("All tests completed!")
    print("=" * 70 + "\n")
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
if __name__ == "__main__":
    import argparse

    # Script entry point: an optional --config flag selects the YAML file;
    # when it is omitted, None is passed on to main().
    cli = argparse.ArgumentParser(description="Test context window management with memory")
    cli.add_argument(
        "--config",
        type=str,
        help="Path to YAML config file (default: gpt5mini_gemini_context_window_management.yaml)",
    )
    selected_config = cli.parse_args().config

    asyncio.run(main(selected_config))
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Example Configuration: Multimodal Self-Evolution Analysis
|
|
2
|
+
#
|
|
3
|
+
# Use Case: MassGen agents analyze their own documentation videos to extract insights
|
|
4
|
+
#
|
|
5
|
+
# This configuration demonstrates MassGen's self-evolution capabilities through multimodal
|
|
6
|
+
# understanding. Agents use understand_video and understand_image tools to analyze case study
|
|
7
|
+
# videos, extract technical insights, and provide recommendations for documentation improvements.
|
|
8
|
+
#
|
|
9
|
+
# Run with:
|
|
10
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml "Analyze the MassGen case study video and extract key technical insights about the multi-agent collaboration capabilities demonstrated."
|
|
11
|
+
|
|
12
|
+
agents:
|
|
13
|
+
- id: "agent_a"
|
|
14
|
+
backend:
|
|
15
|
+
type: "openai"
|
|
16
|
+
model: "gpt-5-mini"
|
|
17
|
+
text:
|
|
18
|
+
verbosity: "medium"
|
|
19
|
+
reasoning:
|
|
20
|
+
effort: "medium"
|
|
21
|
+
summary: "auto"
|
|
22
|
+
enable_web_search: true
|
|
23
|
+
custom_tools:
|
|
24
|
+
- name: ["understand_video"]
|
|
25
|
+
category: "multimodal"
|
|
26
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
27
|
+
function: ["understand_video"]
|
|
28
|
+
- name: ["understand_image"]
|
|
29
|
+
category: "multimodal"
|
|
30
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
31
|
+
function: ["understand_image"]
|
|
32
|
+
system_message: |
|
|
33
|
+
You are an AI assistant analyzing MassGen's documentation and case studies to provide
|
|
34
|
+
insights for self-evolution and improvement.
|
|
35
|
+
|
|
36
|
+
You have access to multimodal understanding tools:
|
|
37
|
+
- understand_video: Analyzes video content by extracting key frames
|
|
38
|
+
- understand_image: Analyzes image content in detail
|
|
39
|
+
|
|
40
|
+
Your goal is to extract technical insights, identify documentation quality patterns,
|
|
41
|
+
and provide actionable recommendations for improvement. Focus on understanding
|
|
42
|
+
how MassGen presents itself to users and how the documentation could better
|
|
43
|
+
demonstrate self-evolution capabilities.
|
|
44
|
+
|
|
45
|
+
- id: "agent_b"
|
|
46
|
+
backend:
|
|
47
|
+
type: "openai"
|
|
48
|
+
model: "gpt-5-nano"
|
|
49
|
+
text:
|
|
50
|
+
verbosity: "medium"
|
|
51
|
+
reasoning:
|
|
52
|
+
effort: "medium"
|
|
53
|
+
summary: "auto"
|
|
54
|
+
enable_web_search: true
|
|
55
|
+
custom_tools:
|
|
56
|
+
- name: ["understand_video"]
|
|
57
|
+
category: "multimodal"
|
|
58
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
59
|
+
function: ["understand_video"]
|
|
60
|
+
- name: ["understand_image"]
|
|
61
|
+
category: "multimodal"
|
|
62
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
63
|
+
function: ["understand_image"]
|
|
64
|
+
system_message: |
|
|
65
|
+
You are an AI assistant analyzing MassGen's documentation and case studies to provide
|
|
66
|
+
insights for self-evolution and improvement.
|
|
67
|
+
|
|
68
|
+
You have access to multimodal understanding tools:
|
|
69
|
+
- understand_video: Analyzes video content by extracting key frames
|
|
70
|
+
- understand_image: Analyzes image content in detail
|
|
71
|
+
|
|
72
|
+
Your goal is to extract technical insights, identify documentation quality patterns,
|
|
73
|
+
and provide actionable recommendations for improvement. Focus on understanding
|
|
74
|
+
how MassGen presents itself to users and how the documentation could better
|
|
75
|
+
demonstrate self-evolution capabilities.
|
|
76
|
+
|
|
77
|
+
# Orchestrator-level configuration
|
|
78
|
+
orchestrator:
|
|
79
|
+
snapshot_storage: "snapshots"
|
|
80
|
+
agent_temporary_workspace: "agent_temp"
|
|
81
|
+
|
|
82
|
+
# Context paths at orchestrator level (for read-only source files)
|
|
83
|
+
filesystem:
|
|
84
|
+
context_paths:
|
|
85
|
+
- path: "massgen/configs/resources/v0.1.3-example"
|
|
86
|
+
permission: "read"
|
|
87
|
+
|
|
88
|
+
ui:
|
|
89
|
+
display_type: "rich_terminal"
|
|
90
|
+
logging_enabled: true
|
|
91
|
+
|
|
92
|
+
# What happens:
|
|
93
|
+
# 1. Both agents receive the prompt to analyze a case study video
|
|
94
|
+
# 2. Agents use understand_video to extract key frames and analyze content
|
|
95
|
+
# 3. Agents use understand_image on specific frames for detailed analysis
|
|
96
|
+
# 4. Agents collaborate to synthesize insights about MassGen's capabilities
|
|
97
|
+
# 5. Final output includes technical insights and improvement recommendations
|
|
@@ -28,11 +28,9 @@ agents:
|
|
|
28
28
|
DISCORD_TOKEN: "${DISCORD_TOKEN}"
|
|
29
29
|
security:
|
|
30
30
|
level: "high"
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
- Message reading, sending, and management
|
|
35
|
-
- Channel and server information access
|
|
31
|
+
exclude_tools:
|
|
32
|
+
- mcp__discord__discord_send_webhook_message
|
|
33
|
+
- mcp__discord__discord_edit_webhook_message
|
|
36
34
|
|
|
37
35
|
- id: "openai_discord_agent"
|
|
38
36
|
backend:
|
|
@@ -50,11 +48,6 @@ agents:
|
|
|
50
48
|
exclude_tools:
|
|
51
49
|
- mcp__discord__discord_send_webhook_message
|
|
52
50
|
- mcp__discord__discord_edit_webhook_message
|
|
53
|
-
system_message: |
|
|
54
|
-
Available Discord Tools:
|
|
55
|
-
- Discord server interaction via MCP integration
|
|
56
|
-
- Message reading, sending, and management
|
|
57
|
-
- Channel and server information access
|
|
58
51
|
|
|
59
52
|
- id: "claude_code_discord_agent"
|
|
60
53
|
backend:
|
|
@@ -68,11 +61,6 @@ agents:
|
|
|
68
61
|
args: ["-y", "mcp-discord", "--config", "${DISCORD_TOKEN}"]
|
|
69
62
|
env:
|
|
70
63
|
DISCORD_TOKEN: "${DISCORD_TOKEN}"
|
|
71
|
-
system_message: |
|
|
72
|
-
Available Discord Tools:
|
|
73
|
-
- Discord server interaction via MCP integration
|
|
74
|
-
- Message reading, sending, and management
|
|
75
|
-
- Channel and server information access
|
|
76
64
|
|
|
77
65
|
- id: "claude_discord_agent"
|
|
78
66
|
backend:
|
|
@@ -90,11 +78,6 @@ agents:
|
|
|
90
78
|
exclude_tools:
|
|
91
79
|
- mcp__discord__discord_send_webhook_message
|
|
92
80
|
- mcp__discord__discord_edit_webhook_message
|
|
93
|
-
system_message: |
|
|
94
|
-
Available Discord Tools:
|
|
95
|
-
- Discord server interaction via MCP integration
|
|
96
|
-
- Message reading, sending, and management
|
|
97
|
-
- Channel and server information access
|
|
98
81
|
|
|
99
82
|
- id: "grok_discord_agent"
|
|
100
83
|
backend:
|
|
@@ -112,11 +95,6 @@ agents:
|
|
|
112
95
|
exclude_tools:
|
|
113
96
|
- mcp__discord__discord_send_webhook_message
|
|
114
97
|
- mcp__discord__discord_edit_webhook_message
|
|
115
|
-
system_message: |
|
|
116
|
-
Available Discord Tools:
|
|
117
|
-
- Discord server interaction via MCP integration
|
|
118
|
-
- Message reading, sending, and management
|
|
119
|
-
- Channel and server information access
|
|
120
98
|
|
|
121
99
|
ui:
|
|
122
100
|
display_type: "rich_terminal"
|
|
@@ -124,8 +102,8 @@ ui:
|
|
|
124
102
|
|
|
125
103
|
# Orchestrator Settings with Coordination Configuration
|
|
126
104
|
orchestrator:
|
|
127
|
-
snapshot_storage: "
|
|
128
|
-
agent_temporary_workspace: "
|
|
105
|
+
snapshot_storage: "snapshots" # Directory for workspace snapshots
|
|
106
|
+
agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
|
|
129
107
|
coordination:
|
|
130
108
|
enable_planning_mode: true
|
|
131
109
|
planning_mode_instruction: |
|
|
@@ -134,7 +112,7 @@ orchestrator:
|
|
|
134
112
|
1. Describe your intended actions and reasoning
|
|
135
113
|
2. Analyze other agents' proposals
|
|
136
114
|
3. Use only the 'vote' or 'new_answer' tools for coordination
|
|
137
|
-
4. DO NOT execute any
|
|
138
|
-
5. Save
|
|
115
|
+
4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
|
|
116
|
+
5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
|
|
139
117
|
|
|
140
118
|
Focus on planning, analysis, and coordination rather than execution.
|
|
@@ -134,8 +134,8 @@ ui:
|
|
|
134
134
|
|
|
135
135
|
# Orchestrator Settings with Coordination Configuration
|
|
136
136
|
orchestrator:
|
|
137
|
-
snapshot_storage: "
|
|
138
|
-
agent_temporary_workspace: "
|
|
137
|
+
snapshot_storage: "snapshots" # Directory for workspace snapshots
|
|
138
|
+
agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
|
|
139
139
|
coordination:
|
|
140
140
|
enable_planning_mode: true
|
|
141
141
|
planning_mode_instruction: |
|
|
@@ -144,8 +144,7 @@ orchestrator:
|
|
|
144
144
|
1. Describe your intended file operations and reasoning
|
|
145
145
|
2. Analyze other agents' proposals for the filesystem tasks
|
|
146
146
|
3. Use only the 'vote' or 'new_answer' tools for coordination
|
|
147
|
-
4. DO NOT execute any
|
|
148
|
-
5. Save
|
|
147
|
+
4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
|
|
148
|
+
5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
|
|
149
149
|
|
|
150
|
-
Focus on planning, analysis, and coordination rather than execution.
|
|
151
|
-
Example: "I would create a 'src' directory and write a main.py file..." rather than actually creating them.
|
|
150
|
+
Focus on planning, analysis, and coordination rather than execution.
|
|
@@ -135,8 +135,8 @@ ui:
|
|
|
135
135
|
|
|
136
136
|
# Orchestrator Settings with Coordination Configuration
|
|
137
137
|
orchestrator:
|
|
138
|
-
snapshot_storage: "
|
|
139
|
-
agent_temporary_workspace: "
|
|
138
|
+
snapshot_storage: "snapshots" # Directory for workspace snapshots
|
|
139
|
+
agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
|
|
140
140
|
coordination:
|
|
141
141
|
enable_planning_mode: true
|
|
142
142
|
planning_mode_instruction: |
|
|
@@ -145,7 +145,7 @@ orchestrator:
|
|
|
145
145
|
1. Describe your intended actions and reasoning
|
|
146
146
|
2. Analyze other agents' proposals
|
|
147
147
|
3. Use only the 'vote' or 'new_answer' tools for coordination
|
|
148
|
-
4. DO NOT execute any
|
|
149
|
-
5. Save
|
|
148
|
+
4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
|
|
149
|
+
5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
|
|
150
150
|
|
|
151
151
|
Focus on planning, analysis, and coordination rather than execution.
|
|
@@ -139,8 +139,8 @@ ui:
|
|
|
139
139
|
|
|
140
140
|
# Orchestrator Settings with Coordination Configuration
|
|
141
141
|
orchestrator:
|
|
142
|
-
snapshot_storage: "
|
|
143
|
-
agent_temporary_workspace: "
|
|
142
|
+
snapshot_storage: "snapshots" # Directory for workspace snapshots
|
|
143
|
+
agent_temporary_workspace: "temp_workspaces" # Directory for temporary agent workspaces
|
|
144
144
|
coordination:
|
|
145
145
|
enable_planning_mode: true
|
|
146
146
|
planning_mode_instruction: |
|
|
@@ -149,7 +149,7 @@ orchestrator:
|
|
|
149
149
|
1. Describe your intended actions and reasoning
|
|
150
150
|
2. Analyze other agents' proposals
|
|
151
151
|
3. Use only the 'vote' or 'new_answer' tools for coordination
|
|
152
|
-
4. DO NOT execute any
|
|
153
|
-
5. Save
|
|
152
|
+
4. Execute read-only actions - DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
|
|
153
|
+
5. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
|
|
154
154
|
|
|
155
155
|
Focus on planning, analysis, and coordination rather than execution.
|
|
@@ -67,7 +67,7 @@ orchestrator:
|
|
|
67
67
|
2. Analyze other agents' proposals
|
|
68
68
|
3. Use only the 'vote' or 'new_answer' tools for coordination
|
|
69
69
|
4. You CAN use web search for information gathering
|
|
70
|
-
5. DO NOT execute
|
|
71
|
-
6. Save
|
|
70
|
+
5. DO NOT execute any actions that have side effects (e.g., sending messages, modifying data)
|
|
71
|
+
6. Save actions that have side effects for the final presentation phase when the winning agent will implement the plan
|
|
72
72
|
|
|
73
73
|
Focus on planning, analysis, and coordination rather than execution.
|
massgen/docker/README.md
CHANGED
|
@@ -115,6 +115,7 @@ agents:
|
|
|
115
115
|
| `command_line_docker_memory_limit` | None | Memory limit (e.g., `"2g"`, `"512m"`) |
|
|
116
116
|
| `command_line_docker_cpu_limit` | None | CPU cores limit (e.g., `2.0`) |
|
|
117
117
|
| `command_line_docker_network_mode` | `"none"` | `"none"`, `"bridge"`, or `"host"` |
|
|
118
|
+
| `command_line_docker_enable_sudo` | `false` | Enable sudo in containers (isolated from host) |
|
|
118
119
|
|
|
119
120
|
## How It Works
|
|
120
121
|
|
|
@@ -204,6 +205,88 @@ docker build -t my-custom-runtime:latest -f Dockerfile.custom .
|
|
|
204
205
|
command_line_docker_image: "my-custom-runtime:latest"
|
|
205
206
|
```
|
|
206
207
|
|
|
208
|
+
### Sudo Variant (Runtime Package Installation)
|
|
209
|
+
|
|
210
|
+
The sudo variant allows agents to install system packages at runtime inside their Docker container.
|
|
211
|
+
|
|
212
|
+
**IMPORTANT: Build the image before first use:**
|
|
213
|
+
```bash
|
|
214
|
+
bash massgen/docker/build.sh --sudo
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
This builds the `massgen/mcp-runtime-sudo:latest` image locally, with sudo access enabled inside the container. (This image is not available on Docker Hub - you must build it yourself.)
|
|
218
|
+
|
|
219
|
+
**Enable in config:**
|
|
220
|
+
```yaml
|
|
221
|
+
agent:
|
|
222
|
+
backend:
|
|
223
|
+
cwd: "workspace"
|
|
224
|
+
enable_mcp_command_line: true
|
|
225
|
+
command_line_execution_mode: "docker"
|
|
226
|
+
command_line_docker_enable_sudo: true # Automatically uses sudo image
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
**What agents can do with sudo:**
|
|
230
|
+
```bash
|
|
231
|
+
# Install system packages at runtime
|
|
232
|
+
sudo apt-get update && sudo apt-get install -y ffmpeg
|
|
233
|
+
|
|
234
|
+
# Install additional Python packages
|
|
235
|
+
sudo pip install tensorflow
|
|
236
|
+
|
|
237
|
+
# Install additional system tools inside the container
|
|
238
|
+
sudo apt-get install -y postgresql-client
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**Security model - Is this safe?**
|
|
242
|
+
|
|
243
|
+
**YES, it's still safe** because Docker container isolation is the primary security boundary:
|
|
244
|
+
|
|
245
|
+
✅ **Container is fully isolated from your host:**
|
|
246
|
+
- Sudo inside container ≠ sudo on your computer
|
|
247
|
+
- Agent can only access mounted volumes (workspace, context paths)
|
|
248
|
+
- Cannot access your host filesystem outside mounts
|
|
249
|
+
- Cannot affect host processes or system configuration
|
|
250
|
+
- Docker namespaces/cgroups provide strong isolation
|
|
251
|
+
|
|
252
|
+
✅ **What sudo can and cannot do:**
|
|
253
|
+
- ✅ Can: Install packages inside the container (apt, pip, npm)
|
|
254
|
+
- ✅ Can: Modify container system configuration
|
|
255
|
+
- ✅ Can: Read/write mounted workspace (same as without sudo)
|
|
256
|
+
- ❌ Cannot: Access your host filesystem outside mounts
|
|
257
|
+
- ❌ Cannot: Affect your host system
|
|
258
|
+
- ❌ Cannot: Break out of the container (unless Docker vulnerability exists)
|
|
259
|
+
|
|
260
|
+
ℹ️ **Note:**
|
|
261
|
+
- Container escape vulnerabilities (CVEs in Docker/kernel) are extremely rare and quickly patched
|
|
262
|
+
- Standard Docker security practices apply
|
|
263
|
+
|
|
264
|
+
❌ **Don't do this (makes it unsafe):**
|
|
265
|
+
- Enabling privileged mode (not exposed in MassGen, would need code changes)
|
|
266
|
+
- Mounting sensitive host paths like `/`, `/etc`, `/usr`
|
|
267
|
+
- Disabling security features like AppArmor/SELinux
|
|
268
|
+
|
|
269
|
+
**When to use sudo variant vs custom images:**
|
|
270
|
+
|
|
271
|
+
| Approach | Use When | Performance | Security |
|
|
272
|
+
|----------|----------|-------------|----------|
|
|
273
|
+
| **Sudo variant** | Need flexibility, unknown packages upfront, prototyping | Slower (runtime install) | Good (container isolated) |
|
|
274
|
+
| **Custom image** | Know packages needed, production use, performance matters | Fast (pre-installed) | Best (minimal attack surface) |
|
|
275
|
+
|
|
276
|
+
**Custom image example (recommended for production):**
|
|
277
|
+
```dockerfile
|
|
278
|
+
FROM massgen/mcp-runtime:latest
|
|
279
|
+
USER root
|
|
280
|
+
RUN apt-get update && apt-get install -y ffmpeg postgresql-client
|
|
281
|
+
USER massgen
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Build: `docker build -t my-runtime:latest .`
|
|
285
|
+
|
|
286
|
+
Use: `command_line_docker_image: "my-runtime:latest"`
|
|
287
|
+
|
|
288
|
+
**Bottom line:** The sudo variant is safe for most use cases because Docker container isolation is strong. Custom images are preferred for production because they're faster and have a smaller attack surface, but sudo is fine for development and prototyping.
|
|
289
|
+
|
|
207
290
|
## Security Features
|
|
208
291
|
|
|
209
292
|
### Filesystem Isolation
|