massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +56 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
- massgen/backend/capabilities.py +6 -6
- massgen/backend/chat_completions.py +18 -11
- massgen/backend/claude_code.py +9 -1
- massgen/backend/gemini.py +71 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/grok.py +39 -6
- massgen/backend/response.py +18 -11
- massgen/chat_agent.py +9 -3
- massgen/cli.py +319 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +78 -20
- massgen/configs/basic/multi/three_agents_default.yaml +2 -2
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/configs/tools/memory/README.md +199 -0
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
- massgen/configs/tools/memory/test_context_window_management.py +286 -0
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +8 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +123 -3
- massgen/orchestrator.py +652 -44
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_intelligent_planning_mode.py +643 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/token_manager/token_manager.py +13 -4
- massgen/tool/__init__.py +4 -0
- massgen/tool/_multimodal_tools/understand_audio.py +193 -0
- massgen/tool/_multimodal_tools/understand_file.py +550 -0
- massgen/tool/_multimodal_tools/understand_image.py +212 -0
- massgen/tool/_multimodal_tools/understand_video.py +313 -0
- massgen/tool/docs/multimodal_tools.md +779 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
massgen/config_builder.py
CHANGED
|
@@ -191,27 +191,27 @@ class ConfigBuilder:
|
|
|
191
191
|
},
|
|
192
192
|
"multimodal": {
|
|
193
193
|
"name": "Multimodal Analysis",
|
|
194
|
-
"description": "Analyze images, audio, and
|
|
194
|
+
"description": "Analyze images, audio, video, and documents",
|
|
195
195
|
"recommended_agents": 2,
|
|
196
196
|
"recommended_tools": ["image_understanding", "audio_understanding", "video_understanding"],
|
|
197
197
|
"agent_types": "all",
|
|
198
|
-
"notes": "
|
|
198
|
+
"notes": "Combines custom tools + built-in backend capabilities",
|
|
199
199
|
"info": """[bold cyan]Features auto-configured for this preset:[/bold cyan]
|
|
200
200
|
|
|
201
|
-
[green]✓[/green] [bold]
|
|
202
|
-
• Analyze images
|
|
203
|
-
•
|
|
204
|
-
•
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
• Transcribe and analyze audio
|
|
208
|
-
• Available for: Claude, ChatCompletion
|
|
201
|
+
[green]✓[/green] [bold]Custom Multimodal Tools (New v0.1.3+)[/bold]
|
|
202
|
+
• understand_image - Analyze workspace images with gpt-4.1
|
|
203
|
+
• understand_audio - Transcribe and analyze audio files
|
|
204
|
+
• understand_video - Extract frames and analyze videos
|
|
205
|
+
• understand_file - Process documents (PDF, DOCX, XLSX, PPTX)
|
|
206
|
+
• Works with any backend, processes workspace files
|
|
209
207
|
|
|
210
|
-
[green]✓[/green] [bold]
|
|
211
|
-
•
|
|
212
|
-
•
|
|
208
|
+
[green]✓[/green] [bold]Built-in Backend Capabilities[/bold] [dim](passive)[/dim]
|
|
209
|
+
• Image understanding via upload_files (OpenAI, Claude, Gemini, Azure)
|
|
210
|
+
• Audio understanding via upload_files (Claude, ChatCompletion)
|
|
211
|
+
• Video understanding via upload_files (Claude, ChatCompletion, OpenAI)
|
|
212
|
+
• Image/audio/video generation (where supported)
|
|
213
213
|
|
|
214
|
-
[dim]Use this for:[/dim] Image analysis,
|
|
214
|
+
[dim]Use this for:[/dim] Image analysis, audio transcription, video analysis, document processing.""",
|
|
215
215
|
},
|
|
216
216
|
}
|
|
217
217
|
|
|
@@ -617,6 +617,20 @@ class ConfigBuilder:
|
|
|
617
617
|
# Build choices for questionary - organized with tool hints
|
|
618
618
|
choices = []
|
|
619
619
|
|
|
620
|
+
# Add spacing before first option (using spaces to avoid line)
|
|
621
|
+
choices.append(questionary.Separator(" "))
|
|
622
|
+
|
|
623
|
+
# First option: Browse existing configs (most common for new users)
|
|
624
|
+
choices.append(
|
|
625
|
+
questionary.Choice(
|
|
626
|
+
title="📦 Browse ready-to-use configs / examples",
|
|
627
|
+
value="__browse_existing__",
|
|
628
|
+
),
|
|
629
|
+
)
|
|
630
|
+
choices.append(questionary.Separator(" "))
|
|
631
|
+
choices.append(questionary.Separator("┄┄ or build from template ┄┄"))
|
|
632
|
+
choices.append(questionary.Separator(" "))
|
|
633
|
+
|
|
620
634
|
# Define display with brief tool descriptions
|
|
621
635
|
display_info = [
|
|
622
636
|
("custom", "⚙️", "Custom Configuration", "Choose your own tools"),
|
|
@@ -643,12 +657,12 @@ class ConfigBuilder:
|
|
|
643
657
|
value=use_case_id,
|
|
644
658
|
),
|
|
645
659
|
)
|
|
660
|
+
|
|
646
661
|
except Exception as e:
|
|
647
662
|
console.print(f"[warning]⚠️ Could not display use case: {e}[/warning]")
|
|
648
663
|
|
|
649
664
|
# Add helpful context before the prompt
|
|
650
|
-
console.print("[dim]
|
|
651
|
-
console.print("[dim]You can customize everything in later steps.[/dim]\n")
|
|
665
|
+
console.print("[dim]Browse ready-to-use configs, or pick a template to build your own.[/dim]\n")
|
|
652
666
|
|
|
653
667
|
use_case_id = questionary.select(
|
|
654
668
|
"Select your use case:",
|
|
@@ -666,6 +680,10 @@ class ConfigBuilder:
|
|
|
666
680
|
if use_case_id is None:
|
|
667
681
|
raise KeyboardInterrupt # User cancelled, exit immediately
|
|
668
682
|
|
|
683
|
+
# Handle special value for browsing existing configs
|
|
684
|
+
if use_case_id == "__browse_existing__":
|
|
685
|
+
return "__browse_existing__"
|
|
686
|
+
|
|
669
687
|
# Show selection with description
|
|
670
688
|
selected_info = self.USE_CASES[use_case_id]
|
|
671
689
|
console.print(f"\n✅ Selected: [green]{selected_info.get('name', use_case_id)}[/green]")
|
|
@@ -1521,6 +1539,83 @@ class ConfigBuilder:
|
|
|
1521
1539
|
|
|
1522
1540
|
console.print(f"✅ Enabled {len(selected_gen)} generation capability(ies)")
|
|
1523
1541
|
|
|
1542
|
+
# Custom multimodal understanding tools (new in v0.1.3+)
|
|
1543
|
+
# Available for ALL use cases - these are active tools that process workspace files
|
|
1544
|
+
console.print()
|
|
1545
|
+
console.print("[cyan]Custom Multimodal Understanding Tools (New in v0.1.3+):[/cyan]")
|
|
1546
|
+
console.print("[dim]These tools let agents analyze workspace files using OpenAI's gpt-4.1 API:[/dim]")
|
|
1547
|
+
console.print("[dim] • Works with any backend (uses OpenAI for analysis)[/dim]")
|
|
1548
|
+
console.print("[dim] • Processes files agents generate or discover during execution[/dim]")
|
|
1549
|
+
console.print("[dim] • Returns structured JSON with detailed metadata[/dim]")
|
|
1550
|
+
console.print("[dim] • Requires OPENAI_API_KEY in your .env file[/dim]")
|
|
1551
|
+
|
|
1552
|
+
# Default to True for multimodal use case, False for others
|
|
1553
|
+
default_add_mm = use_case == "multimodal"
|
|
1554
|
+
|
|
1555
|
+
if questionary.confirm("Add custom multimodal understanding tools?", default=default_add_mm).ask():
|
|
1556
|
+
# Determine default selections based on use case
|
|
1557
|
+
if use_case == "multimodal":
|
|
1558
|
+
# For multimodal preset, select all by default
|
|
1559
|
+
pass
|
|
1560
|
+
elif use_case == "data_analysis":
|
|
1561
|
+
# For data analysis, suggest image and file tools
|
|
1562
|
+
pass
|
|
1563
|
+
else:
|
|
1564
|
+
# For other use cases, none selected by default (let user choose)
|
|
1565
|
+
pass
|
|
1566
|
+
|
|
1567
|
+
if use_case == "multimodal":
|
|
1568
|
+
multimodal_tool_choices = [
|
|
1569
|
+
questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=True),
|
|
1570
|
+
questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=True),
|
|
1571
|
+
questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=True),
|
|
1572
|
+
questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=True),
|
|
1573
|
+
]
|
|
1574
|
+
elif use_case == "data_analysis":
|
|
1575
|
+
multimodal_tool_choices = [
|
|
1576
|
+
questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=True),
|
|
1577
|
+
questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=False),
|
|
1578
|
+
questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=False),
|
|
1579
|
+
questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=True),
|
|
1580
|
+
]
|
|
1581
|
+
else:
|
|
1582
|
+
multimodal_tool_choices = [
|
|
1583
|
+
questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=False),
|
|
1584
|
+
questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=False),
|
|
1585
|
+
questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=False),
|
|
1586
|
+
questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=False),
|
|
1587
|
+
]
|
|
1588
|
+
|
|
1589
|
+
selected_mm_tools = questionary.checkbox(
|
|
1590
|
+
"Select custom multimodal tools (Space to select, Enter to confirm):",
|
|
1591
|
+
choices=multimodal_tool_choices,
|
|
1592
|
+
style=questionary.Style(
|
|
1593
|
+
[
|
|
1594
|
+
("selected", "fg:cyan"),
|
|
1595
|
+
("pointer", "fg:cyan bold"),
|
|
1596
|
+
("highlighted", "fg:cyan"),
|
|
1597
|
+
],
|
|
1598
|
+
),
|
|
1599
|
+
use_arrow_keys=True,
|
|
1600
|
+
).ask()
|
|
1601
|
+
|
|
1602
|
+
if selected_mm_tools:
|
|
1603
|
+
# Initialize custom_tools list if not exists
|
|
1604
|
+
if "custom_tools" not in agent["backend"]:
|
|
1605
|
+
agent["backend"]["custom_tools"] = []
|
|
1606
|
+
|
|
1607
|
+
# Add selected tools
|
|
1608
|
+
for tool_name in selected_mm_tools:
|
|
1609
|
+
tool_config = {
|
|
1610
|
+
"name": [tool_name],
|
|
1611
|
+
"category": "multimodal",
|
|
1612
|
+
"path": f"massgen/tool/_multimodal_tools/{tool_name}.py",
|
|
1613
|
+
"function": [tool_name],
|
|
1614
|
+
}
|
|
1615
|
+
agent["backend"]["custom_tools"].append(tool_config)
|
|
1616
|
+
|
|
1617
|
+
console.print(f"✅ Added {len(selected_mm_tools)} custom multimodal tool(s)")
|
|
1618
|
+
|
|
1524
1619
|
# MCP servers (custom only)
|
|
1525
1620
|
# Note: Filesystem is handled internally above, NOT as external MCP
|
|
1526
1621
|
if "mcp" in provider_info.get("supports", []):
|
|
@@ -1807,12 +1902,18 @@ class ConfigBuilder:
|
|
|
1807
1902
|
"code_execution": "💻 Code execution",
|
|
1808
1903
|
"web_search": "🔍 Web search",
|
|
1809
1904
|
"mcp": "🔌 MCP servers",
|
|
1905
|
+
"image_understanding": "📷 Image understanding (backend capability)",
|
|
1906
|
+
"audio_understanding": "🎵 Audio understanding (backend capability)",
|
|
1907
|
+
"video_understanding": "🎬 Video understanding (backend capability)",
|
|
1810
1908
|
}.get(tool, tool)
|
|
1811
1909
|
console.print(f" • {tool_display}")
|
|
1812
1910
|
|
|
1813
1911
|
if use_case == "coding_docker":
|
|
1814
1912
|
console.print(" • 🐳 Docker isolated execution")
|
|
1815
1913
|
|
|
1914
|
+
if use_case == "multimodal":
|
|
1915
|
+
console.print(" • 🎨 Custom multimodal tools (understand_image, understand_audio, understand_video, understand_file)")
|
|
1916
|
+
|
|
1816
1917
|
console.print()
|
|
1817
1918
|
|
|
1818
1919
|
# Let users select models for each agent
|
|
@@ -2302,7 +2403,7 @@ class ConfigBuilder:
|
|
|
2302
2403
|
orchestrator_config = {}
|
|
2303
2404
|
orchestrator_config["session_storage"] = "sessions"
|
|
2304
2405
|
console.print()
|
|
2305
|
-
console.print(" ✅ Multi-turn sessions enabled (supports persistent conversations
|
|
2406
|
+
console.print(" ✅ Multi-turn sessions enabled (supports persistent conversations)")
|
|
2306
2407
|
|
|
2307
2408
|
# Planning Mode (for MCP irreversible actions) - only ask if MCPs are configured
|
|
2308
2409
|
has_mcp = any(a.get("backend", {}).get("mcp_servers") for a in agents)
|
|
@@ -2321,6 +2422,30 @@ class ConfigBuilder:
|
|
|
2321
2422
|
console.print()
|
|
2322
2423
|
console.print(" ✅ Planning mode enabled - MCP tools will plan without executing during coordination")
|
|
2323
2424
|
|
|
2425
|
+
# Orchestration Restart Feature
|
|
2426
|
+
console.print()
|
|
2427
|
+
console.print(" [dim]Orchestration Restart: Automatic quality checks with self-correction[/dim]")
|
|
2428
|
+
console.print(" [dim]• Agent evaluates its own answer after coordination[/dim]")
|
|
2429
|
+
console.print(" [dim]• Can restart with specific improvement instructions if incomplete[/dim]")
|
|
2430
|
+
console.print(" [dim]• Each attempt gets isolated logs in attempt_1/, attempt_2/, etc.[/dim]")
|
|
2431
|
+
console.print(" [dim]• Works with all backends (OpenAI, Claude, Gemini, Grok, etc.)[/dim]")
|
|
2432
|
+
console.print(" [dim]• 0 = no restarts (default), 1-2 = recommended, 3 = maximum[/dim]")
|
|
2433
|
+
console.print()
|
|
2434
|
+
|
|
2435
|
+
restart_input = Prompt.ask(
|
|
2436
|
+
" [prompt]Max orchestration restarts (0-3)[/prompt]",
|
|
2437
|
+
choices=["0", "1", "2", "3"],
|
|
2438
|
+
default="0",
|
|
2439
|
+
)
|
|
2440
|
+
|
|
2441
|
+
max_restarts = int(restart_input)
|
|
2442
|
+
if max_restarts > 0:
|
|
2443
|
+
if "coordination" not in orchestrator_config:
|
|
2444
|
+
orchestrator_config["coordination"] = {}
|
|
2445
|
+
orchestrator_config["coordination"]["max_orchestration_restarts"] = max_restarts
|
|
2446
|
+
console.print()
|
|
2447
|
+
console.print(f" ✅ Orchestration restart enabled: up to {max_restarts} restart(s) allowed")
|
|
2448
|
+
|
|
2324
2449
|
# Voting Sensitivity - only ask for multi-agent setups
|
|
2325
2450
|
if len(agents) > 1:
|
|
2326
2451
|
console.print()
|
|
@@ -2634,6 +2759,21 @@ class ConfigBuilder:
|
|
|
2634
2759
|
console.print("[warning]⚠️ No use case selected.[/warning]")
|
|
2635
2760
|
return None
|
|
2636
2761
|
|
|
2762
|
+
# Handle special case: user wants to browse existing configs
|
|
2763
|
+
if use_case == "__browse_existing__":
|
|
2764
|
+
console.print("\n[cyan]Opening config selector...[/cyan]\n")
|
|
2765
|
+
# Import here to avoid circular dependency
|
|
2766
|
+
from .cli import interactive_config_selector
|
|
2767
|
+
|
|
2768
|
+
selected_config = interactive_config_selector()
|
|
2769
|
+
if selected_config:
|
|
2770
|
+
console.print(f"\n[green]✓ Selected config: {selected_config}[/green]\n")
|
|
2771
|
+
# Return the selected config as if it was created
|
|
2772
|
+
return (selected_config, None)
|
|
2773
|
+
else:
|
|
2774
|
+
console.print("\n[yellow]⚠️ No config selected[/yellow]\n")
|
|
2775
|
+
return None
|
|
2776
|
+
|
|
2637
2777
|
# Step 2: Configure agents
|
|
2638
2778
|
agents = self.configure_agents(use_case, api_keys)
|
|
2639
2779
|
if not agents:
|
|
@@ -2652,7 +2792,12 @@ class ConfigBuilder:
|
|
|
2652
2792
|
filepath = self.review_and_save(agents, orchestrator_config)
|
|
2653
2793
|
|
|
2654
2794
|
if filepath:
|
|
2655
|
-
#
|
|
2795
|
+
# In default_mode (first-run), skip "Run now?" and go straight to interactive mode
|
|
2796
|
+
if self.default_mode:
|
|
2797
|
+
# Config already saved by review_and_save(), just return to launch interactive mode
|
|
2798
|
+
return (filepath, None)
|
|
2799
|
+
|
|
2800
|
+
# In regular --init mode, ask if user wants to run now
|
|
2656
2801
|
run_choice = Confirm.ask("\n[prompt]Run MassGen with this configuration now?[/prompt]", default=True)
|
|
2657
2802
|
if run_choice is None:
|
|
2658
2803
|
raise KeyboardInterrupt # User cancelled
|
massgen/configs/README.md
CHANGED
|
@@ -227,44 +227,102 @@ Most configurations use environment variables for API keys:so
|
|
|
227
227
|
|
|
228
228
|
## Release History & Examples
|
|
229
229
|
|
|
230
|
-
### v0.1.
|
|
231
|
-
**New Features:**
|
|
230
|
+
### v0.1.3 - Latest
|
|
231
|
+
**New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
|
|
232
232
|
|
|
233
233
|
**Configuration Files:**
|
|
234
|
-
- `
|
|
235
|
-
- `
|
|
236
|
-
- `
|
|
234
|
+
- `configs/tools/custom_tools/multimodal_tools/understand_image.yaml` - Image analysis configuration
|
|
235
|
+
- `configs/tools/custom_tools/multimodal_tools/understand_audio.yaml` - Audio transcription configuration
|
|
236
|
+
- `configs/tools/custom_tools/multimodal_tools/understand_video.yaml` - Video analysis configuration
|
|
237
|
+
- `configs/tools/custom_tools/multimodal_tools/understand_file.yaml` - Document processing configuration
|
|
238
|
+
|
|
239
|
+
**Documentation:**
|
|
240
|
+
- `massgen/tool/docs/multimodal_tools.md` - Complete 779-line multimodal tools guide
|
|
241
|
+
- `docs/source/user_guide/multimodal.rst` - Updated multimodal documentation with custom tools
|
|
242
|
+
- `docs/source/user_guide/code_execution.rst` - Enhanced with 98 lines documenting sudo mode
|
|
243
|
+
- `massgen/docker/README.md` - Updated Docker documentation with sudo mode instructions
|
|
237
244
|
|
|
238
245
|
**Case Study:**
|
|
239
|
-
- [
|
|
246
|
+
- [Multimodal Video Understanding](../../docs/case_studies/multimodal-case-study-video-analysis.md)
|
|
247
|
+
|
|
248
|
+
**Example Resources:**
|
|
249
|
+
- `configs/resources/v0.1.3-example/multimodality.jpg` - Image example
|
|
250
|
+
- `configs/resources/v0.1.3-example/Sherlock_Holmes.mp3` - Audio example
|
|
251
|
+
- `configs/resources/v0.1.3-example/oppenheimer_trailer_1920.mp4` - Video example
|
|
252
|
+
- `configs/resources/v0.1.3-example/TUMIX.pdf` - PDF document example
|
|
240
253
|
|
|
241
254
|
**Key Features:**
|
|
242
|
-
- **
|
|
243
|
-
- **
|
|
244
|
-
- **
|
|
245
|
-
- **
|
|
246
|
-
- **Self-Evolution**: Agents autonomously analyze GitHub issues and market trends for data-driven roadmaps
|
|
247
|
-
- **Gemini Refactoring**: Enhanced architecture with separate MCP manager and tracking modules
|
|
255
|
+
- **Post-Evaluation Tools**: Submit and restart capabilities for winning agents with confidence assessments
|
|
256
|
+
- **Multimodal Understanding**: Analyze images, audio, video, and documents using GPT-4.1
|
|
257
|
+
- **Docker Sudo Mode**: Execute privileged commands in containerized environments
|
|
258
|
+
- **Config Builder**: Improved workflow with auto-detection and better provider handling
|
|
248
259
|
|
|
249
260
|
**Try it:**
|
|
250
261
|
```bash
|
|
251
262
|
# Install or upgrade
|
|
252
263
|
pip install --upgrade massgen
|
|
253
264
|
|
|
254
|
-
#
|
|
255
|
-
|
|
265
|
+
# Try multimodal image understanding
|
|
266
|
+
# (Requires OPENAI_API_KEY in .env)
|
|
267
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_image \
|
|
268
|
+
"Please summarize the content in this image."
|
|
269
|
+
|
|
270
|
+
# Try multimodal audio understanding
|
|
271
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_audio \
|
|
272
|
+
"Please summarize the content in this audio."
|
|
273
|
+
|
|
274
|
+
# Try multimodal video understanding
|
|
275
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video \
|
|
276
|
+
"What's happening in this video?"
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### v0.1.2
|
|
280
|
+
**New Features:** Intelligent Planning Mode, Claude 4.5 Haiku Support, Grok Web Search Improvements
|
|
281
|
+
|
|
282
|
+
**Configuration Files:**
|
|
283
|
+
- `configs/tools/planning/` - 5 planning mode configurations with selective blocking
|
|
284
|
+
- `configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
|
|
285
|
+
|
|
286
|
+
**Documentation:**
|
|
287
|
+
- `docs/case_studies/INTELLIGENT_PLANNING_MODE.md` - Complete intelligent planning mode guide
|
|
288
|
+
|
|
289
|
+
**Key Features:**
|
|
290
|
+
- **Intelligent Planning Mode**: Automatic analysis of question irreversibility for dynamic MCP tool blocking
|
|
291
|
+
- **Selective Tool Blocking**: Granular control over which MCP tools are blocked during planning
|
|
292
|
+
- **Enhanced Safety**: Read-only operations allowed, write operations blocked during coordination
|
|
293
|
+
- **Latest Models**: Claude 4.5 Haiku support with updated model priorities
|
|
256
294
|
|
|
295
|
+
**Try it:**
|
|
296
|
+
```bash
|
|
297
|
+
# Try intelligent planning mode with MCP tools
|
|
298
|
+
# (Please read the YAML file for required API keys: DISCORD_TOKEN, OPENAI_API_KEY, etc.)
|
|
299
|
+
massgen --config @examples/tools/planning/five_agents_discord_mcp_planning_mode \
|
|
300
|
+
"Check recent messages in our development channel, summarize the discussion, and post a helpful response about the current topic."
|
|
301
|
+
|
|
302
|
+
# Use latest Claude 4.5 Haiku model
|
|
303
|
+
# (Requires ANTHROPIC_API_KEY in .env)
|
|
304
|
+
massgen --model claude-haiku-4-5-20251001 \
|
|
305
|
+
"Summarize the latest AI developments"
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### v0.1.1
|
|
309
|
+
**New Features:** Custom Tools System, Voting Sensitivity Controls, Interactive Configuration Builder
|
|
310
|
+
|
|
311
|
+
**Key Features:**
|
|
312
|
+
- Custom tools registration using `ToolManager` class
|
|
313
|
+
- Three-tier voting system (lenient/balanced/strict)
|
|
314
|
+
- 40+ custom tool examples
|
|
315
|
+
- Backend capabilities registry
|
|
316
|
+
|
|
317
|
+
**Try it:**
|
|
318
|
+
```bash
|
|
257
319
|
# Try custom tools with agents
|
|
258
320
|
massgen --config @examples/tools/custom_tools/claude_custom_tool_example \
|
|
259
|
-
"
|
|
321
|
+
"whats the sum of 123 and 456?"
|
|
260
322
|
|
|
261
323
|
# Test voting sensitivity controls
|
|
262
324
|
massgen --config @examples/voting/gemini_gpt_voting_sensitivity \
|
|
263
|
-
"
|
|
264
|
-
|
|
265
|
-
# Self-evolution with GitHub issue analysis
|
|
266
|
-
massgen --config @examples/tools/custom_tools/github_issue_market_analysis \
|
|
267
|
-
"Analyze the massgen dir and MassGen GitHub issues to understand what features users are requesting. Also research current trends in multi-agent AI systems and LLM orchestration. Based on the existing code, the open issues and market trends, write a prioritized recommendation report for the next release."
|
|
325
|
+
"Your question here"
|
|
268
326
|
```
|
|
269
327
|
|
|
270
328
|
### v0.1.0
|
|
@@ -17,7 +17,7 @@ agents:
|
|
|
17
17
|
text:
|
|
18
18
|
verbosity: "medium"
|
|
19
19
|
reasoning:
|
|
20
|
-
effort: "
|
|
20
|
+
effort: "low"
|
|
21
21
|
summary: "auto"
|
|
22
22
|
enable_web_search: true
|
|
23
23
|
enable_code_interpreter: true
|
|
@@ -26,7 +26,7 @@ agents:
|
|
|
26
26
|
- id: "grok3mini"
|
|
27
27
|
backend:
|
|
28
28
|
type: "grok"
|
|
29
|
-
model: "grok-
|
|
29
|
+
model: "grok-4-fast"
|
|
30
30
|
enable_web_search: true
|
|
31
31
|
# system_message: "You are a helpful AI assistant with web search capabilities. For any question involving current events, recent information, or real-time data, ALWAYS use web search first."
|
|
32
32
|
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Two Agent Orchestration Restart Test Configuration (No Filesystem)
|
|
2
|
+
# This configuration demonstrates the orchestration restart feature for self-correcting coordination
|
|
3
|
+
# Uses debug_final_answer to force restart on first attempt for testing
|
|
4
|
+
#
|
|
5
|
+
# Setup Requirements:
|
|
6
|
+
# 1. Set environment variable in .env file:
|
|
7
|
+
# OPENAI_API_KEY="your_openai_key"
|
|
8
|
+
#
|
|
9
|
+
# Example usage:
|
|
10
|
+
# massgen --config @examples/configs/debug/restart_test_controlled.yaml "Describe two famous Beatles members: John Lennon and Paul McCartney. Include their birth year, role in the band, and notable contributions."
|
|
11
|
+
#
|
|
12
|
+
# Expected behavior:
|
|
13
|
+
# - Attempt 1: Agents coordinate, final presentation shows debug override (incomplete answer)
|
|
14
|
+
# - Post-evaluation detects incomplete answer and calls restart_orchestration()
|
|
15
|
+
# - Restart banner displays with reason and instructions
|
|
16
|
+
# - Attempt 2: Agents see restart context and produce complete answer
|
|
17
|
+
# - Post-evaluation approves and calls submit()
|
|
18
|
+
# - Inspection menu appears
|
|
19
|
+
|
|
20
|
+
agents:
|
|
21
|
+
- id: "agent_a"
|
|
22
|
+
backend:
|
|
23
|
+
type: "openai"
|
|
24
|
+
model: "gpt-5-nano"
|
|
25
|
+
# type: "gemini"
|
|
26
|
+
# model: "gemini-2.5-flash"
|
|
27
|
+
# type: "grok"
|
|
28
|
+
# model: "grok-4-fast"
|
|
29
|
+
# type: "claude"
|
|
30
|
+
# model: "claude-sonnet-4-5-20250929"
|
|
31
|
+
# type: "claude_code"
|
|
32
|
+
# model: "claude-sonnet-4-5-20250929"
|
|
33
|
+
# cwd: "workspace1"
|
|
34
|
+
|
|
35
|
+
- id: "agent_b"
|
|
36
|
+
backend:
|
|
37
|
+
type: "openai"
|
|
38
|
+
model: "gpt-5-nano"
|
|
39
|
+
|
|
40
|
+
orchestrator:
|
|
41
|
+
snapshot_storage: "snapshots"
|
|
42
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
43
|
+
|
|
44
|
+
# Restart configuration
|
|
45
|
+
coordination:
|
|
46
|
+
max_orchestration_restarts: 2 # Allow up to 2 restarts (3 total attempts)
|
|
47
|
+
|
|
48
|
+
# DEBUG MODE: Override final answer on attempt 1 to simulate incomplete response
|
|
49
|
+
debug_final_answer: |
|
|
50
|
+
John Lennon was born in 1940 in Liverpool, England. He was a founding member
|
|
51
|
+
of The Beatles and served as rhythm guitarist and vocalist. He co-wrote many
|
|
52
|
+
of the band's greatest hits with Paul McCartney. After the Beatles disbanded,
|
|
53
|
+
he pursued a successful solo career and became known for his peace activism.
|
|
54
|
+
|
|
55
|
+
# Use lenient voting for faster coordination in debug mode
|
|
56
|
+
voting_sensitivity: "lenient"
|
|
57
|
+
|
|
58
|
+
ui:
|
|
59
|
+
display_type: "rich_terminal"
|
|
60
|
+
logging_enabled: true
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Two Agent Orchestration Restart Test Configuration with Filesystem
|
|
2
|
+
# This configuration demonstrates the orchestration restart feature with MCP filesystem tools
|
|
3
|
+
# Uses debug_final_answer to force restart on first attempt for testing file creation tasks
|
|
4
|
+
#
|
|
5
|
+
# Setup Requirements:
|
|
6
|
+
# 1. Set environment variable in .env file:
|
|
7
|
+
# OPENAI_API_KEY="your_openai_key"
|
|
8
|
+
# 2. Filesystem MCP servers will be automatically available via npx
|
|
9
|
+
#
|
|
10
|
+
# Example usage:
|
|
11
|
+
# massgen --config @examples/configs/debug/restart_test_controlled_filesystem.yaml "Create three Python files: main.py with a hello() function, utils.py with a helper() function, and tests.py with a test_hello() function."
|
|
12
|
+
#
|
|
13
|
+
# Expected behavior:
|
|
14
|
+
# - Attempt 1: Agents coordinate with planning mode (no file execution during coordination)
|
|
15
|
+
# - Final presentation shows debug override claiming only a hello.txt file was created
|
|
16
|
+
# - Post-evaluation uses MCP tools to verify files and detects missing files
|
|
17
|
+
# - Calls restart_orchestration() with specific instructions
|
|
18
|
+
# - Restart banner displays
|
|
19
|
+
# - Attempt 2: Agents coordinate with restart instructions to create ALL files
|
|
20
|
+
# - Post-evaluation verifies all files exist using MCP tools
|
|
21
|
+
# - Calls submit() - Inspection menu appears
|
|
22
|
+
|
|
23
|
+
agents:
|
|
24
|
+
- id: "agent_a"
|
|
25
|
+
backend:
|
|
26
|
+
type: "openai"
|
|
27
|
+
model: "gpt-5-nano"
|
|
28
|
+
# type: "gemini"
|
|
29
|
+
# model: "gemini-2.5-flash"
|
|
30
|
+
# type: "grok"
|
|
31
|
+
# model: "grok-4-fast"
|
|
32
|
+
# type: "claude"
|
|
33
|
+
# model: "claude-sonnet-4-5-20250929"
|
|
34
|
+
# type: "claude_code"
|
|
35
|
+
# model: "claude-sonnet-4-5-20250929"
|
|
36
|
+
cwd: "workspace1"
|
|
37
|
+
|
|
38
|
+
# - id: "agent_b"
|
|
39
|
+
# backend:
|
|
40
|
+
# type: "openai"
|
|
41
|
+
# model: "gpt-5-nano"
|
|
42
|
+
# cwd: "workspace2"
|
|
43
|
+
|
|
44
|
+
orchestrator:
|
|
45
|
+
snapshot_storage: "snapshots"
|
|
46
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
47
|
+
|
|
48
|
+
# Restart configuration
|
|
49
|
+
coordination:
|
|
50
|
+
max_orchestration_restarts: 2 # Allow up to 2 restarts (3 total attempts)
|
|
51
|
+
|
|
52
|
+
# DEBUG MODE: Override final answer on attempt 1 to simulate incomplete file creation
|
|
53
|
+
debug_final_answer: |
|
|
54
|
+
I created a hello.txt file.
|
|
55
|
+
|
|
56
|
+
# file with the following content:
|
|
57
|
+
|
|
58
|
+
# ```python
|
|
59
|
+
# def hello():
|
|
60
|
+
# print("Hello, World!")
|
|
61
|
+
|
|
62
|
+
# if __name__ == "__main__":
|
|
63
|
+
# hello()
|
|
64
|
+
# ```
|
|
65
|
+
|
|
66
|
+
# The file has been created successfully.
|
|
67
|
+
|
|
68
|
+
# Use lenient voting for faster coordination in debug mode
|
|
69
|
+
voting_sensitivity: "lenient"
|
|
70
|
+
|
|
71
|
+
ui:
|
|
72
|
+
display_type: "rich_terminal"
|
|
73
|
+
logging_enabled: true
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Docker Isolation with Sudo Support
|
|
2
|
+
# Run with: uv run python -m massgen.cli --config massgen/configs/tools/code-execution/docker_with_sudo.yaml "Write a factorial function and test it"
|
|
3
|
+
# Expected behavior: Commands execute in an isolated Docker container, with sudo access available for installing packages at runtime
|
|
4
|
+
#
|
|
5
|
+
# Prerequisites:
|
|
6
|
+
# 1. Docker installed and running
|
|
7
|
+
# 2. Build sudo image FIRST: bash massgen/docker/build.sh --sudo
|
|
8
|
+
#
|
|
9
|
+
# Security: Safe for most use cases - sudo grants root only inside the container; Docker isolation still separates the container from the host.
|
|
10
|
+
|
|
11
|
+
agent:
|
|
12
|
+
id: "docker_sudo_agent"
|
|
13
|
+
backend:
|
|
14
|
+
type: "openai"
|
|
15
|
+
model: "gpt-5-mini"
|
|
16
|
+
cwd: "workspace1"
|
|
17
|
+
|
|
18
|
+
# Enable command execution with Docker isolation and sudo
|
|
19
|
+
enable_mcp_command_line: true
|
|
20
|
+
command_line_execution_mode: "docker"
|
|
21
|
+
command_line_docker_enable_sudo: true # Enables sudo in container
|
|
22
|
+
|
|
23
|
+
# Optional: Specify a custom image (by default, the sudo image variant is selected automatically when sudo is enabled)
|
|
24
|
+
# command_line_docker_image: "massgen/mcp-runtime-sudo:latest"
|
|
25
|
+
|
|
26
|
+
orchestrator:
|
|
27
|
+
snapshot_storage: "snapshots"
|
|
28
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
29
|
+
|
|
30
|
+
timeout_settings:
|
|
31
|
+
orchestrator_timeout_seconds: 180 # Docker startup adds ~5-10s overhead
|
|
32
|
+
|
|
33
|
+
ui:
|
|
34
|
+
display_type: "rich_terminal"
|
|
35
|
+
logging_enabled: true
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# MassGen Configuration: Computer Use - Browser Automation
|
|
2
|
+
#
|
|
3
|
+
# This configuration is optimized for browser-based tasks using Playwright.
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# massgen --config @examples/tools/custom_tools/computer_use_browser_example "Check the latest OpenAI news on bing.com"
|
|
7
|
+
#
|
|
8
|
+
# Prerequisites:
|
|
9
|
+
# 1. Set OPENAI_API_KEY in your .env file
|
|
10
|
+
# 2. Install Playwright: pip install playwright && playwright install
|
|
11
|
+
|
|
12
|
+
agents:
|
|
13
|
+
- id: "browser_automation_agent"
|
|
14
|
+
backend:
|
|
15
|
+
type: "openai"
|
|
16
|
+
model: "computer-use-preview"
|
|
17
|
+
custom_tools:
|
|
18
|
+
- name: ["computer_use"]
|
|
19
|
+
category: "automation"
|
|
20
|
+
path: "massgen/tool/_computer_use/computer_use_tool.py"
|
|
21
|
+
function: ["computer_use"]
|
|
22
|
+
# Default parameters for browser automation
|
|
23
|
+
default_params:
|
|
24
|
+
environment: "browser"
|
|
25
|
+
display_width: 1280
|
|
26
|
+
display_height: 720
|
|
27
|
+
max_iterations: 30
|
|
28
|
+
include_reasoning: true
|
|
29
|
+
|
|
30
|
+
system_message: |
|
|
31
|
+
You are a browser automation specialist with access to the computer_use tool.
|
|
32
|
+
|
|
33
|
+
Your capabilities include:
|
|
34
|
+
- Web navigation and search
|
|
35
|
+
- Form filling and submission
|
|
36
|
+
- Data extraction from websites
|
|
37
|
+
- Multi-step web workflows
|
|
38
|
+
- Screenshot capture and analysis
|
|
39
|
+
|
|
40
|
+
When using the computer_use tool:
|
|
41
|
+
1. Provide clear, specific task descriptions
|
|
42
|
+
2. The tool operates in a browser environment (Playwright)
|
|
43
|
+
3. You'll receive screenshots after each action
|
|
44
|
+
4. Acknowledge safety warnings when they appear
|
|
45
|
+
5. Be patient - complex tasks may require multiple iterations
|
|
46
|
+
|
|
47
|
+
Best practices:
|
|
48
|
+
- Break complex tasks into simple steps
|
|
49
|
+
- Verify each step completed successfully before proceeding
|
|
50
|
+
- Use wait actions when pages are loading
|
|
51
|
+
- Be specific about what you're looking for on the page
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "detailed"
|
|
55
|
+
logging_enabled: true
|
|
56
|
+
show_screenshots: true
|