massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
massgen/config_builder.py CHANGED
@@ -191,27 +191,27 @@ class ConfigBuilder:
191
191
  },
192
192
  "multimodal": {
193
193
  "name": "Multimodal Analysis",
194
- "description": "Analyze images, audio, and video content",
194
+ "description": "Analyze images, audio, video, and documents",
195
195
  "recommended_agents": 2,
196
196
  "recommended_tools": ["image_understanding", "audio_understanding", "video_understanding"],
197
197
  "agent_types": "all",
198
- "notes": "Different backends support different modalities",
198
+ "notes": "Combines custom tools + built-in backend capabilities",
199
199
  "info": """[bold cyan]Features auto-configured for this preset:[/bold cyan]
200
200
 
201
- [green]✓[/green] [bold]Image Understanding[/bold]
202
- • Analyze images, screenshots, charts
203
- OCR and text extraction
204
- Available for: OpenAI, Claude Code, Gemini, Azure OpenAI
205
-
206
- [green]✓[/green] [bold]Audio Understanding[/bold] [dim](where supported)[/dim]
207
- • Transcribe and analyze audio
208
- • Available for: Claude, ChatCompletion
201
+ [green]✓[/green] [bold]Custom Multimodal Tools (New v0.1.3+)[/bold]
202
+ understand_image - Analyze workspace images with gpt-4.1
203
+ understand_audio - Transcribe and analyze audio files
204
+ understand_video - Extract frames and analyze videos
205
+ • understand_file - Process documents (PDF, DOCX, XLSX, PPTX)
206
+ Works with any backend, processes workspace files
209
207
 
210
- [green]✓[/green] [bold]Video Understanding[/bold] [dim](where supported)[/dim]
211
- Analyze video content
212
- Available for: Claude, ChatCompletion, OpenAI
208
+ [green]✓[/green] [bold]Built-in Backend Capabilities[/bold] [dim](passive)[/dim]
209
+ Image understanding via upload_files (OpenAI, Claude, Gemini, Azure)
210
+ Audio understanding via upload_files (Claude, ChatCompletion)
211
+ • Video understanding via upload_files (Claude, ChatCompletion, OpenAI)
212
+ • Image/audio/video generation (where supported)
213
213
 
214
- [dim]Use this for:[/dim] Image analysis, screenshot interpretation, multimedia content analysis.""",
214
+ [dim]Use this for:[/dim] Image analysis, audio transcription, video analysis, document processing.""",
215
215
  },
216
216
  }
217
217
 
@@ -617,6 +617,20 @@ class ConfigBuilder:
617
617
  # Build choices for questionary - organized with tool hints
618
618
  choices = []
619
619
 
620
+ # Add spacing before first option (using spaces to avoid line)
621
+ choices.append(questionary.Separator(" "))
622
+
623
+ # First option: Browse existing configs (most common for new users)
624
+ choices.append(
625
+ questionary.Choice(
626
+ title="📦 Browse ready-to-use configs / examples",
627
+ value="__browse_existing__",
628
+ ),
629
+ )
630
+ choices.append(questionary.Separator(" "))
631
+ choices.append(questionary.Separator("┄┄ or build from template ┄┄"))
632
+ choices.append(questionary.Separator(" "))
633
+
620
634
  # Define display with brief tool descriptions
621
635
  display_info = [
622
636
  ("custom", "⚙️", "Custom Configuration", "Choose your own tools"),
@@ -643,12 +657,12 @@ class ConfigBuilder:
643
657
  value=use_case_id,
644
658
  ),
645
659
  )
660
+
646
661
  except Exception as e:
647
662
  console.print(f"[warning]⚠️ Could not display use case: {e}[/warning]")
648
663
 
649
664
  # Add helpful context before the prompt
650
- console.print("[dim]Choose a preset that matches your task. Each preset auto-configures tools and capabilities.[/dim]")
651
- console.print("[dim]You can customize everything in later steps.[/dim]\n")
665
+ console.print("[dim]Browse ready-to-use configs, or pick a template to build your own.[/dim]\n")
652
666
 
653
667
  use_case_id = questionary.select(
654
668
  "Select your use case:",
@@ -666,6 +680,10 @@ class ConfigBuilder:
666
680
  if use_case_id is None:
667
681
  raise KeyboardInterrupt # User cancelled, exit immediately
668
682
 
683
+ # Handle special value for browsing existing configs
684
+ if use_case_id == "__browse_existing__":
685
+ return "__browse_existing__"
686
+
669
687
  # Show selection with description
670
688
  selected_info = self.USE_CASES[use_case_id]
671
689
  console.print(f"\n✅ Selected: [green]{selected_info.get('name', use_case_id)}[/green]")
@@ -1521,6 +1539,83 @@ class ConfigBuilder:
1521
1539
 
1522
1540
  console.print(f"✅ Enabled {len(selected_gen)} generation capability(ies)")
1523
1541
 
1542
+ # Custom multimodal understanding tools (new in v0.1.3+)
1543
+ # Available for ALL use cases - these are active tools that process workspace files
1544
+ console.print()
1545
+ console.print("[cyan]Custom Multimodal Understanding Tools (New in v0.1.3+):[/cyan]")
1546
+ console.print("[dim]These tools let agents analyze workspace files using OpenAI's gpt-4.1 API:[/dim]")
1547
+ console.print("[dim] • Works with any backend (uses OpenAI for analysis)[/dim]")
1548
+ console.print("[dim] • Processes files agents generate or discover during execution[/dim]")
1549
+ console.print("[dim] • Returns structured JSON with detailed metadata[/dim]")
1550
+ console.print("[dim] • Requires OPENAI_API_KEY in your .env file[/dim]")
1551
+
1552
+ # Default to True for multimodal use case, False for others
1553
+ default_add_mm = use_case == "multimodal"
1554
+
1555
+ if questionary.confirm("Add custom multimodal understanding tools?", default=default_add_mm).ask():
1556
+ # Determine default selections based on use case
1557
+ if use_case == "multimodal":
1558
+ # For multimodal preset, select all by default
1559
+ pass
1560
+ elif use_case == "data_analysis":
1561
+ # For data analysis, suggest image and file tools
1562
+ pass
1563
+ else:
1564
+ # For other use cases, none selected by default (let user choose)
1565
+ pass
1566
+
1567
+ if use_case == "multimodal":
1568
+ multimodal_tool_choices = [
1569
+ questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=True),
1570
+ questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=True),
1571
+ questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=True),
1572
+ questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=True),
1573
+ ]
1574
+ elif use_case == "data_analysis":
1575
+ multimodal_tool_choices = [
1576
+ questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=True),
1577
+ questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=False),
1578
+ questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=False),
1579
+ questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=True),
1580
+ ]
1581
+ else:
1582
+ multimodal_tool_choices = [
1583
+ questionary.Choice("understand_image - Analyze images (PNG, JPEG, JPG)", value="understand_image", checked=False),
1584
+ questionary.Choice("understand_audio - Transcribe and analyze audio", value="understand_audio", checked=False),
1585
+ questionary.Choice("understand_video - Extract frames and analyze video", value="understand_video", checked=False),
1586
+ questionary.Choice("understand_file - Process documents (PDF, DOCX, XLSX, PPTX)", value="understand_file", checked=False),
1587
+ ]
1588
+
1589
+ selected_mm_tools = questionary.checkbox(
1590
+ "Select custom multimodal tools (Space to select, Enter to confirm):",
1591
+ choices=multimodal_tool_choices,
1592
+ style=questionary.Style(
1593
+ [
1594
+ ("selected", "fg:cyan"),
1595
+ ("pointer", "fg:cyan bold"),
1596
+ ("highlighted", "fg:cyan"),
1597
+ ],
1598
+ ),
1599
+ use_arrow_keys=True,
1600
+ ).ask()
1601
+
1602
+ if selected_mm_tools:
1603
+ # Initialize custom_tools list if not exists
1604
+ if "custom_tools" not in agent["backend"]:
1605
+ agent["backend"]["custom_tools"] = []
1606
+
1607
+ # Add selected tools
1608
+ for tool_name in selected_mm_tools:
1609
+ tool_config = {
1610
+ "name": [tool_name],
1611
+ "category": "multimodal",
1612
+ "path": f"massgen/tool/_multimodal_tools/{tool_name}.py",
1613
+ "function": [tool_name],
1614
+ }
1615
+ agent["backend"]["custom_tools"].append(tool_config)
1616
+
1617
+ console.print(f"✅ Added {len(selected_mm_tools)} custom multimodal tool(s)")
1618
+
1524
1619
  # MCP servers (custom only)
1525
1620
  # Note: Filesystem is handled internally above, NOT as external MCP
1526
1621
  if "mcp" in provider_info.get("supports", []):
@@ -1807,12 +1902,18 @@ class ConfigBuilder:
1807
1902
  "code_execution": "💻 Code execution",
1808
1903
  "web_search": "🔍 Web search",
1809
1904
  "mcp": "🔌 MCP servers",
1905
+ "image_understanding": "📷 Image understanding (backend capability)",
1906
+ "audio_understanding": "🎵 Audio understanding (backend capability)",
1907
+ "video_understanding": "🎬 Video understanding (backend capability)",
1810
1908
  }.get(tool, tool)
1811
1909
  console.print(f" • {tool_display}")
1812
1910
 
1813
1911
  if use_case == "coding_docker":
1814
1912
  console.print(" • 🐳 Docker isolated execution")
1815
1913
 
1914
+ if use_case == "multimodal":
1915
+ console.print(" • 🎨 Custom multimodal tools (understand_image, understand_audio, understand_video, understand_file)")
1916
+
1816
1917
  console.print()
1817
1918
 
1818
1919
  # Let users select models for each agent
@@ -2302,7 +2403,7 @@ class ConfigBuilder:
2302
2403
  orchestrator_config = {}
2303
2404
  orchestrator_config["session_storage"] = "sessions"
2304
2405
  console.print()
2305
- console.print(" ✅ Multi-turn sessions enabled (supports persistent conversations with memory)")
2406
+ console.print(" ✅ Multi-turn sessions enabled (supports persistent conversations)")
2306
2407
 
2307
2408
  # Planning Mode (for MCP irreversible actions) - only ask if MCPs are configured
2308
2409
  has_mcp = any(a.get("backend", {}).get("mcp_servers") for a in agents)
@@ -2321,6 +2422,30 @@ class ConfigBuilder:
2321
2422
  console.print()
2322
2423
  console.print(" ✅ Planning mode enabled - MCP tools will plan without executing during coordination")
2323
2424
 
2425
+ # Orchestration Restart Feature
2426
+ console.print()
2427
+ console.print(" [dim]Orchestration Restart: Automatic quality checks with self-correction[/dim]")
2428
+ console.print(" [dim]• Agent evaluates its own answer after coordination[/dim]")
2429
+ console.print(" [dim]• Can restart with specific improvement instructions if incomplete[/dim]")
2430
+ console.print(" [dim]• Each attempt gets isolated logs in attempt_1/, attempt_2/, etc.[/dim]")
2431
+ console.print(" [dim]• Works with all backends (OpenAI, Claude, Gemini, Grok, etc.)[/dim]")
2432
+ console.print(" [dim]• 0 = no restarts (default), 1-2 = recommended, 3 = maximum[/dim]")
2433
+ console.print()
2434
+
2435
+ restart_input = Prompt.ask(
2436
+ " [prompt]Max orchestration restarts (0-3)[/prompt]",
2437
+ choices=["0", "1", "2", "3"],
2438
+ default="0",
2439
+ )
2440
+
2441
+ max_restarts = int(restart_input)
2442
+ if max_restarts > 0:
2443
+ if "coordination" not in orchestrator_config:
2444
+ orchestrator_config["coordination"] = {}
2445
+ orchestrator_config["coordination"]["max_orchestration_restarts"] = max_restarts
2446
+ console.print()
2447
+ console.print(f" ✅ Orchestration restart enabled: up to {max_restarts} restart(s) allowed")
2448
+
2324
2449
  # Voting Sensitivity - only ask for multi-agent setups
2325
2450
  if len(agents) > 1:
2326
2451
  console.print()
@@ -2634,6 +2759,21 @@ class ConfigBuilder:
2634
2759
  console.print("[warning]⚠️ No use case selected.[/warning]")
2635
2760
  return None
2636
2761
 
2762
+ # Handle special case: user wants to browse existing configs
2763
+ if use_case == "__browse_existing__":
2764
+ console.print("\n[cyan]Opening config selector...[/cyan]\n")
2765
+ # Import here to avoid circular dependency
2766
+ from .cli import interactive_config_selector
2767
+
2768
+ selected_config = interactive_config_selector()
2769
+ if selected_config:
2770
+ console.print(f"\n[green]✓ Selected config: {selected_config}[/green]\n")
2771
+ # Return the selected config as if it was created
2772
+ return (selected_config, None)
2773
+ else:
2774
+ console.print("\n[yellow]⚠️ No config selected[/yellow]\n")
2775
+ return None
2776
+
2637
2777
  # Step 2: Configure agents
2638
2778
  agents = self.configure_agents(use_case, api_keys)
2639
2779
  if not agents:
@@ -2652,7 +2792,12 @@ class ConfigBuilder:
2652
2792
  filepath = self.review_and_save(agents, orchestrator_config)
2653
2793
 
2654
2794
  if filepath:
2655
- # Ask if user wants to run now
2795
+ # In default_mode (first-run), skip "Run now?" and go straight to interactive mode
2796
+ if self.default_mode:
2797
+ # Config already saved by review_and_save(), just return to launch interactive mode
2798
+ return (filepath, None)
2799
+
2800
+ # In regular --init mode, ask if user wants to run now
2656
2801
  run_choice = Confirm.ask("\n[prompt]Run MassGen with this configuration now?[/prompt]", default=True)
2657
2802
  if run_choice is None:
2658
2803
  raise KeyboardInterrupt # User cancelled
massgen/configs/README.md CHANGED
@@ -227,15 +227,73 @@ Most configurations use environment variables for API keys:so
227
227
 
228
228
  ## Release History & Examples
229
229
 
230
- ### v0.1.2 - Latest
230
+ ### v0.1.4 - Latest
231
+ **New Features:** Multimodal Generation Tools, Binary File Protection, Crawl4AI Integration
232
+
233
+ **Configuration Files:**
234
+ - `text_to_image_generation_single.yaml` / `text_to_image_generation_multi.yaml` - Image generation
235
+ - `text_to_video_generation_single.yaml` / `text_to_video_generation_multi.yaml` - Video generation
236
+ - `text_to_speech_generation_single.yaml` / `text_to_speech_generation_multi.yaml` - Audio generation
237
+ - `text_to_file_generation_single.yaml` / `text_to_file_generation_multi.yaml` - Document generation
238
+ - `crawl4ai_example.yaml` - Web scraping configuration
239
+
240
+ **Documentation:**
241
+ - `README_PYPI.md` - Standalone PyPI package documentation
242
+ - `docs/dev_notes/release_checklist.md` - Release workflow guide
243
+ - `docs/source/user_guide/protected_paths.rst` - Binary file protection documentation
244
+ - `.github/workflows/docs-automation.yml` - Documentation CI/CD automation
245
+
246
+ **Key Features:**
247
+ - **Generation Tools**: Create images, videos, audio, and documents using OpenAI APIs
248
+ - **Binary File Protection**: Automatic blocking prevents text tools from reading 40+ binary file types
249
+ - **Web Scraping**: Crawl4AI integration for intelligent content extraction
250
+ - **Enhanced Security**: Smart tool suggestions guide users to appropriate specialized tools
251
+
252
+ **Try it:**
253
+ ```bash
254
+ # Install or upgrade
255
+ pip install --upgrade massgen
256
+
257
+ # Generate an image from text
258
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_image_generation_single \
259
+ "Please generate an image of a cat in space."
260
+
261
+ # Generate a video from text
262
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_video_generation_single \
263
+ "Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
264
+
265
+ # Generate documents (PDF, DOCX, etc.)
266
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_file_generation_single \
267
+ "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs)."
268
+ ```
269
+
270
+ ### v0.1.3
271
+ **New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
272
+
273
+ **Configuration Files:**
274
+ - `understand_image.yaml`, `understand_audio.yaml`, `understand_video.yaml`, `understand_file.yaml`
275
+
276
+ **Key Features:**
277
+ - **Post-Evaluation Tools**: Submit and restart capabilities for winning agents
278
+ - **Multimodal Understanding**: Analyze images, audio, video, and documents
279
+ - **Docker Sudo Mode**: Execute privileged commands in containers
280
+
281
+ **Try it:**
282
+ ```bash
283
+ # Try multimodal image understanding
284
+ massgen --config @examples/tools/custom_tools/multimodal_tools/understand_image \
285
+ "Please summarize the content in this image."
286
+ ```
287
+
288
+ ### v0.1.2
231
289
  **New Features:** Intelligent Planning Mode, Claude 4.5 Haiku Support, Grok Web Search Improvements
232
290
 
233
291
  **Configuration Files:**
234
- - `massgen/configs/tools/planning/` - 5 planning mode configurations with selective blocking
235
- - `massgen/configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
292
+ - `configs/tools/planning/` - 5 planning mode configurations with selective blocking
293
+ - `configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
236
294
 
237
295
  **Documentation:**
238
- - `docs/case_studies/INTELLIGENT_PLANNING_MODE.md` - Complete intelligent planning mode guide
296
+ - `docs/dev_notes/intelligent_planning_mode.md` - Complete intelligent planning mode guide
239
297
 
240
298
  **Key Features:**
241
299
  - **Intelligent Planning Mode**: Automatic analysis of question irreversibility for dynamic MCP tool blocking
@@ -245,9 +303,6 @@ Most configurations use environment variables for API keys:so
245
303
 
246
304
  **Try it:**
247
305
  ```bash
248
- # Install or upgrade
249
- pip install --upgrade massgen
250
-
251
306
  # Try intelligent planning mode with MCP tools
252
307
  # (Please read the YAML file for required API keys: DISCORD_TOKEN, OPENAI_API_KEY, etc.)
253
308
  massgen --config @examples/tools/planning/five_agents_discord_mcp_planning_mode \
@@ -346,7 +401,7 @@ massgen --config @examples/tools/code-execution/docker_with_resource_limits \
346
401
  - `massgen/configs/basic/single/single_gpt4o_video_generation.yaml` - Video generation with OpenAI Sora-2
347
402
 
348
403
  **Case Study:**
349
- - [Universal Code Execution via MCP](../../docs/case_studies/universal-code-execution-mcp.md)
404
+ - [Universal Code Execution via MCP](../../docs/source/examples/case_studies/universal-code-execution-mcp.md)
350
405
 
351
406
  **Key Features:**
352
407
  - Universal `execute_command` tool works across Claude, Gemini, OpenAI (Response API), and Chat Completions providers (Grok, ZAI, etc.)
@@ -419,7 +474,7 @@ massgen --config @examples/tools/filesystem/cc_gpt5_gemini_filesystem \
419
474
  - New `FileOperationTracker` class for read-before-delete enforcement
420
475
  - Enhanced PathPermissionManager with operation tracking methods
421
476
 
422
- **Case Study:** [MCP Planning Mode](../../docs/case_studies/mcp-planning-mode.md)
477
+ **Case Study:** [MCP Planning Mode](../../docs/source/examples/case_studies/mcp-planning-mode.md)
423
478
 
424
479
  **Try it:**
425
480
  ```bash
@@ -446,7 +501,7 @@ massgen --config @examples/tools/planning/five_agents_twitter_mcp_planning_mode
446
501
  - New `ExternalAgentBackend` class bridging MassGen with external frameworks
447
502
  - Multiple code executor types: LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor, JupyterCodeExecutor, YepCodeCodeExecutor
448
503
 
449
- **Case Study:** [AG2 Framework Integration](../../docs/case_studies/ag2-framework-integration.md)
504
+ **Case Study:** [AG2 Framework Integration](../../docs/source/examples/case_studies/ag2-framework-integration.md)
450
505
 
451
506
  **Try it:**
452
507
  ```bash
@@ -515,7 +570,7 @@ massgen --config @examples/tools/filesystem/gemini_gpt5nano_file_context_path \
515
570
  - Automatic `.massgen` directory management for persistent conversation context
516
571
  - Enhanced path permissions with `will_be_writable` flag and smart exclusion patterns
517
572
 
518
- **Case Study:** [Multi-Turn Filesystem Support](../../docs/case_studies/multi-turn-filesystem-support.md)
573
+ **Case Study:** [Multi-Turn Filesystem Support](../../docs/source/examples/case_studies/multi-turn-filesystem-support.md)
519
574
  ```bash
520
575
  # Turn 1 - Initial creation
521
576
  Turn 1: Make a website about Bob Dylan
@@ -553,7 +608,7 @@ massgen --config @examples/basic/multi/two_qwen_vllm \
553
608
  - All configs now organized by provider & use case (basic/, providers/, tools/, teams/)
554
609
  - Use same configs as v0.0.21 for compatibility, but now with improved performance
555
610
 
556
- **Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
611
+ **Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/source/examples/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
557
612
  ```bash
558
613
  # Multi-agent collaboration with granular filesystem permissions
559
614
  massgen --config @examples/tools/filesystem/gpt5mini_cc_fs_context_path "Enhance the website in massgen/configs/resources with: 1) A dark/light theme toggle with smooth transitions, 2) An interactive feature that helps users engage with the blog content (your choice - could be search, filtering by topic, reading time estimates, social sharing, reactions, etc.), and 3) Visual polish with CSS animations or transitions that make the site feel more modern and responsive. Use vanilla JavaScript and be creative with the implementation details."
@@ -599,7 +654,7 @@ massgen --config @examples/tools/mcp/gpt5_nano_mcp_example \
599
654
 
600
655
  ### v0.0.16
601
656
  **New Features:** Unified Filesystem Support with MCP Integration
602
- **Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/case_studies/unified-filesystem-mcp-integration.md)
657
+ **Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/source/examples/case_studies/unified-filesystem-mcp-integration.md)
603
658
  ```bash
604
659
  # Gemini and Claude Code agents with unified filesystem via MCP
605
660
  massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code "Create a presentation that teaches a reinforcement learning algorithm and output it in LaTeX Beamer format. No figures should be added."
@@ -612,7 +667,7 @@ massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code
612
667
 
613
668
  ### v0.0.12 - v0.0.14
614
669
  **New Features:** Enhanced Logging and Workspace Management
615
- **Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/case_studies/claude-code-workspace-management.md)
670
+ **Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/source/examples/case_studies/claude-code-workspace-management.md)
616
671
  ```bash
617
672
  # Multi-agent Claude Code collaboration with enhanced workspace isolation
618
673
  massgen --config @examples/tools/filesystem/claude_code_context_sharing "Create a website about a diverse set of fun facts about LLMs, placing the output in one index.html file"
@@ -0,0 +1,60 @@
1
+ # Two Agent Orchestration Restart Test Configuration (No Filesystem)
2
+ # This configuration demonstrates the orchestration restart feature for self-correcting coordination
3
+ # Uses debug_final_answer to force restart on first attempt for testing
4
+ #
5
+ # Setup Requirements:
6
+ # 1. Set environment variable in .env file:
7
+ # OPENAI_API_KEY="your_openai_key"
8
+ #
9
+ # Example usage:
10
+ # massgen --config @examples/debug/restart_test_controlled "Describe two famous Beatles members: John Lennon and Paul McCartney. Include their birth year, role in the band, and notable contributions."
11
+ #
12
+ # Expected behavior:
13
+ # - Attempt 1: Agents coordinate, final presentation shows debug override (incomplete answer)
14
+ # - Post-evaluation detects incomplete answer and calls restart_orchestration()
15
+ # - Restart banner displays with reason and instructions
16
+ # - Attempt 2: Agents see restart context and produce complete answer
17
+ # - Post-evaluation approves and calls submit()
18
+ # - Inspection menu appears
19
+
20
+ agents:
21
+ - id: "agent_a"
22
+ backend:
23
+ type: "openai"
24
+ model: "gpt-5-nano"
25
+ # type: "gemini"
26
+ # model: "gemini-2.5-flash"
27
+ # type: "grok"
28
+ # model: "grok-4-fast"
29
+ # type: "claude"
30
+ # model: "claude-sonnet-4-5-20250929"
31
+ # type: "claude_code"
32
+ # model: "claude-sonnet-4-5-20250929"
33
+ # cwd: "workspace1"
34
+
35
+ - id: "agent_b"
36
+ backend:
37
+ type: "openai"
38
+ model: "gpt-5-nano"
39
+
40
+ orchestrator:
41
+ snapshot_storage: "snapshots"
42
+ agent_temporary_workspace: "temp_workspaces"
43
+
44
+ # Restart configuration
45
+ coordination:
46
+ max_orchestration_restarts: 2 # Allow up to 2 restarts (3 total attempts)
47
+
48
+ # DEBUG MODE: Override final answer on attempt 1 to simulate incomplete response
49
+ debug_final_answer: |
50
+ John Lennon was born in 1940 in Liverpool, England. He was a founding member
51
+ of The Beatles and served as rhythm guitarist and vocalist. He co-wrote many
52
+ of the band's greatest hits with Paul McCartney. After the Beatles disbanded,
53
+ he pursued a successful solo career and became known for his peace activism.
54
+
55
+ # Use lenient voting for faster coordination in debug mode
56
+ voting_sensitivity: "lenient"
57
+
58
+ ui:
59
+ display_type: "rich_terminal"
60
+ logging_enabled: true
@@ -0,0 +1,73 @@
1
+ # Two Agent Orchestration Restart Test Configuration with Filesystem
2
+ # This configuration demonstrates the orchestration restart feature with MCP filesystem tools
3
+ # Uses debug_final_answer to force restart on first attempt for testing file creation tasks
4
+ #
5
+ # Setup Requirements:
6
+ # 1. Set environment variable in .env file:
7
+ # OPENAI_API_KEY="your_openai_key"
8
+ # 2. Filesystem MCP servers will be automatically available via npx
9
+ #
10
+ # Example usage:
11
+ # massgen --config @examples/configs/debug/restart_test_controlled_filesystem.yaml "Create three Python files: main.py with a hello() function, utils.py with a helper() function, and tests.py with a test_hello() function."
12
+ #
13
+ # Expected behavior:
14
+ # - Attempt 1: Agents coordinate with planning mode (no file execution during coordination)
15
+ # - Final presentation shows debug override claiming only a hello.txt file was created (none of the requested files)
16
+ # - Post-evaluation uses MCP tools to verify files and detects missing files
17
+ # - Calls restart_orchestration() with specific instructions
18
+ # - Restart banner displays
19
+ # - Attempt 2: Agents coordinate with restart instructions to create ALL files
20
+ # - Post-evaluation verifies all files exist using MCP tools
21
+ # - Calls submit(), after which the inspection menu appears
22
+
23
+ agents:
24
+ - id: "agent_a"
25
+ backend:
26
+ type: "openai"
27
+ model: "gpt-5-nano"
28
+ # type: "gemini"
29
+ # model: "gemini-2.5-flash"
30
+ # type: "grok"
31
+ # model: "grok-4-fast"
32
+ # type: "claude"
33
+ # model: "claude-sonnet-4-5-20250929"
34
+ # type: "claude_code"
35
+ # model: "claude-sonnet-4-5-20250929"
36
+ cwd: "workspace1"
37
+
38
+ # - id: "agent_b"
39
+ # backend:
40
+ # type: "openai"
41
+ # model: "gpt-5-nano"
42
+ # cwd: "workspace2"
43
+
44
+ orchestrator:
45
+ snapshot_storage: "snapshots"
46
+ agent_temporary_workspace: "temp_workspaces"
47
+
48
+ # Restart configuration
49
+ coordination:
50
+ max_orchestration_restarts: 2 # Allow up to 2 restarts (3 total attempts)
51
+
52
+ # DEBUG MODE: Override final answer on attempt 1 to simulate incomplete file creation
53
+ debug_final_answer: |
54
+ I created a hello.txt file.
55
+
56
+ # file with the following content:
57
+
58
+ # ```python
59
+ # def hello():
60
+ # print("Hello, World!")
61
+
62
+ # if __name__ == "__main__":
63
+ # hello()
64
+ # ```
65
+
66
+ # The file has been created successfully.
67
+
68
+ # Use lenient voting for faster coordination in debug mode
69
+ voting_sensitivity: "lenient"
70
+
71
+ ui:
72
+ display_type: "rich_terminal"
73
+ logging_enabled: true
@@ -0,0 +1,35 @@
1
+ # Docker Isolation with Sudo Support
2
+ # Run with: uv run python -m massgen.cli --config massgen/configs/tools/code-execution/docker_with_sudo.yaml "Write a factorial function and test it"
3
+ # Expected behavior: Commands execute in isolated Docker container with sudo access for runtime package installation
4
+ #
5
+ # Prerequisites:
6
+ # 1. Docker installed and running
7
+ # 2. Build sudo image FIRST: bash massgen/docker/build.sh --sudo
8
+ #
9
+ # Security: Safe for most use cases - Docker container isolation prevents host access even with sudo.
10
+
11
+ agent:
12
+ id: "docker_sudo_agent"
13
+ backend:
14
+ type: "openai"
15
+ model: "gpt-5-mini"
16
+ cwd: "workspace1"
17
+
18
+ # Enable command execution with Docker isolation and sudo
19
+ enable_mcp_command_line: true
20
+ command_line_execution_mode: "docker"
21
+ command_line_docker_enable_sudo: true # Enables sudo in container
22
+
23
+ # Optional: Specify custom image (default auto-switches to sudo variant)
24
+ # command_line_docker_image: "massgen/mcp-runtime-sudo:latest"
25
+
26
+ orchestrator:
27
+ snapshot_storage: "snapshots"
28
+ agent_temporary_workspace: "temp_workspaces"
29
+
30
+ timeout_settings:
31
+ orchestrator_timeout_seconds: 180 # Docker startup adds ~5-10s overhead
32
+
33
+ ui:
34
+ display_type: "rich_terminal"
35
+ logging_enabled: true
@@ -0,0 +1,56 @@
1
+ # MassGen Configuration: Computer Use - Browser Automation
2
+ #
3
+ # This configuration is optimized for browser-based tasks using Playwright.
4
+ #
5
+ # Usage:
6
+ # massgen --config @examples/tools/custom_tools/computer_use_browser_example "Check the latest OpenAI news on bing.com"
7
+ #
8
+ # Prerequisites:
9
+ # 1. Set OPENAI_API_KEY in your .env file
10
+ # 2. Install Playwright: pip install playwright && playwright install
11
+
12
+ agents:
13
+ - id: "browser_automation_agent"
14
+ backend:
15
+ type: "openai"
16
+ model: "computer-use-preview"
17
+ custom_tools:
18
+ - name: ["computer_use"]
19
+ category: "automation"
20
+ path: "massgen/tool/_computer_use/computer_use_tool.py"
21
+ function: ["computer_use"]
22
+ # Default parameters for browser automation
23
+ default_params:
24
+ environment: "browser"
25
+ display_width: 1280
26
+ display_height: 720
27
+ max_iterations: 30
28
+ include_reasoning: true
29
+
30
+ system_message: |
31
+ You are a browser automation specialist with access to the computer_use tool.
32
+
33
+ Your capabilities include:
34
+ - Web navigation and search
35
+ - Form filling and submission
36
+ - Data extraction from websites
37
+ - Multi-step web workflows
38
+ - Screenshot capture and analysis
39
+
40
+ When using the computer_use tool:
41
+ 1. Provide clear, specific task descriptions
42
+ 2. The tool operates in a browser environment (Playwright)
43
+ 3. You'll receive screenshots after each action
44
+ 4. Acknowledge safety warnings when they appear
45
+ 5. Be patient - complex tasks may require multiple iterations
46
+
47
+ Best practices:
48
+ - Break complex tasks into simple steps
49
+ - Verify each step completed successfully before proceeding
50
+ - Use wait actions when pages are loading
51
+ - Be specific about what you're looking for on the page
52
+
53
+ ui:
54
+ display_type: "detailed"
55
+ logging_enabled: true
56
+ show_screenshots: true