massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from typing import Dict, List, Optional
7
7
 
8
8
  from .base import BaseToolkit, ToolType
9
9
  from .new_answer import NewAnswerToolkit
10
+ from .post_evaluation import PostEvaluationToolkit
10
11
  from .vote import VoteToolkit
11
12
 
12
13
  __all__ = [
@@ -14,7 +15,9 @@ __all__ = [
14
15
  "ToolType",
15
16
  "NewAnswerToolkit",
16
17
  "VoteToolkit",
18
+ "PostEvaluationToolkit",
17
19
  "get_workflow_tools",
20
+ "get_post_evaluation_tools",
18
21
  ]
19
22
 
20
23
 
@@ -55,3 +58,26 @@ def get_workflow_tools(
55
58
  tools.extend(vote_toolkit.get_tools(config))
56
59
 
57
60
  return tools
61
+
62
+
63
+ def get_post_evaluation_tools(
64
+ template_overrides: Optional[Dict] = None,
65
+ api_format: str = "chat_completions",
66
+ ) -> List[Dict]:
67
+ """
68
+ Get post-evaluation tool definitions (submit and restart_orchestration).
69
+
70
+ Args:
71
+ template_overrides: Optional template overrides
72
+ api_format: API format to use (chat_completions, claude, response)
73
+
74
+ Returns:
75
+ List of tool definitions [submit, restart_orchestration]
76
+ """
77
+ config = {
78
+ "api_format": api_format,
79
+ "enable_post_evaluation_tools": True,
80
+ }
81
+
82
+ post_eval_toolkit = PostEvaluationToolkit(template_overrides=template_overrides)
83
+ return post_eval_toolkit.get_tools(config)
@@ -0,0 +1,216 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Post-evaluation toolkit for MassGen orchestration restart feature.
4
+
5
+ This toolkit provides tools for post-evaluation phase where the winning agent
6
+ evaluates its own answer and decides whether to submit or restart with improvements.
7
+ """
8
+
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from .base import BaseToolkit, ToolType
12
+
13
+
14
+ class PostEvaluationToolkit(BaseToolkit):
15
+ """Post-evaluation toolkit for orchestration restart feature."""
16
+
17
+ def __init__(self, template_overrides: Optional[Dict[str, Any]] = None):
18
+ """
19
+ Initialize the PostEvaluation toolkit.
20
+
21
+ Args:
22
+ template_overrides: Optional template overrides for customization
23
+ """
24
+ self._template_overrides = template_overrides or {}
25
+
26
+ @property
27
+ def toolkit_id(self) -> str:
28
+ """Unique identifier for post-evaluation toolkit."""
29
+ return "post_evaluation"
30
+
31
+ @property
32
+ def toolkit_type(self) -> ToolType:
33
+ """Type of this toolkit."""
34
+ return ToolType.WORKFLOW
35
+
36
+ def is_enabled(self, config: Dict[str, Any]) -> bool:
37
+ """
38
+ Check if post-evaluation is enabled in configuration.
39
+
40
+ Args:
41
+ config: Configuration dictionary.
42
+
43
+ Returns:
44
+ True if post-evaluation tools are enabled.
45
+ """
46
+ return config.get("enable_post_evaluation_tools", True)
47
+
48
+ def get_tools(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
49
+ """
50
+ Get post-evaluation tool definitions based on API format.
51
+
52
+ Args:
53
+ config: Configuration including api_format.
54
+
55
+ Returns:
56
+ List containing submit and restart_orchestration tool definitions.
57
+ """
58
+ api_format = config.get("api_format", "chat_completions")
59
+
60
+ if api_format == "claude":
61
+ # Claude native format
62
+ return self._get_claude_tools()
63
+ elif api_format == "response":
64
+ # Response API format
65
+ return self._get_response_tools()
66
+ else:
67
+ # Default Chat Completions format
68
+ return self._get_chat_completions_tools()
69
+
70
+ def _get_claude_tools(self) -> List[Dict[str, Any]]:
71
+ """Get Claude native format tools."""
72
+ submit_tool = {
73
+ "name": "submit",
74
+ "description": "Confirm that the final answer fully addresses the original task and submit it to the user. Use this when the answer is complete, accurate, and satisfactory.",
75
+ "input_schema": {
76
+ "type": "object",
77
+ "properties": {
78
+ "confirmed": {
79
+ "type": "boolean",
80
+ "description": "Set to true to confirm the answer is satisfactory",
81
+ "enum": [True],
82
+ },
83
+ },
84
+ "required": ["confirmed"],
85
+ },
86
+ }
87
+
88
+ restart_tool = {
89
+ "name": "restart_orchestration",
90
+ "description": "Restart the orchestration process with specific guidance for improvement. Use this when the answer is incomplete, incorrect, or does not fully address the original task.",
91
+ "input_schema": {
92
+ "type": "object",
93
+ "properties": {
94
+ "reason": {
95
+ "type": "string",
96
+ "description": "Clear explanation of why the answer is insufficient (e.g., 'The task required descriptions of two Beatles, but only John Lennon was described')",
97
+ },
98
+ "instructions": {
99
+ "type": "string",
100
+ "description": (
101
+ "Detailed, actionable guidance for agents on the next attempt "
102
+ "(e.g., 'Provide two descriptions (John Lennon AND Paul McCartney). "
103
+ "Each should include: birth year, role in band, notable songs, impact on music. "
104
+ "Use 4-6 sentences per person.')"
105
+ ),
106
+ },
107
+ },
108
+ "required": ["reason", "instructions"],
109
+ },
110
+ }
111
+
112
+ return [submit_tool, restart_tool]
113
+
114
+ def _get_response_tools(self) -> List[Dict[str, Any]]:
115
+ """Get Response API format tools."""
116
+ submit_tool = {
117
+ "type": "function",
118
+ "function": {
119
+ "name": "submit",
120
+ "description": "Confirm that the final answer fully addresses the original task and submit it to the user. Use this when the answer is complete, accurate, and satisfactory.",
121
+ "parameters": {
122
+ "type": "object",
123
+ "properties": {
124
+ "confirmed": {
125
+ "type": "boolean",
126
+ "description": "Set to true to confirm the answer is satisfactory",
127
+ "enum": [True],
128
+ },
129
+ },
130
+ "required": ["confirmed"],
131
+ },
132
+ },
133
+ }
134
+
135
+ restart_tool = {
136
+ "type": "function",
137
+ "function": {
138
+ "name": "restart_orchestration",
139
+ "description": (
140
+ "Restart the orchestration process with specific guidance for improvement. " "Use this when the answer is incomplete, incorrect, or does not fully address the original task."
141
+ ),
142
+ "parameters": {
143
+ "type": "object",
144
+ "properties": {
145
+ "reason": {
146
+ "type": "string",
147
+ "description": "Clear explanation of why the answer is insufficient (e.g., 'The task required descriptions of two Beatles, but only John Lennon was described')",
148
+ },
149
+ "instructions": {
150
+ "type": "string",
151
+ "description": (
152
+ "Detailed, actionable guidance for agents on the next attempt "
153
+ "(e.g., 'Provide two descriptions (John Lennon AND Paul McCartney). "
154
+ "Each should include: birth year, role in band, notable songs, impact on music. "
155
+ "Use 4-6 sentences per person.')"
156
+ ),
157
+ },
158
+ },
159
+ "required": ["reason", "instructions"],
160
+ },
161
+ },
162
+ }
163
+
164
+ return [submit_tool, restart_tool]
165
+
166
+ def _get_chat_completions_tools(self) -> List[Dict[str, Any]]:
167
+ """Get Chat Completions format tools."""
168
+ submit_tool = {
169
+ "type": "function",
170
+ "function": {
171
+ "name": "submit",
172
+ "description": "Confirm that the final answer fully addresses the original task and submit it to the user. Use this when the answer is complete, accurate, and satisfactory.",
173
+ "parameters": {
174
+ "type": "object",
175
+ "properties": {
176
+ "confirmed": {
177
+ "type": "boolean",
178
+ "description": "Set to true to confirm the answer is satisfactory",
179
+ "enum": [True],
180
+ },
181
+ },
182
+ "required": ["confirmed"],
183
+ },
184
+ },
185
+ }
186
+
187
+ restart_tool = {
188
+ "type": "function",
189
+ "function": {
190
+ "name": "restart_orchestration",
191
+ "description": (
192
+ "Restart the orchestration process with specific guidance for improvement. " "Use this when the answer is incomplete, incorrect, or does not fully address the original task."
193
+ ),
194
+ "parameters": {
195
+ "type": "object",
196
+ "properties": {
197
+ "reason": {
198
+ "type": "string",
199
+ "description": "Clear explanation of why the answer is insufficient (e.g., 'The task required descriptions of two Beatles, but only John Lennon was described')",
200
+ },
201
+ "instructions": {
202
+ "type": "string",
203
+ "description": (
204
+ "Detailed, actionable guidance for agents on the next attempt "
205
+ "(e.g., 'Provide two descriptions (John Lennon AND Paul McCartney). "
206
+ "Each should include: birth year, role in band, notable songs, impact on music. "
207
+ "Use 4-6 sentences per person.')"
208
+ ),
209
+ },
210
+ },
211
+ "required": ["reason", "instructions"],
212
+ },
213
+ },
214
+ }
215
+
216
+ return [submit_tool, restart_tool]
massgen/utils.py CHANGED
@@ -31,6 +31,7 @@ class CoordinationStage(Enum):
31
31
  INITIAL_ANSWER = "initial_answer" # initial answer generation
32
32
  ENFORCEMENT = "enforcement"
33
33
  PRESENTATION = "presentation"
34
+ POST_EVALUATION = "post_evaluation" # post-evaluation phase (MCP tools enabled)
34
35
 
35
36
 
36
37
  MODEL_MAPPINGS = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: massgen
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Multi-Agent Scaling System - A powerful framework for collaborative AI
5
5
  Author-email: MassGen Team <contact@massgen.dev>
6
6
  License: Apache-2.0
@@ -49,6 +49,12 @@ Requires-Dist: ag2>=0.9.10
49
49
  Requires-Dist: pyautogen>=0.10.0
50
50
  Requires-Dist: vertexai>=1.71.1
51
51
  Requires-Dist: pytest>=8.4.2
52
+ Requires-Dist: python-docx>=1.2.0
53
+ Requires-Dist: openpyxl>=3.1.5
54
+ Requires-Dist: python-pptx>=1.0.2
55
+ Requires-Dist: opencv-python>=4.12.0.88
56
+ Requires-Dist: pypdf2>=3.0.1
57
+ Requires-Dist: reportlab>=4.0.0
52
58
  Provides-Extra: dev
53
59
  Requires-Dist: pytest>=7.0.0; extra == "dev"
54
60
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -83,23 +89,29 @@ Provides-Extra: all
83
89
  Dynamic: license-file
84
90
 
85
91
  <p align="center">
86
- <img src="assets/logo.png" alt="MassGen Logo" width="360" />
92
+ <picture>
93
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Leezekun/MassGen/main/assets/logo-dark.png">
94
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Leezekun/MassGen/main/assets/logo.png">
95
+ <img src="https://raw.githubusercontent.com/Leezekun/MassGen/main/assets/logo.png" alt="MassGen Logo" width="360" />
96
+ </picture>
87
97
  </p>
88
98
 
89
- <p align="center">
90
- <a href="https://www.python.org/downloads/">
91
- <img src="https://img.shields.io/badge/python-3.11+-blue.svg" alt="Python 3.11+" style="margin-right: 5px;">
92
- </a>
93
- <a href="LICENSE">
94
- <img src="https://img.shields.io/badge/license-Apache%202.0-blue.svg" alt="License" style="margin-right: 5px;">
95
- </a>
96
- <a href="https://docs.massgen.ai">
97
- <img src="https://img.shields.io/badge/docs-massgen.ai-blue.svg" alt="Documentation" style="margin-right: 5px;">
98
- </a>
99
- <a href="https://discord.massgen.ai">
100
- <img src="https://img.shields.io/discord/1153072414184452236?color=7289da&label=chat&logo=discord&style=flat-square" alt="Join our Discord">
101
- </a>
102
- </p>
99
+ <div align="center">
100
+
101
+ [![Docs](https://img.shields.io/badge/docs-massgen.ai-blue?style=flat-square&logo=readthedocs&logoColor=white)](https://docs.massgen.ai)
102
+ [![GitHub Stars](https://img.shields.io/github/stars/Leezekun/MassGen?style=flat-square&logo=github&color=181717&logoColor=white)](https://github.com/Leezekun/MassGen)
103
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-3776AB?style=flat-square&logo=python&logoColor=white)](https://www.python.org/downloads/)
104
+ [![License](https://img.shields.io/badge/license-Apache%202.0-green?style=flat-square)](LICENSE)
105
+
106
+ </div>
107
+
108
+ <div align="center">
109
+
110
+ [![Follow on X](https://img.shields.io/badge/FOLLOW%20ON%20X-000000?style=for-the-badge&logo=x&logoColor=white)](https://x.massgen.ai)
111
+ [![Follow on LinkedIn](https://img.shields.io/badge/FOLLOW%20ON%20LINKEDIN-0A66C2?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/massgen-ai)
112
+ [![Join our Discord](https://img.shields.io/badge/JOIN%20OUR%20DISCORD-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.massgen.ai)
113
+
114
+ </div>
103
115
 
104
116
  <h1 align="center">🚀 MassGen: Multi-Agent Scaling System for GenAI</h1>
105
117
 
@@ -109,7 +121,7 @@ Dynamic: license-file
109
121
 
110
122
  <p align="center">
111
123
  <a href="https://www.youtube.com/watch?v=Dp2oldJJImw">
112
- <img src="assets/massgen-demo.gif" alt="MassGen case study -- Berkeley Agentic AI Summit Question" width="800">
124
+ <img src="assets/thumbnail.png" alt="MassGen case study -- Berkeley Agentic AI Summit Question" width="800">
113
125
  </a>
114
126
  </p>
115
127
 
@@ -138,7 +150,7 @@ This project started with the "threads of thought" and "iterative refinement" id
138
150
  <details open>
139
151
  <summary><h3>🆕 Latest Features</h3></summary>
140
152
 
141
- - [v0.1.2 Features](#-latest-features-v012)
153
+ - [v0.1.4 Features](#-latest-features-v014)
142
154
  </details>
143
155
 
144
156
  <details open>
@@ -183,15 +195,16 @@ This project started with the "threads of thought" and "iterative refinement" id
183
195
  <summary><h3>🗺️ Roadmap</h3></summary>
184
196
 
185
197
  - Recent Achievements
186
- - [v0.1.2](#recent-achievements-v012)
187
- - [v0.0.3 - v0.1.1](#previous-achievements-v003---v011)
198
+ - [v0.1.4](#recent-achievements-v014)
199
+ - [v0.1.3](#previous-achievements-v003---v013)
200
+ - [v0.0.3 - v0.1.3](#previous-achievements-v003---v013)
188
201
  - [Key Future Enhancements](#key-future-enhancements)
189
202
  - Bug Fixes & Backend Improvements
190
203
  - Advanced Agent Collaboration
191
204
  - Expanded Model, Tool & Agent Integrations
192
205
  - Improved Performance & Scalability
193
206
  - Enhanced Developer Experience
194
- - [v0.1.3 Roadmap](#v013-roadmap)
207
+ - [v0.1.5 Roadmap](#v015-roadmap)
195
208
  </details>
196
209
 
197
210
  <details open>
@@ -216,36 +229,37 @@ This project started with the "threads of thought" and "iterative refinement" id
216
229
 
217
230
  ---
218
231
 
219
- ## 🆕 Latest Features (v0.1.2)
232
+ ## 🆕 Latest Features (v0.1.4)
220
233
 
221
- **🎉 Released: October 22, 2025**
234
+ **🎉 Released: October 27, 2025**
222
235
 
223
- **What's New in v0.1.2:**
224
- - **🧠 Intelligent Planning Mode** - Automatic question analysis for safe MCP tool blocking
225
- - **🎭 Claude 4.5 Haiku Support** - Access to latest Claude Haiku model
226
- - **🔍 Grok Web Search Fix** - Improved web search functionality in Grok backend
236
+ **What's New in v0.1.4:**
237
+ - **🎨 Multimodal Generation Tools** - Create images, videos, audio, and documents with AI
238
+ - **🔒 Binary File Protection** - Automatic security preventing accidental binary file reads
239
+ - **🕷️ Crawl4AI Integration** - Intelligent web scraping with LLM-powered extraction
227
240
 
228
241
  **Key Improvements:**
229
- - Automatically determines if questions require irreversible operations
230
- - Read-only MCP operations allowed during coordination for better decisions
231
- - Write operations automatically blocked for safety
232
- - Zero configuration required - works transparently
233
- - Enhanced model support with latest Claude 4.5 Haiku
242
+ - 6 new generation tools covering text-to-image, text-to-video, text-to-speech (two tools), text-to-file, and image-to-image
243
+ - Binary file protection for 40+ file types with smart tool suggestions
244
+ - Web crawling with customizable extraction patterns
245
+ - Enhanced documentation and automation infrastructure
234
246
 
235
- **Get Started with v0.1.2:**
247
+ **Get Started with v0.1.4:**
236
248
  ```bash
237
249
  # Install or upgrade from PyPI
238
250
  pip install --upgrade massgen
239
251
 
240
- # Try intelligent planning mode with MCP tools
241
- # (Please read the YAML file for required API keys: DISCORD_TOKEN, OPENAI_API_KEY, etc.)
242
- massgen --config @examples/tools/planning/five_agents_discord_mcp_planning_mode \
243
- "Check recent messages in our development channel, summarize the discussion, and post a helpful response about the current topic."
252
+ # Generate an image from text
253
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_image_generation_single \
254
+ "Please generate an image of a cat in space."
244
255
 
245
- # Use latest Claude 4.5 Haiku model
246
- # (Requires ANTHROPIC_API_KEY in .env)
247
- massgen --model claude-haiku-4-5-20251001 \
248
- "Summarize the latest AI developments"
256
+ # Generate a video from text
257
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_video_generation_single \
258
+ "Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
259
+
260
+ # Generate documents (PDF, DOCX, etc.)
261
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_file_generation_single \
262
+ "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs)."
249
263
  ```
250
264
 
251
265
  → [See full release history and examples](massgen/configs/README.md#release-history--examples)
@@ -949,7 +963,7 @@ massgen --config @examples/tools/code-execution/multi_agent_playwright_automatio
949
963
  "Navigate to https://news.ycombinator.com, extract the top 10 stories, and create a summary report"
950
964
  ```
951
965
 
952
- → [**See detailed case studies**](docs/case_studies/README.md) with real session logs and outcomes
966
+ → [**See detailed case studies**](docs/source/examples/case_studies/README.md) with real session logs and outcomes
953
967
 
954
968
  #### Interactive Mode & Advanced Usage
955
969
 
@@ -1070,7 +1084,7 @@ All sessions are automatically logged with detailed information for debugging an
1070
1084
 
1071
1085
  To see how MassGen works in practice, check out these detailed case studies based on real session logs:
1072
1086
 
1073
- - [**MassGen Case Studies**](docs/case_studies/README.md)
1087
+ - [**MassGen Case Studies**](docs/source/examples/case_studies/README.md)
1074
1088
  - [**Case Studies Documentation**](https://docs.massgen.ai/en/latest/examples/case_studies.html) - Browse case studies online
1075
1089
 
1076
1090
  ---
@@ -1078,33 +1092,49 @@ To see how MassGen works in practice, check out these detailed case studies base
1078
1092
 
1079
1093
  ## 🗺️ Roadmap
1080
1094
 
1081
- MassGen is currently in its foundational stage, with a focus on parallel, asynchronous multi-agent collaboration and orchestration. Our roadmap is centered on transforming this foundation into a highly robust, intelligent, and user-friendly system, while enabling frontier research and exploration. An earlier version of MassGen can be found [here](./massgen/v1).
1095
+ MassGen is currently in its foundational stage, with a focus on parallel, asynchronous multi-agent collaboration and orchestration. Our roadmap is centered on transforming this foundation into a highly robust, intelligent, and user-friendly system, while enabling frontier research and exploration.
1082
1096
 
1083
1097
  ⚠️ **Early Stage Notice:** As MassGen is in active development, please expect upcoming breaking architecture changes as we continue to refine and improve the system.
1084
1098
 
1085
- ### Recent Achievements (v0.1.2)
1099
+ ### Recent Achievements (v0.1.4)
1100
+
1101
+ **🎉 Released: October 27, 2025**
1102
+
1103
+ #### Multimodal Generation Tools
1104
+ - **Text-to-Image**: `text_to_image_generation` tool creates images from text prompts via DALL-E API
1105
+ - **Text-to-Video**: `text_to_video_generation` tool generates videos from text descriptions
1106
+ - **Text-to-Speech**: `text_to_speech_continue_generation` and `text_to_speech_transcription_generation` tools for audio generation and transcription
1107
+ - **Text-to-File**: `text_to_file_generation` tool creates documents in PDF, DOCX, XLSX, and PPTX formats
1108
+ - **Image-to-Image**: `image_to_image_generation` tool transforms existing images
1086
1109
 
1087
- **🎉 Released: October 22, 2025**
1110
+ #### Binary File Protection
1111
+ - **Automatic Blocking**: `PathPermissionManager` now prevents text-based read tools from accessing binary files
1112
+ - **Protected File Types**: 40+ extensions including images (.jpg, .png), videos (.mp4, .avi), audio (.mp3, .wav), archives (.zip, .tar), executables (.exe, .dll), and documents (.pdf, .docx, .xlsx, .pptx)
1113
+ - **Intelligent Guidance**: Error messages automatically suggest appropriate specialized tools (e.g., "use understand_image tool" for .jpg files)
1114
+ - **Test Coverage**: `test_binary_file_blocking.py`
1088
1115
 
1089
- #### Intelligent Planning Mode
1090
- - **Automatic Question Analysis**: New `_analyze_question_irreversibility()` method in orchestrator determines if MCP operations are reversible
1091
- - **Selective Tool Blocking**: Granular control with `set_planning_mode_blocked_tools()`, `get_planning_mode_blocked_tools()`, and `is_mcp_tool_blocked()` methods
1092
- - **Dynamic Behavior**: Read-only MCP operations allowed during coordination, write operations blocked for safety
1093
- - **Zero Configuration**: Works transparently without setup
1094
- - **Multi-Workspace Support**: Planning mode works across different workspaces without conflicts
1095
- - **Test Coverage**: Comprehensive tests in `massgen/tests/test_intelligent_planning_mode.py`
1096
- - **Documentation**: Complete guide in `docs/case_studies/INTELLIGENT_PLANNING_MODE.md`
1116
+ #### Web Scraping Capabilities
1117
+ - **Crawl4AI Tool**: `crawl4ai_tool` enables intelligent web scraping with LLM-powered content extraction and customizable patterns
1097
1118
 
1098
- #### Model Support & Improvements
1099
- - **Claude 4.5 Haiku**: Added latest Claude Haiku model `claude-haiku-4-5-20251001`
1100
- - **Model Priority Updates**: Reorganized Claude model list with updated defaults (`claude-sonnet-4-5-20250929`)
1101
- - **Grok Web Search Fix**: Resolved `extra_body` parameter handling for Grok's Live Search API with new `_add_grok_search_params()` method
1119
+ #### Documentation & Infrastructure
1120
+ - **Generation Tools**: 8 multimodal generation configurations
1121
+ - `text_to_image_generation_single.yaml` and `text_to_image_generation_multi.yaml`
1122
+ - `text_to_video_generation_single.yaml` and `text_to_video_generation_multi.yaml`
1123
+ - `text_to_speech_generation_single.yaml` and `text_to_speech_generation_multi.yaml`
1124
+ - `text_to_file_generation_single.yaml` and `text_to_file_generation_multi.yaml`
1125
+ - **Web Scraping**: `crawl4ai_example.yaml` for Crawl4AI integration
1102
1126
 
1103
- #### Configuration Updates
1104
- - **Planning Mode Configs**: Updated 5 configurations in `massgen/configs/tools/planning/` with selective blocking examples
1105
- - **Default Configuration**: Updated `three_agents_default.yaml` with Grok-4-fast model
1127
+ ### Previous Achievements (v0.0.3 - v0.1.3)
1106
1128
 
1107
- ### Previous Achievements (v0.0.3 - v0.1.1)
1129
+ ✅ **Post-Evaluation Workflow (v0.1.3)**: `PostEvaluationToolkit` class with submit tool for confirming final answers and restart tool for orchestration restart with feedback, winning agent evaluates answer before submission, universal backend support (Claude, Response API, Chat Completions), opt-in via `enable_post_evaluation_tools` parameter
1130
+
1131
+ ✅ **Multimodal Understanding Tools (v0.1.3)**: `understand_image` for PNG/JPEG analysis, `understand_audio` for WAV/MP3 transcription, `understand_video` for MP4/AVI frame extraction, `understand_file` for PDF/DOCX processing, cross-backend support via OpenAI GPT-4.1, structured JSON output, configurations: `understand_image.yaml`, `understand_audio.yaml`, `understand_video.yaml`, `understand_file.yaml`
1132
+
1133
+ ✅ **Docker Sudo Mode (v0.1.3)**: `use_sudo` parameter for privileged Docker execution, system-level command support in containers, enhanced security documentation, test coverage in `test_code_execution.py`
1134
+
1135
+ ✅ **Intelligent Planning Mode (v0.1.2)**: Automatic question analysis determining operation irreversibility via `_analyze_question_irreversibility()` in orchestrator, selective tool blocking with `set_planning_mode_blocked_tools()` and `is_mcp_tool_blocked()` methods, read-only MCP operations during coordination with write operations blocked, zero-configuration transparent operation, multi-workspace support, comprehensive tests in `test_intelligent_planning_mode.py`, complete guide in `docs/dev_notes/intelligent_planning_mode.md`
1136
+
1137
+ ✅ **Model Updates (v0.1.2)**: Claude 4.5 Haiku model `claude-haiku-4-5-20251001`, reorganized Claude model priorities with `claude-sonnet-4-5-20250929` default, Grok web search fix with `_add_grok_search_params()` method for proper `extra_body` parameter handling, 5 updated planning mode configurations in `configs/tools/planning/`, updated `three_agents_default.yaml` with Grok-4-fast
1108
1138
 
1109
1139
  ✅ **Custom Tools System (v0.1.1)**: User-defined Python function registration using `ToolManager` class in `massgen/tool/_manager.py`, cross-backend support alongside MCP servers, builtin/MCP/custom tool categories with automatic discovery, 40+ examples in `massgen/configs/tools/custom_tools/`, voting sensitivity controls with three-tier quality system (lenient/balanced/strict), answer novelty detection preventing duplicates
1110
1140
 
@@ -1208,19 +1238,21 @@ MassGen is currently in its foundational stage, with a focus on parallel, asynch
1208
1238
 
1209
1239
  We welcome community contributions to achieve these goals.
1210
1240
 
1211
- ### v0.1.3 Roadmap
1241
+ ### v0.1.5 Roadmap
1212
1242
 
1213
- Version 0.1.3 focuses on general interoperability and enterprise collaboration:
1243
+ Version 0.1.5 focuses on Docker integration for MCP tools and backend code refactoring:
1214
1244
 
1215
1245
  #### Required Features
1216
- - **General Interoperability**: Enable MassGen to orchestrate agents from multiple external frameworks with unified interface
1217
- - **Final Agent Submit/Restart Tools**: Enable final agent to decide whether to submit or restart orchestration
1246
+ - **Running MCP Tools in Docker**: Containerized execution environment for MCP tools with enhanced security and isolation
1247
+ - **Backend Code Refactoring**: Major code refactoring for improved maintainability and developer experience
1218
1248
 
1219
1249
  Key technical approach:
1220
- - **Framework Integration**: Multi-agent coordination supporting external agent frameworks with specialized agent roles (researcher, analyst, critic, synthesizer)
1221
- - **Submit/Restart**: Multi-step task verification with access to previous agents' responses and workspaces
1250
+ - **Docker Integration**: Secure execution of third-party MCP tools in isolated Docker containers with resource limits and network isolation
1251
+ - **Backend Improvements**: Enhanced code organization, modularity, and architectural improvements for better maintainability
1252
+
1253
+ **Target Release**: October 29, 2025 (Wednesday @ 9am PT)
1222
1254
 
1223
- For detailed milestones and technical specifications, see the [full v0.1.3 roadmap](ROADMAP.md).
1255
+ For detailed milestones and technical specifications, see the [full v0.1.5 roadmap](ROADMAP_v0.1.5.md).
1224
1256
 
1225
1257
  ---
1226
1258