massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/response.py +2 -0
- massgen/configs/README.md +49 -40
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
- massgen/filesystem_manager/_filesystem_manager.py +1 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/message_templates.py +160 -12
- massgen/orchestrator.py +16 -0
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +19 -1
- massgen/tool/_multimodal_tools/understand_file.py +6 -1
- massgen/tool/_multimodal_tools/understand_image.py +112 -8
- massgen/tool/_multimodal_tools/understand_video.py +32 -5
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +589 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/METADATA +96 -69
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/RECORD +49 -40
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +0 -98
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +0 -54
- massgen/configs/tools/memory/README.md +0 -199
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
- massgen/configs/tools/memory/test_context_window_management.py +0 -286
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
massgen/__init__.py
CHANGED
|
@@ -24,6 +24,10 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
|
|
|
24
24
|
"allowed_tools",
|
|
25
25
|
"exclude_tools",
|
|
26
26
|
"custom_tools", # Custom tools configuration (processed separately)
|
|
27
|
+
"enable_file_generation", # Internal flag for file generation (used in system messages only)
|
|
28
|
+
"enable_image_generation", # Internal flag for image generation (used in system messages only)
|
|
29
|
+
"enable_audio_generation", # Internal flag for audio generation (used in system messages only)
|
|
30
|
+
"enable_video_generation", # Internal flag for video generation (used in system messages only)
|
|
27
31
|
},
|
|
28
32
|
)
|
|
29
33
|
|
|
@@ -24,6 +24,10 @@ class ClaudeAPIParamsHandler(APIParamsHandlerBase):
|
|
|
24
24
|
"exclude_tools",
|
|
25
25
|
"custom_tools", # Custom tools configuration (processed separately)
|
|
26
26
|
"_has_files_api_files",
|
|
27
|
+
"enable_file_generation", # Internal flag for file generation (used in system messages only)
|
|
28
|
+
"enable_image_generation", # Internal flag for image generation (used in system messages only)
|
|
29
|
+
"enable_audio_generation", # Internal flag for audio generation (used in system messages only)
|
|
30
|
+
"enable_video_generation", # Internal flag for video generation (used in system messages only)
|
|
27
31
|
},
|
|
28
32
|
)
|
|
29
33
|
|
|
@@ -19,6 +19,10 @@ class GeminiAPIParamsHandler(APIParamsHandlerBase):
|
|
|
19
19
|
"allowed_tools",
|
|
20
20
|
"exclude_tools",
|
|
21
21
|
"custom_tools",
|
|
22
|
+
"enable_file_generation", # Internal flag for file generation (used in system messages only)
|
|
23
|
+
"enable_image_generation", # Internal flag for image generation (used in system messages only)
|
|
24
|
+
"enable_audio_generation", # Internal flag for audio generation (used in system messages only)
|
|
25
|
+
"enable_video_generation", # Internal flag for video generation (used in system messages only)
|
|
22
26
|
}
|
|
23
27
|
return set(base) | extra
|
|
24
28
|
|
|
@@ -24,6 +24,10 @@ class ResponseAPIParamsHandler(APIParamsHandlerBase):
|
|
|
24
24
|
"exclude_tools",
|
|
25
25
|
"custom_tools", # Custom tools configuration (processed separately)
|
|
26
26
|
"_has_file_search_files", # Internal flag for file search tracking
|
|
27
|
+
"enable_file_generation", # Internal flag for file generation (used in system messages only)
|
|
28
|
+
"enable_image_generation", # Internal flag for image generation (used in system messages only)
|
|
29
|
+
"enable_audio_generation", # Internal flag for audio generation (used in system messages only)
|
|
30
|
+
"enable_video_generation", # Internal flag for video generation (used in system messages only)
|
|
27
31
|
},
|
|
28
32
|
)
|
|
29
33
|
|
|
@@ -284,9 +284,19 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
284
284
|
|
|
285
285
|
# Register each function with its corresponding values
|
|
286
286
|
for i, func in enumerate(functions):
|
|
287
|
+
# Inject agent_cwd into preset_args if filesystem_manager is available
|
|
288
|
+
final_preset_args = preset_args_list[i].copy() if preset_args_list[i] else {}
|
|
289
|
+
if self.filesystem_manager and self.filesystem_manager.cwd:
|
|
290
|
+
final_preset_args["agent_cwd"] = self.filesystem_manager.cwd
|
|
291
|
+
logger.info(f"Injecting agent_cwd for {func}: {self.filesystem_manager.cwd}")
|
|
292
|
+
elif self.filesystem_manager:
|
|
293
|
+
logger.warning(f"filesystem_manager exists but cwd is None for {func}")
|
|
294
|
+
else:
|
|
295
|
+
logger.warning(f"No filesystem_manager available for {func}")
|
|
296
|
+
|
|
287
297
|
# Load the function first if custom name is needed
|
|
288
298
|
if names[i] and names[i] != func:
|
|
289
|
-
#
|
|
299
|
+
# Load function to apply custom name
|
|
290
300
|
if path:
|
|
291
301
|
loaded_func = self.custom_tool_manager._load_function_from_path(path, func)
|
|
292
302
|
else:
|
|
@@ -296,7 +306,6 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
296
306
|
logger.error(f"Could not load function '{func}' from path: {path}")
|
|
297
307
|
continue
|
|
298
308
|
|
|
299
|
-
# Apply custom name by modifying __name__ attribute
|
|
300
309
|
loaded_func.__name__ = names[i]
|
|
301
310
|
|
|
302
311
|
# Register with loaded function (no path needed)
|
|
@@ -304,7 +313,7 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
304
313
|
path=None,
|
|
305
314
|
func=loaded_func,
|
|
306
315
|
category=category,
|
|
307
|
-
preset_args=
|
|
316
|
+
preset_args=final_preset_args,
|
|
308
317
|
description=descriptions[i],
|
|
309
318
|
)
|
|
310
319
|
else:
|
|
@@ -313,7 +322,7 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
313
322
|
path=path,
|
|
314
323
|
func=func,
|
|
315
324
|
category=category,
|
|
316
|
-
preset_args=
|
|
325
|
+
preset_args=final_preset_args,
|
|
317
326
|
description=descriptions[i],
|
|
318
327
|
)
|
|
319
328
|
|
|
@@ -404,9 +413,19 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
404
413
|
"""
|
|
405
414
|
import json
|
|
406
415
|
|
|
416
|
+
# Parse arguments
|
|
417
|
+
arguments = json.loads(call["arguments"]) if isinstance(call["arguments"], str) else call["arguments"]
|
|
418
|
+
|
|
419
|
+
# Ensure agent_cwd is always injected if filesystem_manager is available
|
|
420
|
+
# This provides a fallback in case preset_args didn't work during registration
|
|
421
|
+
if self.filesystem_manager and self.filesystem_manager.cwd:
|
|
422
|
+
if "agent_cwd" not in arguments or arguments.get("agent_cwd") is None:
|
|
423
|
+
arguments["agent_cwd"] = self.filesystem_manager.cwd
|
|
424
|
+
logger.info(f"Dynamically injected agent_cwd at execution time: {self.filesystem_manager.cwd}")
|
|
425
|
+
|
|
407
426
|
tool_request = {
|
|
408
427
|
"name": call["name"],
|
|
409
|
-
"input":
|
|
428
|
+
"input": arguments,
|
|
410
429
|
}
|
|
411
430
|
|
|
412
431
|
result_text = ""
|
|
@@ -1120,6 +1139,7 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
1120
1139
|
**kwargs,
|
|
1121
1140
|
) -> AsyncGenerator[StreamChunk, None]:
|
|
1122
1141
|
"""Simple passthrough streaming without MCP processing."""
|
|
1142
|
+
|
|
1123
1143
|
agent_id = kwargs.get("agent_id", None)
|
|
1124
1144
|
all_params = {**self.config, **kwargs}
|
|
1125
1145
|
processed_messages = await self._process_upload_files(messages, all_params)
|
|
@@ -1067,8 +1067,8 @@ Files delivered:
|
|
|
1067
1067
|
- **Multi-Turn Design**: `docs/dev_notes/multi_turn_filesystem_design.md` - Detailed architecture for session persistence and turn-based workflows
|
|
1068
1068
|
- **MCP Integration**: `docs/dev_notes/gemini_filesystem_mcp_design.md` - How filesystem access works through Model Context Protocol
|
|
1069
1069
|
- **Context Sharing**: `docs/dev_notes/v0.0.14-context.md` - Original context sharing design
|
|
1070
|
-
- **User Context Paths**: `docs/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
|
|
1071
|
-
- **Claude Code Workspace**: `docs/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
|
|
1070
|
+
- **User Context Paths**: `docs/source/examples/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
|
|
1071
|
+
- **Claude Code Workspace**: `docs/source/examples/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
|
|
1072
1072
|
|
|
1073
1073
|
## Conclusion
|
|
1074
1074
|
|
massgen/backend/response.py
CHANGED
|
@@ -57,6 +57,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
|
|
|
57
57
|
|
|
58
58
|
Wraps parent implementation to ensure File Search cleanup happens after streaming completes.
|
|
59
59
|
"""
|
|
60
|
+
|
|
60
61
|
try:
|
|
61
62
|
async for chunk in super().stream_with_tools(messages, tools, **kwargs):
|
|
62
63
|
yield chunk
|
|
@@ -145,6 +146,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
|
|
|
145
146
|
**kwargs,
|
|
146
147
|
) -> AsyncGenerator[StreamChunk, None]:
|
|
147
148
|
"""Recursively stream MCP responses, executing function calls as needed."""
|
|
149
|
+
|
|
148
150
|
agent_id = kwargs.get("agent_id")
|
|
149
151
|
|
|
150
152
|
# Build API params for this iteration
|
massgen/configs/README.md
CHANGED
|
@@ -227,53 +227,62 @@ Most configurations use environment variables for API keys:
|
|
|
227
227
|
|
|
228
228
|
## Release History & Examples
|
|
229
229
|
|
|
230
|
-
### v0.1.
|
|
231
|
-
**New Features:**
|
|
230
|
+
### v0.1.4 - Latest
|
|
231
|
+
**New Features:** Multimodal Generation Tools, Binary File Protection, Crawl4AI Integration
|
|
232
232
|
|
|
233
233
|
**Configuration Files:**
|
|
234
|
-
- `
|
|
235
|
-
- `
|
|
236
|
-
- `
|
|
237
|
-
- `
|
|
234
|
+
- `text_to_image_generation_single.yaml` / `text_to_image_generation_multi.yaml` - Image generation
|
|
235
|
+
- `text_to_video_generation_single.yaml` / `text_to_video_generation_multi.yaml` - Video generation
|
|
236
|
+
- `text_to_speech_generation_single.yaml` / `text_to_speech_generation_multi.yaml` - Audio generation
|
|
237
|
+
- `text_to_file_generation_single.yaml` / `text_to_file_generation_multi.yaml` - Document generation
|
|
238
|
+
- `crawl4ai_example.yaml` - Web scraping configuration
|
|
238
239
|
|
|
239
240
|
**Documentation:**
|
|
240
|
-
- `
|
|
241
|
-
- `docs/
|
|
242
|
-
- `docs/source/user_guide/
|
|
243
|
-
-
|
|
244
|
-
|
|
245
|
-
**Case Study:**
|
|
246
|
-
- [Multimodal Video Understanding](../../docs/case_studies/multimodal-case-study-video-analysis.md)
|
|
247
|
-
|
|
248
|
-
**Example Resources:**
|
|
249
|
-
- `configs/resources/v0.1.3-example/multimodality.jpg` - Image example
|
|
250
|
-
- `configs/resources/v0.1.3-example/Sherlock_Holmes.mp3` - Audio example
|
|
251
|
-
- `configs/resources/v0.1.3-example/oppenheimer_trailer_1920.mp4` - Video example
|
|
252
|
-
- `configs/resources/v0.1.3-example/TUMIX.pdf` - PDF document example
|
|
241
|
+
- `README_PYPI.md` - Standalone PyPI package documentation
|
|
242
|
+
- `docs/dev_notes/release_checklist.md` - Release workflow guide
|
|
243
|
+
- `docs/source/user_guide/protected_paths.rst` - Binary file protection documentation
|
|
244
|
+
- `.github/workflows/docs-automation.yml` - Documentation CI/CD automation
|
|
253
245
|
|
|
254
246
|
**Key Features:**
|
|
255
|
-
- **
|
|
256
|
-
- **
|
|
257
|
-
- **
|
|
258
|
-
- **
|
|
247
|
+
- **Generation Tools**: Create images, videos, audio, and documents using OpenAI APIs
|
|
248
|
+
- **Binary File Protection**: Automatic blocking prevents text tools from reading 40+ binary file types
|
|
249
|
+
- **Web Scraping**: Crawl4AI integration for intelligent content extraction
|
|
250
|
+
- **Enhanced Security**: Smart tool suggestions guide users to appropriate specialized tools
|
|
259
251
|
|
|
260
252
|
**Try it:**
|
|
261
253
|
```bash
|
|
262
254
|
# Install or upgrade
|
|
263
255
|
pip install --upgrade massgen
|
|
264
256
|
|
|
257
|
+
# Generate an image from text
|
|
258
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_image_generation_single \
|
|
259
|
+
"Please generate an image of a cat in space."
|
|
260
|
+
|
|
261
|
+
# Generate a video from text
|
|
262
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_video_generation_single \
|
|
263
|
+
"Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
|
|
264
|
+
|
|
265
|
+
# Generate documents (PDF, DOCX, etc.)
|
|
266
|
+
massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_file_generation_single \
|
|
267
|
+
"Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs)."
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### v0.1.3
|
|
271
|
+
**New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
|
|
272
|
+
|
|
273
|
+
**Configuration Files:**
|
|
274
|
+
- `understand_image.yaml`, `understand_audio.yaml`, `understand_video.yaml`, `understand_file.yaml`
|
|
275
|
+
|
|
276
|
+
**Key Features:**
|
|
277
|
+
- **Post-Evaluation Tools**: Submit and restart capabilities for winning agents
|
|
278
|
+
- **Multimodal Understanding**: Analyze images, audio, video, and documents
|
|
279
|
+
- **Docker Sudo Mode**: Execute privileged commands in containers
|
|
280
|
+
|
|
281
|
+
**Try it:**
|
|
282
|
+
```bash
|
|
265
283
|
# Try multimodal image understanding
|
|
266
|
-
# (Requires OPENAI_API_KEY in .env)
|
|
267
284
|
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_image \
|
|
268
285
|
"Please summarize the content in this image."
|
|
269
|
-
|
|
270
|
-
# Try multimodal audio understanding
|
|
271
|
-
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_audio \
|
|
272
|
-
"Please summarize the content in this audio."
|
|
273
|
-
|
|
274
|
-
# Try multimodal video understanding
|
|
275
|
-
massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video \
|
|
276
|
-
"What's happening in this video?"
|
|
277
286
|
```
|
|
278
287
|
|
|
279
288
|
### v0.1.2
|
|
@@ -284,7 +293,7 @@ massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video
|
|
|
284
293
|
- `configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
|
|
285
294
|
|
|
286
295
|
**Documentation:**
|
|
287
|
-
- `docs/
|
|
296
|
+
- `docs/dev_notes/intelligent_planning_mode.md` - Complete intelligent planning mode guide
|
|
288
297
|
|
|
289
298
|
**Key Features:**
|
|
290
299
|
- **Intelligent Planning Mode**: Automatic analysis of question irreversibility for dynamic MCP tool blocking
|
|
@@ -392,7 +401,7 @@ massgen --config @examples/tools/code-execution/docker_with_resource_limits \
|
|
|
392
401
|
- `massgen/configs/basic/single/single_gpt4o_video_generation.yaml` - Video generation with OpenAI Sora-2
|
|
393
402
|
|
|
394
403
|
**Case Study:**
|
|
395
|
-
- [Universal Code Execution via MCP](../../docs/case_studies/universal-code-execution-mcp.md)
|
|
404
|
+
- [Universal Code Execution via MCP](../../docs/source/examples/case_studies/universal-code-execution-mcp.md)
|
|
396
405
|
|
|
397
406
|
**Key Features:**
|
|
398
407
|
- Universal `execute_command` tool works across Claude, Gemini, OpenAI (Response API), and Chat Completions providers (Grok, ZAI, etc.)
|
|
@@ -465,7 +474,7 @@ massgen --config @examples/tools/filesystem/cc_gpt5_gemini_filesystem \
|
|
|
465
474
|
- New `FileOperationTracker` class for read-before-delete enforcement
|
|
466
475
|
- Enhanced PathPermissionManager with operation tracking methods
|
|
467
476
|
|
|
468
|
-
**Case Study:** [MCP Planning Mode](../../docs/case_studies/mcp-planning-mode.md)
|
|
477
|
+
**Case Study:** [MCP Planning Mode](../../docs/source/examples/case_studies/mcp-planning-mode.md)
|
|
469
478
|
|
|
470
479
|
**Try it:**
|
|
471
480
|
```bash
|
|
@@ -492,7 +501,7 @@ massgen --config @examples/tools/planning/five_agents_twitter_mcp_planning_mode
|
|
|
492
501
|
- New `ExternalAgentBackend` class bridging MassGen with external frameworks
|
|
493
502
|
- Multiple code executor types: LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor, JupyterCodeExecutor, YepCodeCodeExecutor
|
|
494
503
|
|
|
495
|
-
**Case Study:** [AG2 Framework Integration](../../docs/case_studies/ag2-framework-integration.md)
|
|
504
|
+
**Case Study:** [AG2 Framework Integration](../../docs/source/examples/case_studies/ag2-framework-integration.md)
|
|
496
505
|
|
|
497
506
|
**Try it:**
|
|
498
507
|
```bash
|
|
@@ -561,7 +570,7 @@ massgen --config @examples/tools/filesystem/gemini_gpt5nano_file_context_path \
|
|
|
561
570
|
- Automatic `.massgen` directory management for persistent conversation context
|
|
562
571
|
- Enhanced path permissions with `will_be_writable` flag and smart exclusion patterns
|
|
563
572
|
|
|
564
|
-
**Case Study:** [Multi-Turn Filesystem Support](../../docs/case_studies/multi-turn-filesystem-support.md)
|
|
573
|
+
**Case Study:** [Multi-Turn Filesystem Support](../../docs/source/examples/case_studies/multi-turn-filesystem-support.md)
|
|
565
574
|
```bash
|
|
566
575
|
# Turn 1 - Initial creation
|
|
567
576
|
Turn 1: Make a website about Bob Dylan
|
|
@@ -599,7 +608,7 @@ massgen --config @examples/basic/multi/two_qwen_vllm \
|
|
|
599
608
|
- All configs now organized by provider & use case (basic/, providers/, tools/, teams/)
|
|
600
609
|
- Use same configs as v0.0.21 for compatibility, but now with improved performance
|
|
601
610
|
|
|
602
|
-
**Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
|
|
611
|
+
**Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/source/examples/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
|
|
603
612
|
```bash
|
|
604
613
|
# Multi-agent collaboration with granular filesystem permissions
|
|
605
614
|
massgen --config @examples/tools/filesystem/gpt5mini_cc_fs_context_path "Enhance the website in massgen/configs/resources with: 1) A dark/light theme toggle with smooth transitions, 2) An interactive feature that helps users engage with the blog content (your choice - could be search, filtering by topic, reading time estimates, social sharing, reactions, etc.), and 3) Visual polish with CSS animations or transitions that make the site feel more modern and responsive. Use vanilla JavaScript and be creative with the implementation details."
|
|
@@ -645,7 +654,7 @@ massgen --config @examples/tools/mcp/gpt5_nano_mcp_example \
|
|
|
645
654
|
|
|
646
655
|
### v0.0.16
|
|
647
656
|
**New Features:** Unified Filesystem Support with MCP Integration
|
|
648
|
-
**Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/case_studies/unified-filesystem-mcp-integration.md)
|
|
657
|
+
**Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/source/examples/case_studies/unified-filesystem-mcp-integration.md)
|
|
649
658
|
```bash
|
|
650
659
|
# Gemini and Claude Code agents with unified filesystem via MCP
|
|
651
660
|
massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code "Create a presentation that teaches a reinforcement learning algorithm and output it in LaTeX Beamer format. No figures should be added."
|
|
@@ -658,7 +667,7 @@ massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code
|
|
|
658
667
|
|
|
659
668
|
### v0.0.12 - v0.0.14
|
|
660
669
|
**New Features:** Enhanced Logging and Workspace Management
|
|
661
|
-
**Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/case_studies/claude-code-workspace-management.md)
|
|
670
|
+
**Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/source/examples/case_studies/claude-code-workspace-management.md)
|
|
662
671
|
```bash
|
|
663
672
|
# Multi-agent Claude Code collaboration with enhanced workspace isolation
|
|
664
673
|
massgen --config @examples/tools/filesystem/claude_code_context_sharing "Create a website about a diverse set of fun facts about LLMs, placing the output in one index.html file"
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# MassGen Configuration: Crawl4AI Web Scraping via Custom Tools
|
|
2
|
+
#
|
|
3
|
+
# Prerequisites:
|
|
4
|
+
# 1. Start crawl4ai Docker container (one-time setup):
|
|
5
|
+
# docker pull unclecode/crawl4ai:latest
|
|
6
|
+
# docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
|
|
7
|
+
#
|
|
8
|
+
# 2. Verify container is running:
|
|
9
|
+
# docker ps | grep crawl4ai
|
|
10
|
+
#
|
|
11
|
+
# 3. Test REST API endpoint (optional):
|
|
12
|
+
# curl -X POST http://localhost:11235/md -H "Content-Type: application/json" -d '{"url": "https://example.com", "f": "fit"}'
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# massgen --config @examples/tools/custom_tools/crawl4ai_example "Please search for the MassGen docs, take a screenshot of the website, and explain that screenshot"
|
|
16
|
+
#
|
|
17
|
+
# Available Tools (via Custom Tools):
|
|
18
|
+
# - crawl4ai_md: Generate markdown from web content
|
|
19
|
+
# - crawl4ai_html: Extract preprocessed HTML
|
|
20
|
+
# - crawl4ai_screenshot: Capture webpage screenshots
|
|
21
|
+
# - crawl4ai_pdf: Generate PDF documents
|
|
22
|
+
# - crawl4ai_execute_js: Run JavaScript on web pages
|
|
23
|
+
# - crawl4ai_crawl: Perform multi-URL crawling
|
|
24
|
+
# - crawl4ai_ask: Query the Crawl4AI library context (not registered in the custom_tools list below)
|
|
25
|
+
#
|
|
26
|
+
# Note: Multiple agents can use these tools concurrently.
|
|
27
|
+
# The server handles up to 5 concurrent crawls by default.
|
|
28
|
+
|
|
29
|
+
agents:
|
|
30
|
+
- id: "web_scraper_agent"
|
|
31
|
+
backend:
|
|
32
|
+
type: "openai" # Works with any backend: openai, gemini, claude_code, etc.
|
|
33
|
+
model: "gpt-5-mini"
|
|
34
|
+
cwd: "workspace1"
|
|
35
|
+
|
|
36
|
+
# Register crawl4ai custom tools
|
|
37
|
+
custom_tools:
|
|
38
|
+
- name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
|
|
39
|
+
category: "web_scraping"
|
|
40
|
+
path: "massgen/tool/_web_tools/crawl4ai_tool.py"
|
|
41
|
+
function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
|
|
42
|
+
- name: ["understand_image"]
|
|
43
|
+
category: "multimodal"
|
|
44
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
45
|
+
function: ["understand_image"]
|
|
46
|
+
|
|
47
|
+
orchestrator:
|
|
48
|
+
snapshot_storage: "snapshots"
|
|
49
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
50
|
+
coordination:
|
|
51
|
+
max_orchestration_restarts: 2 # Default is 0; a value of 2 allows 3 total attempts (initial + 2 restarts)
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "rich_terminal"
|
|
55
|
+
logging_enabled: true
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
When generating PPTX presentations, format your content with:
|
|
24
|
+
- Use "# Title" or "## Title" for slide titles
|
|
25
|
+
- Use "---" to separate slides
|
|
26
|
+
- Use "- Item" for bullet points
|
|
27
|
+
- Use "  - Subitem" for sub-bullets (two spaces indent)
|
|
28
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
29
|
+
|
|
30
|
+
- id: "text_to_file_generation_tool2"
|
|
31
|
+
backend:
|
|
32
|
+
type: "openai"
|
|
33
|
+
model: "gpt-4o"
|
|
34
|
+
cwd: "workspace2"
|
|
35
|
+
enable_file_generation: true
|
|
36
|
+
custom_tools:
|
|
37
|
+
- name: ["text_to_file_generation"]
|
|
38
|
+
category: "multimodal"
|
|
39
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
40
|
+
function: ["text_to_file_generation"]
|
|
41
|
+
- name: ["understand_file"]
|
|
42
|
+
category: "multimodal"
|
|
43
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
44
|
+
function: ["understand_file"]
|
|
45
|
+
system_message: |
|
|
46
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
47
|
+
|
|
48
|
+
When generating PPTX presentations, format your content with:
|
|
49
|
+
- Use "# Title" or "## Title" for slide titles
|
|
50
|
+
- Use "---" to separate slides
|
|
51
|
+
- Use "- Item" for bullet points
|
|
52
|
+
- Use "  - Subitem" for sub-bullets (two spaces indent)
|
|
53
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
54
|
+
|
|
55
|
+
orchestrator:
|
|
56
|
+
snapshot_storage: "snapshots"
|
|
57
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
58
|
+
|
|
59
|
+
ui:
|
|
60
|
+
display_type: "rich_terminal"
|
|
61
|
+
logging_enabled: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
orchestrator:
|
|
24
|
+
snapshot_storage: "snapshots"
|
|
25
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
26
|
+
|
|
27
|
+
ui:
|
|
28
|
+
display_type: "simple"
|
|
29
|
+
logging_enabled: true
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_image_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_image_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_image_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
37
|
+
function: ["text_to_image_generation"]
|
|
38
|
+
- name: ["understand_image"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
41
|
+
function: ["understand_image"]
|
|
42
|
+
system_message: |
|
|
43
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
44
|
+
|
|
45
|
+
orchestrator:
|
|
46
|
+
snapshot_storage: "snapshots"
|
|
47
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
48
|
+
|
|
49
|
+
ui:
|
|
50
|
+
display_type: "rich_terminal"
|
|
51
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
orchestrator:
|
|
28
|
+
snapshot_storage: "snapshots"
|
|
29
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "simple"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Speech Continue Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_speech_continue_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_audio_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
15
|
+
function: ["text_to_speech_transcription_generation"]
|
|
16
|
+
- name: ["understand_audio"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
19
|
+
function: ["understand_audio"]
|
|
20
|
+
- name: ["text_to_speech_continue_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
23
|
+
function: ["text_to_speech_continue_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_speech_continue_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_audio_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
37
|
+
function: ["text_to_speech_transcription_generation"]
|
|
38
|
+
- name: ["understand_audio"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
41
|
+
function: ["understand_audio"]
|
|
42
|
+
- name: ["text_to_speech_continue_generation"]
|
|
43
|
+
category: "multimodal"
|
|
44
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
45
|
+
function: ["text_to_speech_continue_generation"]
|
|
46
|
+
system_message: |
|
|
47
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
48
|
+
|
|
49
|
+
orchestrator:
|
|
50
|
+
snapshot_storage: "snapshots"
|
|
51
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "rich_terminal"
|
|
55
|
+
logging_enabled: true
|