massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (58) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  3. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  4. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  6. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  7. massgen/backend/docs/permissions_and_context_files.md +2 -2
  8. massgen/backend/response.py +2 -0
  9. massgen/configs/README.md +49 -40
  10. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  11. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  12. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  13. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  14. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  15. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  16. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  17. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  18. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  19. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
  20. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
  21. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
  22. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
  23. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
  24. massgen/filesystem_manager/_filesystem_manager.py +1 -0
  25. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  26. massgen/message_templates.py +160 -12
  27. massgen/orchestrator.py +16 -0
  28. massgen/tests/test_binary_file_blocking.py +274 -0
  29. massgen/tests/test_case_studies.md +12 -12
  30. massgen/tests/test_multimodal_size_limits.py +407 -0
  31. massgen/tool/_manager.py +7 -2
  32. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  33. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  34. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  35. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  36. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  37. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  38. massgen/tool/_multimodal_tools/understand_audio.py +19 -1
  39. massgen/tool/_multimodal_tools/understand_file.py +6 -1
  40. massgen/tool/_multimodal_tools/understand_image.py +112 -8
  41. massgen/tool/_multimodal_tools/understand_video.py +32 -5
  42. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  43. massgen/tool/docs/multimodal_tools.md +589 -0
  44. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/METADATA +96 -69
  45. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/RECORD +49 -40
  46. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
  47. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
  48. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +0 -98
  49. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +0 -54
  50. massgen/configs/tools/memory/README.md +0 -199
  51. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
  52. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
  53. massgen/configs/tools/memory/test_context_window_management.py +0 -286
  54. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
  55. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  56. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  57. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  58. {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
massgen/__init__.py CHANGED
@@ -68,7 +68,7 @@ from .chat_agent import (
68
68
  from .message_templates import MessageTemplates, get_templates
69
69
  from .orchestrator import Orchestrator, create_orchestrator
70
70
 
71
- __version__ = "0.1.3"
71
+ __version__ = "0.1.4"
72
72
  __author__ = "MassGen Contributors"
73
73
 
74
74
 
@@ -24,6 +24,10 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
24
24
  "allowed_tools",
25
25
  "exclude_tools",
26
26
  "custom_tools", # Custom tools configuration (processed separately)
27
+ "enable_file_generation", # Internal flag for file generation (used in system messages only)
28
+ "enable_image_generation", # Internal flag for image generation (used in system messages only)
29
+ "enable_audio_generation", # Internal flag for audio generation (used in system messages only)
30
+ "enable_video_generation", # Internal flag for video generation (used in system messages only)
27
31
  },
28
32
  )
29
33
 
@@ -24,6 +24,10 @@ class ClaudeAPIParamsHandler(APIParamsHandlerBase):
24
24
  "exclude_tools",
25
25
  "custom_tools", # Custom tools configuration (processed separately)
26
26
  "_has_files_api_files",
27
+ "enable_file_generation", # Internal flag for file generation (used in system messages only)
28
+ "enable_image_generation", # Internal flag for image generation (used in system messages only)
29
+ "enable_audio_generation", # Internal flag for audio generation (used in system messages only)
30
+ "enable_video_generation", # Internal flag for video generation (used in system messages only)
27
31
  },
28
32
  )
29
33
 
@@ -19,6 +19,10 @@ class GeminiAPIParamsHandler(APIParamsHandlerBase):
19
19
  "allowed_tools",
20
20
  "exclude_tools",
21
21
  "custom_tools",
22
+ "enable_file_generation", # Internal flag for file generation (used in system messages only)
23
+ "enable_image_generation", # Internal flag for image generation (used in system messages only)
24
+ "enable_audio_generation", # Internal flag for audio generation (used in system messages only)
25
+ "enable_video_generation", # Internal flag for video generation (used in system messages only)
22
26
  }
23
27
  return set(base) | extra
24
28
 
@@ -24,6 +24,10 @@ class ResponseAPIParamsHandler(APIParamsHandlerBase):
24
24
  "exclude_tools",
25
25
  "custom_tools", # Custom tools configuration (processed separately)
26
26
  "_has_file_search_files", # Internal flag for file search tracking
27
+ "enable_file_generation", # Internal flag for file generation (used in system messages only)
28
+ "enable_image_generation", # Internal flag for image generation (used in system messages only)
29
+ "enable_audio_generation", # Internal flag for audio generation (used in system messages only)
30
+ "enable_video_generation", # Internal flag for video generation (used in system messages only)
27
31
  },
28
32
  )
29
33
 
@@ -284,9 +284,19 @@ class CustomToolAndMCPBackend(LLMBackend):
284
284
 
285
285
  # Register each function with its corresponding values
286
286
  for i, func in enumerate(functions):
287
+ # Inject agent_cwd into preset_args if filesystem_manager is available
288
+ final_preset_args = preset_args_list[i].copy() if preset_args_list[i] else {}
289
+ if self.filesystem_manager and self.filesystem_manager.cwd:
290
+ final_preset_args["agent_cwd"] = self.filesystem_manager.cwd
291
+ logger.info(f"Injecting agent_cwd for {func}: {self.filesystem_manager.cwd}")
292
+ elif self.filesystem_manager:
293
+ logger.warning(f"filesystem_manager exists but cwd is None for {func}")
294
+ else:
295
+ logger.warning(f"No filesystem_manager available for {func}")
296
+
287
297
  # Load the function first if custom name is needed
288
298
  if names[i] and names[i] != func:
289
- # Need to load function and apply custom name
299
+ # Load function to apply custom name
290
300
  if path:
291
301
  loaded_func = self.custom_tool_manager._load_function_from_path(path, func)
292
302
  else:
@@ -296,7 +306,6 @@ class CustomToolAndMCPBackend(LLMBackend):
296
306
  logger.error(f"Could not load function '{func}' from path: {path}")
297
307
  continue
298
308
 
299
- # Apply custom name by modifying __name__ attribute
300
309
  loaded_func.__name__ = names[i]
301
310
 
302
311
  # Register with loaded function (no path needed)
@@ -304,7 +313,7 @@ class CustomToolAndMCPBackend(LLMBackend):
304
313
  path=None,
305
314
  func=loaded_func,
306
315
  category=category,
307
- preset_args=preset_args_list[i],
316
+ preset_args=final_preset_args,
308
317
  description=descriptions[i],
309
318
  )
310
319
  else:
@@ -313,7 +322,7 @@ class CustomToolAndMCPBackend(LLMBackend):
313
322
  path=path,
314
323
  func=func,
315
324
  category=category,
316
- preset_args=preset_args_list[i],
325
+ preset_args=final_preset_args,
317
326
  description=descriptions[i],
318
327
  )
319
328
 
@@ -404,9 +413,19 @@ class CustomToolAndMCPBackend(LLMBackend):
404
413
  """
405
414
  import json
406
415
 
416
+ # Parse arguments
417
+ arguments = json.loads(call["arguments"]) if isinstance(call["arguments"], str) else call["arguments"]
418
+
419
+ # Ensure agent_cwd is always injected if filesystem_manager is available
420
+ # This provides a fallback in case preset_args didn't work during registration
421
+ if self.filesystem_manager and self.filesystem_manager.cwd:
422
+ if "agent_cwd" not in arguments or arguments.get("agent_cwd") is None:
423
+ arguments["agent_cwd"] = self.filesystem_manager.cwd
424
+ logger.info(f"Dynamically injected agent_cwd at execution time: {self.filesystem_manager.cwd}")
425
+
407
426
  tool_request = {
408
427
  "name": call["name"],
409
- "input": json.loads(call["arguments"]) if isinstance(call["arguments"], str) else call["arguments"],
428
+ "input": arguments,
410
429
  }
411
430
 
412
431
  result_text = ""
@@ -1120,6 +1139,7 @@ class CustomToolAndMCPBackend(LLMBackend):
1120
1139
  **kwargs,
1121
1140
  ) -> AsyncGenerator[StreamChunk, None]:
1122
1141
  """Simple passthrough streaming without MCP processing."""
1142
+
1123
1143
  agent_id = kwargs.get("agent_id", None)
1124
1144
  all_params = {**self.config, **kwargs}
1125
1145
  processed_messages = await self._process_upload_files(messages, all_params)
@@ -1067,8 +1067,8 @@ Files delivered:
1067
1067
  - **Multi-Turn Design**: `docs/dev_notes/multi_turn_filesystem_design.md` - Detailed architecture for session persistence and turn-based workflows
1068
1068
  - **MCP Integration**: `docs/dev_notes/gemini_filesystem_mcp_design.md` - How filesystem access works through Model Context Protocol
1069
1069
  - **Context Sharing**: `docs/dev_notes/v0.0.14-context.md` - Original context sharing design
1070
- - **User Context Paths**: `docs/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
1071
- - **Claude Code Workspace**: `docs/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
1070
+ - **User Context Paths**: `docs/source/examples/case_studies/user-context-path-support-with-copy-mcp.md` - Case study on adding user-specified paths
1071
+ - **Claude Code Workspace**: `docs/source/examples/case_studies/claude-code-workspace-management.md` - Native filesystem integration patterns
1072
1072
 
1073
1073
  ## Conclusion
1074
1074
 
@@ -57,6 +57,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
57
57
 
58
58
  Wraps parent implementation to ensure File Search cleanup happens after streaming completes.
59
59
  """
60
+
60
61
  try:
61
62
  async for chunk in super().stream_with_tools(messages, tools, **kwargs):
62
63
  yield chunk
@@ -145,6 +146,7 @@ class ResponseBackend(CustomToolAndMCPBackend):
145
146
  **kwargs,
146
147
  ) -> AsyncGenerator[StreamChunk, None]:
147
148
  """Recursively stream MCP responses, executing function calls as needed."""
149
+
148
150
  agent_id = kwargs.get("agent_id")
149
151
 
150
152
  # Build API params for this iteration
massgen/configs/README.md CHANGED
@@ -227,53 +227,62 @@ Most configurations use environment variables for API keys:
227
227
 
228
228
  ## Release History & Examples
229
229
 
230
- ### v0.1.3 - Latest
231
- **New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
230
+ ### v0.1.4 - Latest
231
+ **New Features:** Multimodal Generation Tools, Binary File Protection, Crawl4AI Integration
232
232
 
233
233
  **Configuration Files:**
234
- - `configs/tools/custom_tools/multimodal_tools/understand_image.yaml` - Image analysis configuration
235
- - `configs/tools/custom_tools/multimodal_tools/understand_audio.yaml` - Audio transcription configuration
236
- - `configs/tools/custom_tools/multimodal_tools/understand_video.yaml` - Video analysis configuration
237
- - `configs/tools/custom_tools/multimodal_tools/understand_file.yaml` - Document processing configuration
234
+ - `text_to_image_generation_single.yaml` / `text_to_image_generation_multi.yaml` - Image generation
235
+ - `text_to_video_generation_single.yaml` / `text_to_video_generation_multi.yaml` - Video generation
236
+ - `text_to_speech_generation_single.yaml` / `text_to_speech_generation_multi.yaml` - Audio generation
237
+ - `text_to_file_generation_single.yaml` / `text_to_file_generation_multi.yaml` - Document generation
238
+ - `crawl4ai_example.yaml` - Web scraping configuration
238
239
 
239
240
  **Documentation:**
240
- - `massgen/tool/docs/multimodal_tools.md` - Complete 779-line multimodal tools guide
241
- - `docs/source/user_guide/multimodal.rst` - Updated multimodal documentation with custom tools
242
- - `docs/source/user_guide/code_execution.rst` - Enhanced with 98 lines documenting sudo mode
243
- - `massgen/docker/README.md` - Updated Docker documentation with sudo mode instructions
244
-
245
- **Case Study:**
246
- - [Multimodal Video Understanding](../../docs/case_studies/multimodal-case-study-video-analysis.md)
247
-
248
- **Example Resources:**
249
- - `configs/resources/v0.1.3-example/multimodality.jpg` - Image example
250
- - `configs/resources/v0.1.3-example/Sherlock_Holmes.mp3` - Audio example
251
- - `configs/resources/v0.1.3-example/oppenheimer_trailer_1920.mp4` - Video example
252
- - `configs/resources/v0.1.3-example/TUMIX.pdf` - PDF document example
241
+ - `README_PYPI.md` - Standalone PyPI package documentation
242
+ - `docs/dev_notes/release_checklist.md` - Release workflow guide
243
+ - `docs/source/user_guide/protected_paths.rst` - Binary file protection documentation
244
+ - `.github/workflows/docs-automation.yml` - Documentation CI/CD automation
253
245
 
254
246
  **Key Features:**
255
- - **Post-Evaluation Tools**: Submit and restart capabilities for winning agents with confidence assessments
256
- - **Multimodal Understanding**: Analyze images, audio, video, and documents using GPT-4.1
257
- - **Docker Sudo Mode**: Execute privileged commands in containerized environments
258
- - **Config Builder**: Improved workflow with auto-detection and better provider handling
247
+ - **Generation Tools**: Create images, videos, audio, and documents using OpenAI APIs
248
+ - **Binary File Protection**: Automatic blocking prevents text tools from reading 40+ binary file types
249
+ - **Web Scraping**: Crawl4AI integration for intelligent content extraction
250
+ - **Enhanced Security**: Smart tool suggestions guide users to appropriate specialized tools
259
251
 
260
252
  **Try it:**
261
253
  ```bash
262
254
  # Install or upgrade
263
255
  pip install --upgrade massgen
264
256
 
257
+ # Generate an image from text
258
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_image_generation_single \
259
+ "Please generate an image of a cat in space."
260
+
261
+ # Generate a video from text
262
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_video_generation_single \
263
+ "Generate a 4 seconds video with neon-lit alley at night, light rain, slow push-in, cinematic."
264
+
265
+ # Generate documents (PDF, DOCX, etc.)
266
+ massgen --config @examples/tools/custom_tools/multimodal_tools/text_to_file_generation_single \
267
+ "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs)."
268
+ ```
269
+
270
+ ### v0.1.3
271
+ **New Features:** Post-Evaluation Workflow, Custom Multimodal Understanding Tools, Docker Sudo Mode
272
+
273
+ **Configuration Files:**
274
+ - `understand_image.yaml`, `understand_audio.yaml`, `understand_video.yaml`, `understand_file.yaml`
275
+
276
+ **Key Features:**
277
+ - **Post-Evaluation Tools**: Submit and restart capabilities for winning agents
278
+ - **Multimodal Understanding**: Analyze images, audio, video, and documents
279
+ - **Docker Sudo Mode**: Execute privileged commands in containers
280
+
281
+ **Try it:**
282
+ ```bash
265
283
  # Try multimodal image understanding
266
- # (Requires OPENAI_API_KEY in .env)
267
284
  massgen --config @examples/tools/custom_tools/multimodal_tools/understand_image \
268
285
  "Please summarize the content in this image."
269
-
270
- # Try multimodal audio understanding
271
- massgen --config @examples/tools/custom_tools/multimodal_tools/understand_audio \
272
- "Please summarize the content in this audio."
273
-
274
- # Try multimodal video understanding
275
- massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video \
276
- "What's happening in this video?"
277
286
  ```
278
287
 
279
288
  ### v0.1.2
@@ -284,7 +293,7 @@ massgen --config @examples/tools/custom_tools/multimodal_tools/understand_video
284
293
  - `configs/basic/multi/three_agents_default.yaml` - Updated with Grok-4-fast model
285
294
 
286
295
  **Documentation:**
287
- - `docs/case_studies/INTELLIGENT_PLANNING_MODE.md` - Complete intelligent planning mode guide
296
+ - `docs/dev_notes/intelligent_planning_mode.md` - Complete intelligent planning mode guide
288
297
 
289
298
  **Key Features:**
290
299
  - **Intelligent Planning Mode**: Automatic analysis of question irreversibility for dynamic MCP tool blocking
@@ -392,7 +401,7 @@ massgen --config @examples/tools/code-execution/docker_with_resource_limits \
392
401
  - `massgen/configs/basic/single/single_gpt4o_video_generation.yaml` - Video generation with OpenAI Sora-2
393
402
 
394
403
  **Case Study:**
395
- - [Universal Code Execution via MCP](../../docs/case_studies/universal-code-execution-mcp.md)
404
+ - [Universal Code Execution via MCP](../../docs/source/examples/case_studies/universal-code-execution-mcp.md)
396
405
 
397
406
  **Key Features:**
398
407
  - Universal `execute_command` tool works across Claude, Gemini, OpenAI (Response API), and Chat Completions providers (Grok, ZAI, etc.)
@@ -465,7 +474,7 @@ massgen --config @examples/tools/filesystem/cc_gpt5_gemini_filesystem \
465
474
  - New `FileOperationTracker` class for read-before-delete enforcement
466
475
  - Enhanced PathPermissionManager with operation tracking methods
467
476
 
468
- **Case Study:** [MCP Planning Mode](../../docs/case_studies/mcp-planning-mode.md)
477
+ **Case Study:** [MCP Planning Mode](../../docs/source/examples/case_studies/mcp-planning-mode.md)
469
478
 
470
479
  **Try it:**
471
480
  ```bash
@@ -492,7 +501,7 @@ massgen --config @examples/tools/planning/five_agents_twitter_mcp_planning_mode
492
501
  - New `ExternalAgentBackend` class bridging MassGen with external frameworks
493
502
  - Multiple code executor types: LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor, JupyterCodeExecutor, YepCodeCodeExecutor
494
503
 
495
- **Case Study:** [AG2 Framework Integration](../../docs/case_studies/ag2-framework-integration.md)
504
+ **Case Study:** [AG2 Framework Integration](../../docs/source/examples/case_studies/ag2-framework-integration.md)
496
505
 
497
506
  **Try it:**
498
507
  ```bash
@@ -561,7 +570,7 @@ massgen --config @examples/tools/filesystem/gemini_gpt5nano_file_context_path \
561
570
  - Automatic `.massgen` directory management for persistent conversation context
562
571
  - Enhanced path permissions with `will_be_writable` flag and smart exclusion patterns
563
572
 
564
- **Case Study:** [Multi-Turn Filesystem Support](../../docs/case_studies/multi-turn-filesystem-support.md)
573
+ **Case Study:** [Multi-Turn Filesystem Support](../../docs/source/examples/case_studies/multi-turn-filesystem-support.md)
565
574
  ```bash
566
575
  # Turn 1 - Initial creation
567
576
  Turn 1: Make a website about Bob Dylan
@@ -599,7 +608,7 @@ massgen --config @examples/basic/multi/two_qwen_vllm \
599
608
  - All configs now organized by provider & use case (basic/, providers/, tools/, teams/)
600
609
  - Use same configs as v0.0.21 for compatibility, but now with improved performance
601
610
 
602
- **Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
611
+ **Case Study:** [Advanced Filesystem with User Context Path Support](../../docs/source/examples/case_studies/v0.0.21-v0.0.22-filesystem-permissions.md)
603
612
  ```bash
604
613
  # Multi-agent collaboration with granular filesystem permissions
605
614
  massgen --config @examples/tools/filesystem/gpt5mini_cc_fs_context_path "Enhance the website in massgen/configs/resources with: 1) A dark/light theme toggle with smooth transitions, 2) An interactive feature that helps users engage with the blog content (your choice - could be search, filtering by topic, reading time estimates, social sharing, reactions, etc.), and 3) Visual polish with CSS animations or transitions that make the site feel more modern and responsive. Use vanilla JavaScript and be creative with the implementation details."
@@ -645,7 +654,7 @@ massgen --config @examples/tools/mcp/gpt5_nano_mcp_example \
645
654
 
646
655
  ### v0.0.16
647
656
  **New Features:** Unified Filesystem Support with MCP Integration
648
- **Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/case_studies/unified-filesystem-mcp-integration.md)
657
+ **Case Study:** [Cross-Backend Collaboration with Gemini MCP Filesystem](../../docs/source/examples/case_studies/unified-filesystem-mcp-integration.md)
649
658
  ```bash
650
659
  # Gemini and Claude Code agents with unified filesystem via MCP
651
660
  massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code "Create a presentation that teaches a reinforcement learning algorithm and output it in LaTeX Beamer format. No figures should be added."
@@ -658,7 +667,7 @@ massgen --config @examples/tools/mcp/gemini_mcp_filesystem_test_with_claude_code
658
667
 
659
668
  ### v0.0.12 - v0.0.14
660
669
  **New Features:** Enhanced Logging and Workspace Management
661
- **Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/case_studies/claude-code-workspace-management.md)
670
+ **Case Study:** [Claude Code Workspace Management with Comprehensive Logging](../../docs/source/examples/case_studies/claude-code-workspace-management.md)
662
671
  ```bash
663
672
  # Multi-agent Claude Code collaboration with enhanced workspace isolation
664
673
  massgen --config @examples/tools/filesystem/claude_code_context_sharing "Create a website about a diverse set of fun facts about LLMs, placing the output in one index.html file"
@@ -0,0 +1,55 @@
1
+ # MassGen Configuration: Crawl4AI Web Scraping via Custom Tools
2
+ #
3
+ # Prerequisites:
4
+ # 1. Start crawl4ai Docker container (one-time setup):
5
+ # docker pull unclecode/crawl4ai:latest
6
+ # docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
7
+ #
8
+ # 2. Verify container is running:
9
+ # docker ps | grep crawl4ai
10
+ #
11
+ # 3. Test REST API endpoint (optional):
12
+ # curl -X POST http://localhost:11235/md -H "Content-Type: application/json" -d '{"url": "https://example.com", "f": "fit"}'
13
+ #
14
+ # Usage:
15
+ # massgen --config @examples/tools/custom_tools/crawl4ai_example "Please search for the MassGen docs, take a screenshot of the website, and explain that screenshot"
16
+ #
17
+ # Available Tools (via Custom Tools):
18
+ # - crawl4ai_md: Generate markdown from web content
19
+ # - crawl4ai_html: Extract preprocessed HTML
20
+ # - crawl4ai_screenshot: Capture webpage screenshots
21
+ # - crawl4ai_pdf: Generate PDF documents
22
+ # - crawl4ai_execute_js: Run JavaScript on web pages
23
+ # - crawl4ai_crawl: Perform multi-URL crawling
24
+ # - crawl4ai_ask: Query the Crawl4AI library context (not registered in the custom_tools list of this example)
25
+ #
26
+ # Note: Multiple agents can use these tools concurrently.
27
+ # The server handles up to 5 concurrent crawls by default.
28
+
29
+ agents:
30
+ - id: "web_scraper_agent"
31
+ backend:
32
+ type: "openai" # Works with any backend: openai, gemini, claude_code, etc.
33
+ model: "gpt-5-mini"
34
+ cwd: "workspace1"
35
+
36
+ # Register crawl4ai custom tools
37
+ custom_tools:
38
+ - name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
39
+ category: "web_scraping"
40
+ path: "massgen/tool/_web_tools/crawl4ai_tool.py"
41
+ function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
42
+ - name: ["understand_image"]
43
+ category: "multimodal"
44
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
45
+ function: ["understand_image"]
46
+
47
+ orchestrator:
48
+ snapshot_storage: "snapshots"
49
+ agent_temporary_workspace: "temp_workspaces"
50
+ coordination:
51
+ max_orchestration_restarts: 2 # Default is 0; a value of 2 allows 3 total attempts (initial + 2 restarts)
52
+
53
+ ui:
54
+ display_type: "rich_terminal"
55
+ logging_enabled: true
@@ -0,0 +1,61 @@
1
+ # MassGen Configuration: Text to File Generation Tool
2
+ # Usage:
3
+ # uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
4
+ agents:
5
+ - id: "text_to_file_generation_tool1"
6
+ backend:
7
+ type: "openai"
8
+ model: "gpt-4o"
9
+ cwd: "workspace1"
10
+ enable_file_generation: true
11
+ custom_tools:
12
+ - name: ["text_to_file_generation"]
13
+ category: "multimodal"
14
+ path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
15
+ function: ["text_to_file_generation"]
16
+ - name: ["understand_file"]
17
+ category: "multimodal"
18
+ path: "massgen/tool/_multimodal_tools/understand_file.py"
19
+ function: ["understand_file"]
20
+ system_message: |
21
+ You are an AI assistant with access to text-to-file generation capabilities.
22
+
23
+ When generating PPTX presentations, format your content with:
24
+ - Use "# Title" or "## Title" for slide titles
25
+ - Use "---" to separate slides
26
+ - Use "- Item" for bullet points
27
+ - Use "  - Subitem" for sub-bullets (two-space indent)
28
+ - Structure content in a slide-friendly format with clear, concise points
29
+
30
+ - id: "text_to_file_generation_tool2"
31
+ backend:
32
+ type: "openai"
33
+ model: "gpt-4o"
34
+ cwd: "workspace2"
35
+ enable_file_generation: true
36
+ custom_tools:
37
+ - name: ["text_to_file_generation"]
38
+ category: "multimodal"
39
+ path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
40
+ function: ["text_to_file_generation"]
41
+ - name: ["understand_file"]
42
+ category: "multimodal"
43
+ path: "massgen/tool/_multimodal_tools/understand_file.py"
44
+ function: ["understand_file"]
45
+ system_message: |
46
+ You are an AI assistant with access to text-to-file generation capabilities.
47
+
48
+ When generating PPTX presentations, format your content with:
49
+ - Use "# Title" or "## Title" for slide titles
50
+ - Use "---" to separate slides
51
+ - Use "- Item" for bullet points
52
+ - Use "  - Subitem" for sub-bullets (two-space indent)
53
+ - Structure content in a slide-friendly format with clear, concise points
54
+
55
+ orchestrator:
56
+ snapshot_storage: "snapshots"
57
+ agent_temporary_workspace: "temp_workspaces"
58
+
59
+ ui:
60
+ display_type: "rich_terminal"
61
+ logging_enabled: true
@@ -0,0 +1,29 @@
1
+ # MassGen Configuration: Text to File Generation Tool
2
+ # Usage:
3
+ # uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
4
+ agents:
5
+ - id: "text_to_file_generation_tool"
6
+ backend:
7
+ type: "openai"
8
+ model: "gpt-4o"
9
+ cwd: "workspace1"
10
+ enable_file_generation: true
11
+ custom_tools:
12
+ - name: ["text_to_file_generation"]
13
+ category: "multimodal"
14
+ path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
15
+ function: ["text_to_file_generation"]
16
+ - name: ["understand_file"]
17
+ category: "multimodal"
18
+ path: "massgen/tool/_multimodal_tools/understand_file.py"
19
+ function: ["understand_file"]
20
+ system_message: |
21
+ You are an AI assistant with access to text-to-file generation capabilities.
22
+
23
+ orchestrator:
24
+ snapshot_storage: "snapshots"
25
+ agent_temporary_workspace: "temp_workspaces"
26
+
27
+ ui:
28
+ display_type: "simple"
29
+ logging_enabled: true
@@ -0,0 +1,51 @@
1
+ # MassGen Configuration: Text to Image Generation Tool
2
+ # Usage:
3
+ # uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
4
+ agents:
5
+ - id: "text_to_image_generation_tool1"
6
+ backend:
7
+ type: "openai"
8
+ model: "gpt-4o"
9
+ cwd: "workspace1"
10
+ enable_image_generation: true
11
+ custom_tools:
12
+ - name: ["text_to_image_generation"]
13
+ category: "multimodal"
14
+ path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
15
+ function: ["text_to_image_generation"]
16
+ - name: ["understand_image"]
17
+ category: "multimodal"
18
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
19
+ function: ["understand_image"]
20
+ - name: ["image_to_image_generation"]
21
+ category: "multimodal"
22
+ path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
23
+ function: ["image_to_image_generation"]
24
+ system_message: |
25
+ You are an AI assistant with access to text-to-image generation capabilities.
26
+
27
+ - id: "text_to_image_generation_tool2"
28
+ backend:
29
+ type: "openai"
30
+ model: "gpt-4o"
31
+ cwd: "workspace2"
32
+ enable_image_generation: true
33
+ custom_tools:
34
+ - name: ["text_to_image_generation"]
35
+ category: "multimodal"
36
+ path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
37
+ function: ["text_to_image_generation"]
38
+ - name: ["understand_image"]
39
+ category: "multimodal"
40
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
41
+ function: ["understand_image"]
42
+ system_message: |
43
+ You are an AI assistant with access to text-to-image generation capabilities.
44
+
45
+ orchestrator:
46
+ snapshot_storage: "snapshots"
47
+ agent_temporary_workspace: "temp_workspaces"
48
+
49
+ ui:
50
+ display_type: "rich_terminal"
51
+ logging_enabled: true
@@ -0,0 +1,33 @@
1
+ # MassGen Configuration: Text to Image Generation Tool
2
+ # Usage:
3
+ # uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
4
+ agents:
5
+ - id: "text_to_image_generation_tool"
6
+ backend:
7
+ type: "openai"
8
+ model: "gpt-4o"
9
+ cwd: "workspace1"
10
+ enable_image_generation: true
11
+ custom_tools:
12
+ - name: ["text_to_image_generation"]
13
+ category: "multimodal"
14
+ path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
15
+ function: ["text_to_image_generation"]
16
+ - name: ["understand_image"]
17
+ category: "multimodal"
18
+ path: "massgen/tool/_multimodal_tools/understand_image.py"
19
+ function: ["understand_image"]
20
+ - name: ["image_to_image_generation"]
21
+ category: "multimodal"
22
+ path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
23
+ function: ["image_to_image_generation"]
24
+ system_message: |
25
+ You are an AI assistant with access to text-to-image generation capabilities.
26
+
27
+ orchestrator:
28
+ snapshot_storage: "snapshots"
29
+ agent_temporary_workspace: "temp_workspaces"
30
+
31
+ ui:
32
+ display_type: "simple"
33
+ logging_enabled: true
@@ -0,0 +1,55 @@
1
+ # MassGen Configuration: Text to Speech Continue Generation Tool
2
+ # Usage:
3
+ # uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
4
+ agents:
5
+ - id: "text_to_speech_continue_generation_tool1"
6
+ backend:
7
+ type: "openai"
8
+ model: "gpt-4o"
9
+ cwd: "workspace1"
10
+ enable_audio_generation: true
11
+ custom_tools:
12
+ - name: ["text_to_speech_transcription_generation"]
13
+ category: "multimodal"
14
+ path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
15
+ function: ["text_to_speech_transcription_generation"]
16
+ - name: ["understand_audio"]
17
+ category: "multimodal"
18
+ path: "massgen/tool/_multimodal_tools/understand_audio.py"
19
+ function: ["understand_audio"]
20
+ - name: ["text_to_speech_continue_generation"]
21
+ category: "multimodal"
22
+ path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
23
+ function: ["text_to_speech_continue_generation"]
24
+ system_message: |
25
+ You are an AI assistant with access to text-to-speech generation capabilities.
26
+
27
+ - id: "text_to_speech_continue_generation_tool2"
28
+ backend:
29
+ type: "openai"
30
+ model: "gpt-4o"
31
+ cwd: "workspace2"
32
+ enable_audio_generation: true
33
+ custom_tools:
34
+ - name: ["text_to_speech_transcription_generation"]
35
+ category: "multimodal"
36
+ path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
37
+ function: ["text_to_speech_transcription_generation"]
38
+ - name: ["understand_audio"]
39
+ category: "multimodal"
40
+ path: "massgen/tool/_multimodal_tools/understand_audio.py"
41
+ function: ["understand_audio"]
42
+ - name: ["text_to_speech_continue_generation"]
43
+ category: "multimodal"
44
+ path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
45
+ function: ["text_to_speech_continue_generation"]
46
+ system_message: |
47
+ You are an AI assistant with access to text-to-speech generation capabilities.
48
+
49
+ orchestrator:
50
+ snapshot_storage: "snapshots"
51
+ agent_temporary_workspace: "temp_workspaces"
52
+
53
+ ui:
54
+ display_type: "rich_terminal"
55
+ logging_enabled: true